Download

Link

CURL

bash

1curl -O https://www.outcoldsolutions.com/docs/monitoring-kubernetes/collectorforkubernetes.yaml

WGET

bash

1wget https://www.outcoldsolutions.com/docs/monitoring-kubernetes/collectorforkubernetes.yaml

collectorforkubernetes.yaml

   # Namespace "collectorforkubernetes". The namespaced objects later in this
   # manifest (the ServiceAccount and the ConfigMap) set
   # `namespace: collectorforkubernetes` and therefore live in it.
   1apiVersion: v1
   2kind: Namespace
   3metadata:
   4  labels:
   5    app: collectorforkubernetes
   6  name: collectorforkubernetes
   7---
   # Registers the custom resource kind `Configuration` (group collectord.io,
   # version v1). The 001-general.conf section of the ConfigMap in this manifest
   # watches these objects via `watch.configurations = collectord.io/v1/configuration`.
   8apiVersion: apiextensions.k8s.io/v1
   9kind: CustomResourceDefinition
  10metadata:
      # CRD names must be "<plural>.<group>".
  11  name: configurations.collectord.io
  12spec:
  13  group: collectord.io
  14  versions:
        # Single version: it is both served by the API and used for storage.
  15    - name: v1
  16      served: true
  17      storage: true
  18      schema:
  19        openAPIV3Schema:
  20          type: object
  21          properties:
                # `spec` is an open object: the schema accepts arbitrary keys.
  22            spec:
  23              type: object
  24              additionalProperties: true
                # Top-level boolean next to `spec`.
                # NOTE(review): its meaning is not visible in this manifest - confirm
                # against the Collectord documentation.
  25            force:
  26              type: boolean
      # Cluster-scoped: Configuration objects do not belong to a namespace.
  27  scope: Cluster
  28  names:
  29    listKind: ConfigurationList
  30    plural: configurations
  31    singular: configuration
  32    kind: Configuration
  33---
  # Registers the namespaced custom resource kind `SplunkOutput` (group
  # collectord.io, version v1). Its spec describes a Splunk endpoint: a URL, an
  # `insecure` flag, and a token given either inline or as a Secret reference.
  34apiVersion: apiextensions.k8s.io/v1
  35kind: CustomResourceDefinition
  36metadata:
      # CRD names must be "<plural>.<group>".
  37  name: splunkoutputs.collectord.io
  38spec:
  39  group: collectord.io
  40  versions:
        # Single version: it is both served by the API and used for storage.
  41    - name: v1
  42      served: true
  43      storage: true
  44      schema:
  45        openAPIV3Schema:
  46          type: object
  47          properties:
  48            spec:
  49              type: object
  50              properties:
  51                url:
  52                  type: string
  53                  format: uri
  54                insecure:
  55                  type: boolean
                    # Token supplied inline as a plain string.
  56                token:
  57                  type: string
  58                  description: "Plain token"
                    # Token supplied indirectly: `secret` and `key` are both strings.
                    # Neither is marked required by this schema.
  59                tokenFromSecret:
  60                  type: object
  61                  description: "Reference to a Kubernetes Secret"
  62                  properties:
  63                    secret:
  64                      type: string
  65                    key:
  66                      type: string
                  # Exactly one of `token` / `tokenFromSecret` must be present:
                  # setting both or neither fails validation.
  67              oneOf:
  68                - required: ["token"]
  69                - required: ["tokenFromSecret"]
  70  scope: Namespaced
  71  names:
  72    listKind: SplunkOutputList
  73    plural: splunkoutputs
  74    singular: splunkoutput
  75    kind: SplunkOutput
  76---
  # ServiceAccount in the collectorforkubernetes namespace. The
  # ClusterRoleBinding named collectorforkubernetes in this manifest lists it as
  # its subject, which grants it the ClusterRole of the same name.
  77apiVersion: v1
  78kind: ServiceAccount
  79metadata:
  80  labels:
  81    app: collectorforkubernetes
  82  name: collectorforkubernetes
  83  namespace: collectorforkubernetes
  84---
  # PriorityClass "collectorforkubernetes-critical". PriorityClass objects are
  # cluster-scoped, so no namespace is set.
  85apiVersion: scheduling.k8s.io/v1
  86kind: PriorityClass
  87metadata:
  88  name: collectorforkubernetes-critical
    # 1000000000 (one billion) is the highest value Kubernetes accepts for a
    # user-defined PriorityClass; larger values are reserved for system classes.
  89value: 1000000000
  90---
  # Cluster-wide permissions for Collectord. Three rules:
  #   1. `use` of the PodSecurityPolicy named "privileged";
  #   2. read-only (get/list/watch) access to the listed resources;
  #   3. `get` on the non-resource URL /metrics.
  # The ClusterRoleBinding of the same name grants this role to the
  # collectorforkubernetes ServiceAccount.
  91apiVersion: rbac.authorization.k8s.io/v1
  92kind: ClusterRole
  93metadata:
  94  labels:
  95    app: collectorforkubernetes
  96  name: collectorforkubernetes
  97rules:
    # Rule 1: restricted by resourceNames to the single policy "privileged".
  98- apiGroups: ['extensions']
  99  resources: ['podsecuritypolicies']
 100  verbs:     ['use']
 101  resourceNames:
 102  - privileged
    # Rule 2: an RBAC rule applies to every (apiGroup, resource) pair formed from
    # the two lists below; pairs that do not exist in the cluster grant nothing.
    # "" is the core API group.
 103- apiGroups:
 104  - ""
 105  - apps
 106  - batch
 107  - extensions
 108  - collectord.io
 109  - rbac.authorization.k8s.io
 110  resources:
      # splunkoutputs and configurations are the collectord.io custom resources
      # defined by the two CRDs in this manifest.
 111  - splunkoutputs
      # NOTE(review): alertmanagers is normally served by the monitoring.coreos.com
      # group, which is not in apiGroups above - confirm whether this entry has
      # any effect.
 112  - alertmanagers
 113  - cronjobs
 114  - daemonsets
 115  - deployments
 116  - endpoints
 117  - events
 118  - jobs
 119  - namespaces
 120  - nodes
 121  - nodes/metrics
 122  - nodes/proxy
 123  - pods
 124  - replicasets
 125  - replicationcontrollers
      # NOTE(review): scheduledjobs looks like the pre-rename name of cronjobs -
      # presumably kept for old clusters; verify it is still needed.
 126  - scheduledjobs
      # NOTE(review): cluster-wide read access to all Secrets; presumably needed
      # to resolve SplunkOutput `tokenFromSecret` references - confirm.
 127  - secrets
 128  - services
 129  - statefulsets
 130  - persistentvolumeclaims
 131  - configurations
 132  - resourcequotas
      # Lets Collectord read ClusterRoles; 001-general.conf sets
      # `clusterRole = collectorforkubernetes` so it can review its own role.
 133  - clusterroles
 134  verbs:
 135  - get
 136  - list
 137  - watch
    # Rule 3: targets a non-resource URL, so apiGroups and resources are
    # explicitly empty.
 138- nonResourceURLs:
 139  - /metrics
 140  verbs:
 141  - get
 142  apiGroups: []
 143  resources: []
 144---
 # Grants the ClusterRole "collectorforkubernetes" cluster-wide to the
 # ServiceAccount "collectorforkubernetes" in the collectorforkubernetes
 # namespace.
 145apiVersion: rbac.authorization.k8s.io/v1
 146kind: ClusterRoleBinding
 147metadata:
 148  labels:
 149    app: collectorforkubernetes
 150  name: collectorforkubernetes
    # The role being granted.
 151roleRef:
 152  apiGroup: rbac.authorization.k8s.io
 153  kind: ClusterRole
 154  name: collectorforkubernetes
    # Who receives it. ServiceAccount subjects are namespaced, so the namespace
    # is spelled out here.
 155subjects:
 156  - kind: ServiceAccount
 157    name: collectorforkubernetes
 158    namespace: collectorforkubernetes
 159---
 160apiVersion: v1
 161kind: ConfigMap
 162metadata:
 163  name: collectorforkubernetes
 164  namespace: collectorforkubernetes
 165  labels:
 166    app: collectorforkubernetes
 167data:
 168  001-general.conf: |
 169    # The general configuration is used for all deployments
 170    #
 171    # Run collectord with the flag -conf and specify location of the configuration files.
 172    #
 173    # You can override all the values using environment variables with the format like
 174    #   COLLECTOR__<ANYNAME>=<section>__<key>=<value>
 175    # As an example you can set dataPath in [general] section as
 176    #   COLLECTOR__DATAPATH=general__dataPath=C:\\some\\path\\data.db
 177    # This parameter can be configured using -env-override, set it to empty string to disable this feature
 178
 179    [general]
 180
 181    # Please review license https://www.outcoldsolutions.com/docs/license-agreement/
 182    # and accept license by changing the value to *true*
 183    acceptLicense = false
 184
 185    # Location for the database
 186    # Collectord stores positions of the files and internal state
 187    dataPath = ./data/
 188
 189    # log level (accepted values are trace, debug, info, warn, error, fatal)
 190    logLevel = info
 191
 192    # http server gives access to three endpoints
 193    # /healthz
 194    # /metrics/json
 195    # /metrics/prometheus
 196    # httpServerBinding = 0.0.0.0:11888
 197    httpServerBinding =
 198
 199    # log requests to the http server
 200    httpServerLog = false
 201
 202    # telemetry report endpoint, set it to empty string to disable telemetry
 203    telemetryEndpoint = https://license.outcold.solutions/telemetry/
 204
 205    # license check endpoint
 206    licenseEndpoint = https://license.outcold.solutions/license/
 207
 208    # license server through proxy
 209    # This configuration is used only for the Outcold Solutions License Server
 210    # For license server running on-premises, use configuration under [license.client]
 211    licenseServerProxyUrl =
 212
 213    # authentication with basic authorization (user:password)
 214    # This configuration is used only for the Outcold Solutions License Server
 215    # For license server running on-premises, use configuration under [license.client]
 216    licenseServerProxyBasicAuth =
 217
 218    # license key
 219    license =
 220
 221    # Environment variable $KUBERNETES_NODENAME is used by default to setup hostname
 222    # Use value below to override specific name
 223    hostname =
 224
 225    # Default output for events, logs and metrics
 226    # valid values: splunk and devnull
 227    # Use devnull by default if you don't want to redirect data
 228    defaultOutput = splunk
 229
 230    # Default buffer size for file input
 231    fileInputBufferSize = 256b
 232
 233    # Maximum size of one line the file reader can read
 234    fileInputLineMaxSize = 1mb
 235
 236    # Include custom fields to attach to every event, in example below every event sent to Splunk will have
 237    # indexed field my_environment=dev. Fields names should match to ^[a-z][_a-z0-9]*$
 238    # Better way to configure that is to specify labels for Kubernetes Nodes.
 239    # ; fields.my_environment = dev
 240    # Identify the cluster if you are planning to monitor multiple clusters
 241    fields.kubernetes_cluster = -
 242
 243    # Include EC2 Metadata (see list of possible fields https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html)
 244    # Should be in format ec2Metadata.{desired_field_name} = {url path to read the value}
 245    # ec2Metadata.ec2_instance_id = /latest/meta-data/instance-id
 246    # ec2Metadata.ec2_instance_type = /latest/meta-data/instance-type
 247
 248    # subdomain for the annotations added to the pods, workloads, namespaces or containers, like splunk.collectord.io/..
 249    annotationsSubdomain =
 250
 251    # configure global thruput per second for forwarded logs (metrics are not included)
 252    # for example if you set `thruputPerSecond = 512Kb`, that will limit amount of logs forwarded
 253    # from the single Collectord instance to 512Kb per second.
 254    # You can configure thruput individually for the logs (including specific for container logs) below
 255    thruputPerSecond =
 256
 257    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 258    # older than 7 days
 259    tooOldEvents =
 260
 261    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 262    tooNewEvents =
 263
 264    # For input.files::X and application logs, when glob or match are configured, Collectord can automatically
 265    # detect gzipped files and skip them (based on the extensions or magic numbers)
 266    autoSkipGzipFiles = true
 267    
 268    # Multi-output async publishing. When enabled (default), events routed to
 269    # non-default outputs are published asynchronously so that a slow or down
 270    # output does not block events destined for other outputs.
 271    ; multioutput.async = true
 272    # Buffer size for the async proxy (default 100). Absorbs transient bursts.
 273    # When this buffer and the output's own queue are both full, events are
 274    # dropped immediately without blocking the pipeline.
 275    ; multioutput.asyncBufferSize = 100
 276
 277    [license.client]
 278    # point to the license located on an HTTP web server, or hosted by a Collectord running as a license server
 279    url =
 280    # basic authentication for the HTTP server
 281    basicAuth =
 282    # if SSL, ignore the certificate verification
 283    insecure = false
 284    # CA Path for the Server certificate
 285    capath =
 286    # CA Name for the Server certificate
 287    caname =
 288    # license server through proxy
 289    proxyUrl =
 290    # authentication with basic authorization (user:password)
 291    proxyBasicAuth =
 292
 293
 294    # forward internal collectord metrics
 295    [input.collectord_metrics]
 296
 297    # disable collectord internal metrics
 298    disabled = false
 299
 300    # override type
 301    type = kubernetes_prometheus
 302
 303    # how often to collect internal metrics
 304    interval = 1m
 305
 306    # set output (splunk or devnull, default is [general]defaultOutput)
 307    output =
 308
 309    # specify Splunk index
 310    index =
 311
 312    # whitelist or blacklist the metrics
 313    whitelist.1 = ^file_input_open$
 314    whitelist.2 = ^file_input_read_bytes$
 315    whitelist.3 = ^kubernetes_handlers$
 316    whitelist.4 = ^pipe$
 317    whitelist.5 = ^pipelines_num$
 318    whitelist.6 = ^splunk_post_bytes_sum.*$
 319    whitelist.7 = ^splunk_post_events_count_sum.*$
 320    whitelist.8 = ^splunk_post_failed_requests$
 321    whitelist.9 = ^splunk_post_message_max_lag_seconds_bucket.*$
 322    whitelist.10 = ^splunk_post_requests_seconds_sum.*$
 323    whitelist.11 = ^splunk_post_retries_required_sum.*$
 324
 325
 326    # connection to kubernetes api
 327    [general.kubernetes]
 328
 329    # Override service URL for Kubernetes (default is ${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT})
 330    serviceURL =
 331
 332    # Environment variable $KUBERNETES_NODENAME is used by default to setup nodeName
 333    # Use it only when you need to override it
 334    nodeName =
 335
 336    # Configuration to access the API server,
 337    # see https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod
 338    # for details
 339    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
 340    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
 341
 342    # Default timeout for http responses. The streaming/watch requests depend on this timeout.
 343    timeout = 30m
 344
 345    # How long to keep the cache for the recent calls to API server (to limit number of calls when collectord discovers new pods)
 346    metadataTTL = 30s
 347
 348    # regex to find pods
 349    podsCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?$
 350
 351    # regex to find containers in the pods
 352    containersCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?/(docker-|crio-|cri-\w+-)?[0-9a-f]{64}(\.scope)?(\/.+)?$
 353
 354    # path to the kubelet root location (use it to discover application logs for emptyDir)
 355    # the expected format is `pods/{pod-id}/volumes/kubernetes.io~empty-dir/{volume-name}/_data/`
 356    volumesRootDir = /rootfs/var/lib/kubelet/
 357
 358    # You can attach annotations as a metadata, using the format
 359    #   includeAnnotations.{key} = {regexp}
 360    # For example if you want to include all annotations that start with `prometheus.io` or `example.com` you can include
 361    # the following format:
 362    #   includeAnnotations.1 = ^prometheus\.io.*
 363    #   includeAnnotations.2 = ^example\.com.*
 364
 365    # You can exclude labels from metadata, using the format
 366    #   excludeLabels.{key} = {regexp}
 367    # For example if you want to exclude all labels that start with `prometheus.io` or `example.com` you can include
 368    # the following format:
 369    #   excludeLabels.1 = ^prometheus\.io.*
 370    #   excludeLabels.2 = ^example\.com.*
 371
 372    # watch for changes (annotations) in the objects
 373    watch.namespaces = v1/namespace
 374    watch.deployments = apps/v1/deployment
 375    watch.configurations = collectord.io/v1/configuration
 376
 377    # Collectord can review the assigned ClusterRole and traverse metadata for the Pods only for the Owner objects
 378    # that are defined in the ClusterRole, ignoring anything else, it does not have access to.
 379    # This way Collectord does not generate 403 requests on API Server
 380    clusterRole = collectorforkubernetes
 381
 382    # Alternative of telling Collectord about the ClusterRole is to manually list the objects.
 383    # You can define which objects Collectord should traverse when it sees Owners.
 384    ; traverseOwnership.namespaces = v1/namespace
 385
 386    # Implementation of the watch protocol.
 387    # 0 - use the default implementation (2)
 388    # 1 - use the watch implementation that is optimized for the small number of objects (just issue one watch for all objects)
 389    # 2 - use the watch implementation that is optimized for the large number of objects (paginate through the list of objects and issue watch for the last resource version)
 390    watchImplementation = 2
 391
 392    # watch for pods annotations, setup prometheus collection
 393    # for these pods
 394    # Addon listens on Pod Network
 395    # DaemonSets listen on Host Network
 396    [input.prometheus_auto]
 397
 398    # disable prometheus auto discovery for pods
 399    disabled = false
 400
 401    # override type
 402    type = kubernetes_prometheus
 403
 404    # specify Splunk index
 405    index =
 406
 407    # how often to collect prometheus metrics
 408    interval = 60s
 409
 410    # include metrics help with the events
 411    includeHelp = true
 412
 413    # http client timeout
 414    timeout = 30s
 415
 416    # set output (splunk or devnull, default is [general]defaultOutput)
 417    output =
 418
 419    # Include an Authorization header for the prometheus scraper
 420    # When configuring scraping with collectord using annotations use prometheus.1-AuthorizationKey=key1
 421    # authorization.key1 = Bearer FOO
 422
 423
 424    # Splunk output
 425    [output.splunk]
 426
 427    # Splunk HTTP Event Collector url
 428    url =
 429    # You can specify multiple Splunk URLs with
 430    #
 431    # urls.0 = https://server1:8088/services/collector/event/1.0
 432    # urls.1 = https://server1:8088/services/collector/event/1.0
 433    # urls.2 = https://server1:8088/services/collector/event/1.0
 434    #
 435    # Limitations:
 436    # * The urls cannot have different path.
 437
 438    # Specify how URL should be picked up (in case if multiple is used)
 439    # urlSelection = random|round-robin|random-with-round-robin
 440    # where:
 441    # * random - choose random url on first selection and after each failure (connection or HTTP status code >= 500)
 442    # * round-robin - choose url starting from first one and bump on each failure (connection or HTTP status code >= 500)
 443    # * random-with-round-robin - choose random url on first selection and after that in round-robin on each
 444    #                             failure (connection or HTTP status code >= 500)
 445    urlSelection = random-with-round-robin
 446
 447    # Splunk HTTP Event Collector Token
 448    token =
 449
 450    # Allow invalid SSL server certificate
 451    insecure = false
 452    # minTLSVersion = TLSv1.2
 453    # maxTLSVersion = TLSv1.3
 454
 455    # Path to CA certificate
 456    caPath =
 457
 458    # CA Name to verify
 459    caName =
 460
 461    # path for client certificate (if required)
 462    clientCertPath =
 463
 464    # path for client key (if required)
 465    clientKeyPath =
 466
 467    # Events are batched with the maximum size set by batchSize and staying in pipeline for not longer
 468    # than set by frequency
 469    frequency = 5s
 470    batchSize = 768K
 471    # limit by the number of events (0 value has no limit on the number of events)
 472    events = 50
 473
 474    # Splunk through proxy
 475    proxyUrl =
 476
 477    # authentication with basic authorization (user:password)
 478    proxyBasicAuth =
 479
 480    # Splunk acknowledgement url (.../services/collector/ack)
 481    ackUrl =
 482    # You can specify multiple Splunk URLs for ackUrl
 483    #
 484    # ackUrls.0 = https://server1:8088/services/collector/ack
 485    # ackUrls.1 = https://server1:8088/services/collector/ack
 486    # ackUrls.2 = https://server1:8088/services/collector/ack
 487    #
 488    # Make sure that they are in the same order as urls for url, to make sure that this Splunk instance will be
 489    # able to acknowledge the payload.
 490    #
 491    # Limitations:
 492    # * The urls cannot have different path.
 493
 494    # Enable index acknowledgment
 495    ackEnabled = false
 496
 497    # Index acknowledgment timeout
 498    ackTimeout = 3m
 499
 500    # Timeout specifies a time limit for requests made by collectord.
 501    # The timeout includes connection time, any
 502    # redirects, and reading the response body.
 503    timeout = 30s
 504
 505    # in case when pipeline can post to multiple indexes, we want to avoid possibility of blocking
 506    # all pipelines, because just some events have incorrect index
 507    dedicatedClientPerIndex = true
 508
 509    # possible values: RedirectToDefault, Drop, Retry
 510    incorrectIndexBehavior = RedirectToDefault
 511
 512    # gzip compression level (nocompression, default, 1...9)
 513    compressionLevel = default
 514
 515    # number of dedicated splunk output threads (to increase throughput above 4k events per second)
 516    threads = 2
 517    # Default algorithm between threads is roundrobin, but you can change it to weighted
 518    ; threadsAlgorithm = weighted
 519
 520    # if you want to exclude some preindexed fields from events
 521    # excludeFields.kubernetes_pod_ip = true
 522
 523    # By default if there are no indexes defined on the message, Collectord sends the event without the index, and
 524    # Splunk HTTP Event Collector is going to use the default index for the Token. You can change that, and tell Collectord
 525    # to ignore all events that don't have index defined explicitly
 526    ; requireExplicitIndex = true
 527
 528    # You can define if you want to truncate messages that are larger than 1M in length (or define your own size, like 256K)
 529    ; maximumMessageLength = 1M
 530
 531    # For messages generated from logs, include unique `event_id` in the event
 532    ; includeEventID = false
 533
 534    # Dedicated queue size for the output, default is 1024, larger queue sizes will require more memory,
 535    # but will allow to handle more events in case of network issues
 536    queueSize = 1024
 537
 538    # How many digits after the decimal point to keep for timestamps (0-9)
 539    # Defaults to 3 (milliseconds)
 540    # Change to 6 for microseconds
 541    # Change to 9 for nanoseconds
 542    ; timestampPrecision = 3
 543
 544  002-daemonset.conf: |
 545    # DaemonSet configuration is used for Nodes and Masters.
 546
 547    // connection to CRIO
 548    [general.cri-o]
 549
 550    # url for CRIO API, only unix socket is supported
 551    url = unix:///rootfs/var/run/crio/crio.sock
 552
 553    # Timeout for http responses to docker client. The streaming requests depend on this timeout.
 554    timeout = 1m
 555
 556
 557    [general.containerd]
 558    # Runtime can be on /rootfs/run/containerd (depends on the Linux distribution)
 559    runtimePath = /rootfs/var/run/containerd
 560    namespace = k8s.io
 561
 562
 563    # cgroup input
 564    [input.system_stats]
 565
 566    # disable system level stats
 567    disabled.host = false
 568    disabled.cgroup = false
 569
 570    # cgroups fs location
 571    pathCgroups = /rootfs/sys/fs/cgroup
 572
 573    # proc location
 574    pathProc = /rootfs/proc
 575
 576    # how often to collect cgroup stats
 577    statsInterval = 30s
 578
 579    # override type
 580    type.host = kubernetes_stats_v2_host
 581    type.cgroup = kubernetes_stats_v2_cgroup
 582
 583    # specify Splunk index
 584    index.host =
 585    index.cgroup =
 586
 587    # set output (splunk or devnull, default is [general]defaultOutput)
 588    output.host =
 589    output.cgroup =
 590
 591
 592    # proc input
 593    [input.proc_stats]
 594
 595    # disable proc level stats
 596    disabled = false
 597
 598    # proc location
 599    pathProc = /rootfs/proc
 600
 601    # how often to collect proc stats
 602    statsInterval = 30s
 603
 604    # override type
 605    type = kubernetes_proc_stats_v2
 606
 607    # specify Splunk index
 608    index.host =
 609    index.cgroup =
 610
 611    # proc filesystem includes by default system threads (there can be over 100 of them)
 612    # these stats do not help with the observability
 613    # excluding them can reduce the size of the index, performance of the searches and usage of the collector
 614    includeSystemThreads = false
 615
 616    # set output (splunk or devnull, default is [general]defaultOutput)
 617    output.host =
 618    output.cgroup =
 619
 620    # Hide arguments for the processes, replacing with HIDDEN_ARGS(NUMBER)
 621    hideArgs = false
 622
 623
 624    # network stats
 625    [input.net_stats]
 626
 627    # disable net stats
 628    disabled = false
 629
 630    # proc path location
 631    pathProc = /rootfs/proc
 632
 633    # how often to collect net stats
 634    statsInterval = 30s
 635
 636    # override type
 637    type = kubernetes_net_stats_v2
 638
 639    # specify Splunk index
 640    index.host =
 641    index.cgroup =
 642
 643    # set output (splunk or devnull, default is [general]defaultOutput)
 644    output.host =
 645    output.cgroup =
 646
 647
 648    # network socket table
 649    [input.net_socket_table]
 650
 651    # disable net stats
 652    disabled = false
 653
 654    # proc path location
 655    pathProc = /rootfs/proc
 656
 657    # how often to collect net stats
 658    statsInterval = 30s
 659
 660    # override type
 661    type = kubernetes_net_socket_table
 662
 663    # specify Splunk index
 664    index.host =
 665    index.cgroup =
 666
 667    # set output (splunk or devnull, default is [general]defaultOutput)
 668    output.host =
 669    output.cgroup =
 670
 671    # group connections by tcp_state, localAddr, remoteAddr (if localPort is not the port it is listening on)
 672    # that can significantly reduce the number of events
 673    group = true
 674
 675    # Collectord can watch for services, node, and pod IP addresses, and lookup the names
 676    # for the IP addresses. Keeping this enabled can add a significant load on the API Server, with large number of pods.
 677    disableLookup = false
 678
 679
 680    # mount input (collects mount stats where kubelet runtime is stored)
 681    [input.mount_stats]
 682
 683    # disable system level stats
 684    disabled = false
 685
 686    # how often to collect mount stats
 687    statsInterval = 30s
 688
 689    # override type
 690    type = kubernetes_mount_stats
 691
 692    # specify Splunk index
 693    index =
 694
 695    # set output (splunk or devnull, default is [general]defaultOutput)
 696    output =
 697
 698
 699    # diskstats input (collects /proc/diskstats)
 700    [input.disk_stats]
 701
 702    # disable system level stats
 703    disabled = false
 704
 705    # how often to collect disk stats
 706    statsInterval = 30s
 707
 708    # override type
 709    type = kubernetes_disk_stats
 710
 711    # specify Splunk index
 712    index =
 713
 714    # set output (splunk or devnull, default is [general]defaultOutput)
 715    output =
 716
 717
 718    # Container Log files
 719    [input.files]
 720
 721    # disable container logs monitoring
 722    disabled = false
 723
 724    # root location of docker log files
 725    # logs are expected in standard docker format like {containerID}/{containerID}-json.log
 726    # rotated files
 727    path = /rootfs/var/lib/docker/containers/
 728    # root location of CRI-O files
 729    # logs are expected in Kubernetes format, like {podID}/{containerName}/0.log
 730    crioPath = /rootfs/var/log/pods/
 731
 732    # (obsolete) glob matching pattern for log files
 733    # glob = */*-json.log*
 734
 735    # files are read using polling schema, when reach the EOF how often to check if files got updated
 736    pollingInterval = 250ms
 737
 738    # how often to look for the new files under logs path
 739    walkingInterval = 5s
 740
 741    # include verbose fields in events (file offset)
 742    verboseFields = false
 743
 744    # override type
 745    type = kubernetes_logs
 746
 747    # specify Splunk index
 748    index =
 749
 750    # docker splits events when they are larger than 10-100k (depends on the docker version)
 751    # we join them together by default and forward to Splunk as one event
 752    joinPartialEvents = true
 753
 754    # In case if your containers report messages with terminal colors or other escape sequences
 755    # you can enable strip for all the containers in one place.
 756    # Better is to enable it only for required container with the label collectord.io/strip-terminal-escape-sequences=true
 757    stripTerminalEscapeSequences = false
 758    # Regexp used for stripping terminal colors, it does not strip all the escape sequences
 759    # Read http://man7.org/linux/man-pages/man4/console_codes.4.html for more information
 760    stripTerminalEscapeSequencesRegex = (\x1b\[\d{1,3}(;\d{1,3})*m)|(\x07)|(\x1b]\d+(\s\d)?;[^\x07]+\x07)|(.*\x1b\[K)
 761
 762    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 763    samplingPercent = -1
 764
 765    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 766    samplingKey =
 767
 768    # set output (splunk or devnull, default is [general]defaultOutput)
 769    output =
 770
 771    # configure default thruput per second for each container log
 772    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 773    # from the single container to 128Kb per second.
 774    thruputPerSecond =
 775
 776    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 777    # older than 7 days
 778    tooOldEvents =
 779
 780    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 781    tooNewEvents =
 782
 783
 784    # Application Logs
 785    [input.app_logs]
 786
 787    # disable container application logs monitoring
 788    disabled = false
 789
 790    # root location of mounts (applies to hostPath mounts only), if the hostPath differs inside container from the path on host
 791    root = /rootfs/
 792
 793    # how often to review list of available volumes
 794    syncInterval = 5s
 795
 796    # glob matching pattern for log files
 797    glob = *.log*
 798
 799    # files are read using polling schema, when reach the EOF how often to check if files got updated
 800    pollingInterval = 250ms
 801
 802    # how often to look for the new files under logs path
 803    walkingInterval = 5s
 804
 805    # include verbose fields in events (file offset)
 806    verboseFields = false
 807
 808    # override type
 809    type = kubernetes_logs
 810
 811    # specify Splunk index
 812    index =
 813
 814    # we split files using new line character, with this configuration you can specify what defines the new event
 815    # after new line
 816    eventPatternRegex = ^[^\s]
 817    # Maximum interval of messages in pipeline
 818    eventPatternMaxInterval = 100ms
 819    # Maximum time to wait for the messages in pipeline
 820    eventPatternMaxWait = 1s
 821    # Maximum message size
 822    eventPatternMaxSize = 1MB
 823
 824    # set output (splunk or devnull, default is [general]defaultOutput)
 825    output =
 826
 827    # configure default thruput per second for each container log
 828    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 829    # from the single container to 128Kb per second.
 830    thruputPerSecond =
 831
 832    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 833    # older than 7 days
 834    tooOldEvents =
 835
 836    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 837    tooNewEvents =
 838
 839    # Configure how long Collectord should keep the file descriptors open for files, that has not been forwarded yet
 840    # When using PVC, and if pipeline is lagging behind, Collectord holding open fd for files, can cause long termination
 841    # of pods, as kubelet cannot unmount the PVC volume from the system
 842    maxHoldAfterClose = 1800s
 843
 844
 845    # Host logs. Input syslog(.\d+)? files
 846    [input.files::syslog]
 847
 848    # disable host level logs
 849    disabled = false
 850
 851    # root location of docker files
 852    path = /rootfs/var/log/
 853
 854    # regex matching pattern
 855    match = ^(syslog|messages)(.\d+)?$
 856
 857    # limit search only on one level
 858    recursive = false
 859
 860    # files are read using polling schema, when reach the EOF how often to check if files got updated
 861    pollingInterval = 250ms
 862
 863    # how often to look for the new files under logs path
 864    walkingInterval = 5s
 865
 866    # include verbose fields in events (file offset)
 867    verboseFields = false
 868
 869    # override type
 870    type = kubernetes_host_logs
 871
 872    # specify Splunk index
 873    index =
 874
 875    # field extraction
 876    extraction = ^(?P<timestamp>[A-Za-z]+\s+\d+\s\d+:\d+:\d+)\s(?P<syslog_hostname>[^\s]+)\s(?P<syslog_component>[^:\[]+)(\[(?P<syslog_pid>\d+)\])?: (.+)$
 877    # extractionMessageField =
 878
 879    # timestamp field
 880    timestampField = timestamp
 881
 882    # format for timestamp
 883    # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`
 884    timestampFormat = Jan 2 15:04:05
 885
 886    # Adjust date, if month/day aren't set in format
 887    timestampSetMonth = false
 888    timestampSetDay = false
 889
 890    # timestamp location (if not defined by format)
 891    timestampLocation = Local
 892
 893    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 894    samplingPercent = -1
 895
 896    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 897    samplingKey =
 898
 899    # set output (splunk or devnull, default is [general]defaultOutput)
 900    output =
 901
 902    # configure default thruput per second for this files group
 903    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 904    # from the files in this group to 128Kb per second.
 905    thruputPerSecond =
 906
 907    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 908    # older than 7 days
 909    tooOldEvents =
 910
 911    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 912    tooNewEvents =
 913
 914    # by default every new event should start with a non-whitespace character
 915    eventPattern = ^[^\s]
 916
 917    # Blacklisting and whitelisting the logs
 918    # whitelist.0 = ^regexp$
 919    # blacklist.0 = ^regexp$
 920
 921
 922    # Host logs. Input all *.log(.\d+)? files
 923    [input.files::logs]
 924
 925    # disable host level logs
 926    disabled = false
 927
 928    # root location of log files
 929    path = /rootfs/var/log/
 930
 931    # regex matching pattern
 932    match = ^(([\w\-.]+\.log(.[\d\-]+)?)|(docker))$
 933
 934    # files are read using a polling scheme; after reaching EOF, how often to check whether files got updated
 935    pollingInterval = 250ms
 936
 937    # how often to look for the new files under logs path
 938    walkingInterval = 5s
 939
 940    # include verbose fields in events (file offset)
 941    verboseFields = false
 942
 943    # override type
 944    type = kubernetes_host_logs
 945
 946    # specify Splunk index
 947    index =
 948
 949    # field extraction
 950    extraction =
 951    extractionMessageField =
 952
 953    # timestamp field
 954    timestampField =
 955
 956    # format for timestamp
 957    # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`
 958    timestampFormat =
 959
 960    # timestamp location (if not defined by format)
 961    timestampLocation =
 962
 963    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 964    samplingPercent = -1
 965
 966    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 967    samplingKey =
 968
 969    # set output (splunk or devnull, default is [general]defaultOutput)
 970    output =
 971
 972    # configure default thruput per second for this files group
 973    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 974    # from the files in this group to 128Kb per second.
 975    thruputPerSecond =
 976
 977    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 978    # older than 7 days
 979    tooOldEvents =
 980
 981    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 982    tooNewEvents =
 983
 984    # by default every new event should start with a non-whitespace character
 985    eventPattern = ^[^\s]
 986
 987    # Blacklisting and whitelisting the logs
 988    # whitelist.0 = ^regexp$
 989    # blacklist.0 = ^regexp$
 990
 991
 992    [input.journald]
 993
 994    # disable host level logs
 995    disabled = false
 996
 997    # root location of log files
 998    path.persistent = /rootfs/var/log/journal/
 999    path.volatile = /rootfs/run/log/journal/
1000
1001    # when the end of the journal is reached, how often to poll for new entries
1002    pollingInterval = 250ms
1003
1004    # if you don't want to forward journald from the beginning,
1005    # set the oldest event in relative value, like -14h or -30m or -30s (h/m/s supported)
1006    startFromRel =
1007
1008    # override type
1009    type = kubernetes_host_logs
1010
1011    # specify Splunk index
1012    index =
1013
1014    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
1015    samplingPercent = -1
1016
1017    # sampling key (should be regexp with the named match pattern `key`)
1018    samplingKey =
1019
1020    # how often to reopen the journald to free old files
1021    reopenInterval = 1h
1022
1023    # set output (splunk or devnull, default is [general]defaultOutput)
1024    output =
1025
1026    # configure default thruput per second for journald
1027    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
1028    # from the journald to 128Kb per second.
1029    thruputPerSecond =
1030
1031    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
1032    # older than 7 days
1033    tooOldEvents =
1034
1035    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
1036    tooNewEvents =
1037
1038    # by default every new event should start with a non-whitespace character
1039    eventPattern = ^[^\s]
1040
1041    # Blacklisting and whitelisting the logs
1042    # whitelist.0 = ^regexp$
1043    # blacklist.0 = ^regexp$
1044
1045    # Move Journald logs reader to a separate process, to prevent process from crashing in case of corrupted log files
1046    spawnExternalProcess = false
1047
1048
1049    # Pipe to join events (container logs only)
1050    [pipe.join]
1051
1052    # disable joining event
1053    disabled = false
1054
1055    # Maximum interval of messages in pipeline
1056    maxInterval = 100ms
1057
1058    # Maximum time to wait for the messages in pipeline
1059    maxWait = 1s
1060
1061    # Maximum message size
1062    maxSize = 1MB
1063
1064    # Default pattern to indicate new message (should start not from space)
1065    patternRegex = ^[^\s]
1066
1067
1068    # (deprecated, use annotations for setting up join rules)
1069    # Define special event join patterns for matched events
1070    # Section consist of [pipe.join::<name>]
1071    # [pipe.join::my_app]
1072    ## Set match pattern for the fields
1073    #; matchRegex.docker_container_image = my_app
1074    #; matchRegex.stream = stdout
1075    ## All events start from '[<digits>'
1076    #; patternRegex = ^\[\d+
1077
1078
1079    # You can configure global replace rules for the events, which can help to remove sensitive data
1080    # from logs before they are sent to Splunk. Those rules will be applied to all pipelines for container logs, host logs,
1081    # application logs and events.
1082    # In the following example we replace password=TEST with password=********
1083    ; [pipe.replace::name]
1084    ; patternRegex = (password=)([^\s]+)
1085    ; replace = $1********
1086    
1087    # You can configure global hash rules for the events, which can help to hide sensitive data
1088    # from logs before they are sent to outputs. Those rules will be applied to all pipelines for container logs, host logs,
1089    # application logs and events.
1090    # In the following example we hash IP addresses with fnv-1a-64
1091    ; [pipe.hash::name]
1092    ; match = (\d{1,3}\.){3}\d{1,3}
1093    ; function = fnv-1a-64
1094
1095
1096    [input.prometheus::kubelet]
1097
1098    # disable prometheus kubelet metrics
1099    disabled = false
1100
1101    # override type
1102    type = kubernetes_prometheus
1103
1104    # specify Splunk index
1105    index =
1106
1107    # override host (environment variables are supported, by default Kubernetes node name is used)
1108    host = ${KUBERNETES_NODENAME}
1109
1110    # override source
1111    source = kubelet
1112
1113    # how often to collect prometheus metrics
1114    interval = 60s
1115
1116    # request timeout
1117    timeout = 60s
1118
1119    # Prometheus endpoint, multiple values can be specified, collectord tries them in order till finding the first
1120    # working endpoint.
1121    # At first trying to get it through proxy
1122    endpoint.1proxy = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/nodes/${KUBERNETES_NODENAME}/proxy/metrics
1123    # In case if cannot get it through proxy, trying localhost
1124    endpoint.2http = http://127.0.0.1:10255/metrics
1125
1126    # token for "Authorization: Bearer $(cat tokenPath)"
1127    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1128
1129    # server certificate for certificate validation
1130    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1131
1132    # client certificate for authentication
1133    clientCertPath =
1134
1135    # Allow invalid SSL server certificate
1136    insecure = true
1137
1138    # include metrics help with the events
1139    includeHelp = false
1140
1141    # set output (splunk or devnull, default is [general]defaultOutput)
1142    output =
1143
1144    whitelist.1 = ^kubernetes_build_info$
1145    whitelist.2 = ^kubelet_runtime_operations_duration_seconds_sum$
1146    whitelist.3 = ^kubelet_docker_operations_duration_seconds_sum$
1147    whitelist.4 = ^kubelet_network_plugin_operations_duration_seconds_sum$
1148    whitelist.5 = ^kubelet_cgroup_manager_duration_seconds_sum$
1149    whitelist.6 = ^storage_operation_duration_seconds_sum$
1150    whitelist.7 = ^kubelet_docker_operations_errors_total$
1151    whitelist.8 = ^kubelet_runtime_operations_errors_total$
1152    whitelist.9 = ^rest_client_requests_total$
1153    whitelist.10 = ^process_cpu_seconds_total$
1154    whitelist.11 = ^process_resident_memory_bytes$
1155    whitelist.12 = ^process_virtual_memory_bytes$
1156    whitelist.13 = ^rest_client_request_duration_seconds_sum$
1157    whitelist.14 = ^kubelet_volume_stats_.+$
1158    whitelist.15 = ^rest_client_requests_total$
1159    
1160
1161    ; # Collectord reports if entropy is low
1162    ; [diagnostics::node-entropy]
1163    ; settings.path = /rootfs/proc/sys/kernel/random/entropy_avail
1164    ; settings.interval = 1h
1165    ; settings.threshold = 800
1166
1167    # Collectord can report if node reboot is required
1168    [diagnostics::node-reboot-required]
1169    settings.path = /rootfs/var/run/reboot-required*
1170    settings.interval = 1h
1171
1172    # See https://www.kernel.org/doc/Documentation/admin-guide/hw-vuln/index.rst
1173    # And https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
1174    [diagnostics::cpu-vulnerabilities]
1175    settings.path = /rootfs/sys/devices/system/cpu/vulnerabilities/*
1176    settings.interval = 1h
1177
1178
1179  003-daemonset-master.conf: |
1180    [input.prometheus::kubernetes-api]
1181
1182    # disable prometheus kubernetes-api metrics
1183    disabled = false
1184
1185    # override type
1186    type = kubernetes_prometheus
1187
1188    # specify Splunk index
1189    index =
1190
1191    # override host (environment variables are supported, by default Kubernetes node name is used)
1192    host = ${KUBERNETES_NODENAME}
1193
1194    # override source
1195    source = kubernetes-api
1196
1197    # how often to collect prometheus metrics
1198    interval = 60s
1199
1200    # request timeout
1201    timeout = 60s
1202
1203    # prometheus endpoint
1204    # at first trying to get it from localhost (avoiding load balancer, if multiple api servers)
1205    endpoint.1localhost = https://127.0.0.1:6443/metrics
1206    # as fallback using the Kubernetes API service address
1207    endpoint.2kubeapi = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/metrics
1208
1209    # token for "Authorization: Bearer $(cat tokenPath)"
1210    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1211
1212    # server certificate for certificate validation
1213    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1214
1215    # client certificate for authentication
1216    clientCertPath =
1217
1218    # Allow invalid SSL server certificate
1219    insecure = true
1220
1221    # include metrics help with the events
1222    includeHelp = false
1223
1224    # set output (splunk or devnull, default is [general]defaultOutput)
1225    output =
1226
1227    whitelist.1 = ^kubernetes_build_info$
1228    whitelist.2 = ^authenticated_user_requests$
1229    whitelist.3 = ^apiserver_request_total$
1230    whitelist.4 = ^process_cpu_seconds_total$
1231    whitelist.5 = ^process_resident_memory_bytes$
1232    whitelist.6 = ^process_virtual_memory_bytes$
1233    whitelist.7 = ^rest_client_request_duration_seconds_sum$
1234    whitelist.8 = ^rest_client_requests_total$
1235
1236
1237    # This configuration works if the scheduler is bound to localhost:10251
1238    [input.prometheus::scheduler]
1239
1240    # disable prometheus scheduler metrics
1241    disabled = false
1242
1243    # override type
1244    type = kubernetes_prometheus
1245
1246    # specify Splunk index
1247    index =
1248
1249    # override host
1250    host = ${KUBERNETES_NODENAME}
1251
1252    # override source
1253    source = scheduler
1254
1255    # how often to collect prometheus metrics
1256    interval = 60s
1257
1258    # request timeout
1259    timeout = 60s
1260
1261    # prometheus endpoint
1262    endpoint.https = https://:10259/metrics
1263    endpoint.http = http://127.0.0.1:10251/metrics
1264
1265    # token for "Authorization: Bearer $(cat tokenPath)"
1266    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1267
1268    # server certificate for certificate validation
1269    certPath =
1270
1271    # client certificate for authentication
1272    clientCertPath =
1273
1274    # Allow invalid SSL server certificate
1275    insecure = true
1276
1277    # include metrics help with the events
1278    includeHelp = false
1279
1280    # set output (splunk or devnull, default is [general]defaultOutput)
1281    output =
1282
1283    whitelist.1 = ^kubernetes_build_info$
1284    whitelist.2 = ^scheduler_e2e_scheduling_duration_seconds_sum$
1285    whitelist.3 = ^scheduler_binding_duration_seconds_sum$
1286    whitelist.4 = ^scheduler_scheduling_algorithm_duration_seconds_sum$
1287    whitelist.5 = ^rest_client_request_duration_seconds_sum$
1288    whitelist.6 = ^rest_client_requests_total$
1289    whitelist.7 = ^process_cpu_seconds_total$
1290    whitelist.8 = ^process_resident_memory_bytes$
1291    whitelist.9 = ^process_virtual_memory_bytes$
1292
1293
1294    # This configuration works if the controller-manager is bound to localhost:10252
1295    [input.prometheus::controller-manager]
1296
1297    # disable prometheus controller-manager metrics
1298    disabled = false
1299
1300    # override type
1301    type = kubernetes_prometheus
1302
1303    # specify Splunk index
1304    index =
1305
1306    # override host
1307    host = ${KUBERNETES_NODENAME}
1308
1309    # override source
1310    source = controller-manager
1311
1312    # how often to collect prometheus metrics
1313    interval = 60s
1314
1315    # request timeout
1316    timeout = 60s
1317
1318    # prometheus endpoint
1319    endpoint.https = https://:10257/metrics
1320    endpoint.http = http://127.0.0.1:10252/metrics
1321
1322    # token for "Authorization: Bearer $(cat tokenPath)"
1323    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1324
1325    # server certificate for certificate validation
1326    certPath =
1327
1328    # client certificate for authentication
1329    clientCertPath =
1330
1331    # Allow invalid SSL server certificate
1332    insecure = true
1333
1334    # include metrics help with the events
1335    includeHelp = false
1336
1337    # set output (splunk or devnull, default is [general]defaultOutput)
1338    output =
1339
1340    whitelist.1 = ^kubernetes_build_info$
1341    whitelist.2 = ^node_collector_zone_size$
1342    whitelist.3 = ^node_collector_zone_health$
1343    whitelist.4 = ^node_collector_unhealthy_nodes_in_zone$
1344    whitelist.5 = ^rest_client_request_duration_seconds_sum$
1345    whitelist.6 = ^rest_client_requests_total$
1346    whitelist.7 = ^process_cpu_seconds_total$
1347    whitelist.8 = ^process_resident_memory_bytes$
1348    whitelist.9 = ^process_virtual_memory_bytes$
1349    
1350
1351    [input.prometheus::etcd]
1352
1353    # disable prometheus etcd metrics
1354    disabled = false
1355
1356    # override type
1357    type = kubernetes_prometheus
1358
1359    # specify Splunk index
1360    index =
1361
1362    # override host
1363    host = ${KUBERNETES_NODENAME}
1364
1365    # override source
1366    source = etcd
1367
1368    # how often to collect prometheus metrics
1369    interval = 60s
1370
1371    # request timeout
1372    timeout = 60s
1373
1374    # prometheus endpoint
1375    endpoint.http = http://:2379/metrics
1376    endpoint.https = https://:2379/metrics
1377
1378    # token for "Authorization: Bearer $(cat tokenPath)"
1379    tokenPath =
1380
1381    # server certificate for certificate validation
1382    certPath = /rootfs/etc/kubernetes/pki/etcd/ca.crt
1383
1384    # client certificate for authentication
1385    clientCertPath = /rootfs/etc/kubernetes/pki/apiserver-etcd-client.crt
1386    clientKeyPath = /rootfs/etc/kubernetes/pki/apiserver-etcd-client.key
1387
1388    # Allow invalid SSL server certificate
1389    insecure = true
1390
1391    # include metrics help with the events
1392    includeHelp = false
1393
1394    # set output (splunk or devnull, default is [general]defaultOutput)
1395    output =
1396
1397    whitelist.1 = ^etcd_server_leader_changes_seen_total$
1398    whitelist.2 = ^etcd_server_has_leader$
1399    whitelist.3 = ^etcd_server_proposals_committed_total$
1400    whitelist.4 = ^etcd_server_proposals_applied_total$
1401    whitelist.5 = ^etcd_server_proposals_committed_total$
1402    whitelist.6 = ^etcd_server_proposals_pending$
1403    whitelist.7 = ^etcd_server_proposals_failed_total$
1404    whitelist.8 = ^etcd_disk_wal_fsync_duration_seconds_sum$
1405    whitelist.9 = ^etcd_disk_wal_fsync_duration_seconds_count$
1406    whitelist.10 = ^etcd_disk_backend_commit_duration_seconds_sum$
1407    whitelist.11 = ^etcd_disk_backend_commit_duration_seconds_count$
1408    whitelist.12 = ^etcd_network_client_grpc_.*$
1409    whitelist.13 = ^grpc_server_handled_total$
1410    whitelist.14 = ^etcd_network_peer_round_trip_time_seconds_bucket$
1411    whitelist.15 = ^process_cpu_seconds_total$
1412    whitelist.16 = ^process_resident_memory_bytes$
1413    whitelist.17 = ^process_virtual_memory_bytes$
1414    whitelist.18 = ^process_open_fds$
1415    whitelist.19 = ^process_max_fds$
1416    whitelist.20 = ^etcd_disk_backend_commit_duration_seconds_bucket$
1417    whitelist.21 = ^etcd_disk_wal_fsync_duration_seconds_bucket$
1418
1419  004-addon.conf: |
1420    [general]
1421
1422    # addons can be run in parallel with agents
1423    addon = true
1424
1425    [input.kubernetes_events]
1426
1427    # disable events
1428    disabled = false
1429
1430    # override type
1431    type = kubernetes_events
1432
1433    # specify Splunk index
1434    index =
1435
1436    # set output (splunk or devnull, default is [general]defaultOutput)
1437    output =
1438
1439    # exclude managed fields from the metadata
1440    excludeManagedFields = true
1441
1442
1443    [input.kubernetes_watch::pods]
1444
1445    # disable events
1446    disabled = false
1447
1448    # Set the timeout for how often watch request should refresh the whole list
1449    refresh = 10m
1450
1451    apiVersion = v1
1452    kind = Pod
1453    namespace =
1454
1455    # override type
1456    type = kubernetes_objects
1457
1458    # specify Splunk index
1459    index =
1460
1461    # set output (splunk or devnull, default is [general]defaultOutput)
1462    output =
1463
1464    # exclude managed fields from the metadata
1465    excludeManagedFields = true
1466
1467    # you can remove or hash some values in the events (after modifyValues you can define path in the JSON object,
1468    # and the value can be hash:{hashFunction}, or remove to remove the object )
1469    ; modifyValues.object.data.* = hash:sha256
1470    ; modifyValues.object.metadata.annotations.* = remove
1471
1472    # You can exclude events by namespace with blacklist or whitelist only required namespaces
1473    # blacklist.kubernetes_namespace = ^namespace0$
1474    # whitelist.kubernetes_namespace = ^((namespace1)|(namespace2))$
1475
1476    [input.kubernetes_watch::resourcequota]
1477    # disable events
1478    disabled = false
1479
1480    # Set the timeout for how often watch request should refresh the whole list
1481    refresh = 10m
1482
1483    apiVersion = v1
1484    kind = ResourceQuota
1485    namespace =
1486
1487    # override type
1488    type = kubernetes_objects
1489
1490    # specify Splunk index
1491    index =
1492
1493    # set output (splunk or devnull, default is [general]defaultOutput)
1494    output =
1495
1496    # exclude managed fields from the metadata
1497    excludeManagedFields = true
1498
1499    [input.kubernetes_watch::nodes]
1500    # disable events
1501    disabled = false
1502
1503    # Set the timeout for how often watch request should refresh the whole list
1504    refresh = 10m
1505
1506    apiVersion = v1
1507    kind = Node
1508    namespace =
1509
1510    # override type
1511    type = kubernetes_objects
1512
1513    # specify Splunk index
1514    index =
1515
1516    # set output (splunk or devnull, default is [general]defaultOutput)
1517    output =
1518
1519    # exclude managed fields from the metadata
1520    excludeManagedFields = true
1521
1522---
1523apiVersion: apps/v1
1524kind: DaemonSet
1525metadata:
1526  name: collectorforkubernetes
1527  namespace: collectorforkubernetes
1528  labels:
1529    app: collectorforkubernetes
1530spec:
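1531  # RollingUpdate is suitable for collectord; it is set explicitly here (it is also the default for apps/v1
1532  # DaemonSets, OnDelete was the default only in older API versions). It applies when you update the DaemonSet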
1533  updateStrategy:
1534    type: RollingUpdate
1535  selector:
1536    matchLabels:
1537      daemon: collectorforkubernetes
1538  template:
1539    metadata:
1540      name: collectorforkubernetes
1541      labels:
1542        daemon: collectorforkubernetes
1543    spec:
1544      priorityClassName: collectorforkubernetes-critical
1545      dnsPolicy: ClusterFirstWithHostNet
1546      hostNetwork: true
1547      serviceAccountName: collectorforkubernetes
1548      # We run this DaemonSet only for Non-Masters
1549      affinity:
1550        nodeAffinity:
1551          requiredDuringSchedulingIgnoredDuringExecution:
1552            nodeSelectorTerms:
1553            - matchExpressions:
1554              - key: node-role.kubernetes.io/control-plane
1555                operator: DoesNotExist
1556      tolerations:
1557      - operator: "Exists"
1558        effect: "NoSchedule"
1559      - operator: "Exists"
1560        effect: "NoExecute"
1561      containers:
1562      - name: collectorforkubernetes
1563        # Collectord version
1564        image: docker.io/outcoldsolutions/collectorforkubernetes:26.04.1
1565        imagePullPolicy: Always
1566        securityContext:
1567          runAsUser: 0
1568          privileged: true
1569        # Define your resources if you need. Defaults should be fine for most.
1570        # You can lower or increase based on your hosts.
1571        resources:
1572          limits:
1573            cpu: 2000m
1574            memory: 1024Mi
1575          requests:
1576            cpu: 500m
1577            memory: 256Mi
1578        env:
1579        - name: KUBERNETES_NODENAME
1580          valueFrom:
1581            fieldRef:
1582              fieldPath: spec.nodeName
1583        - name: POD_NAME
1584          valueFrom:
1585            fieldRef:
1586              fieldPath: metadata.name
1587        volumeMounts:
1588        # We store state in /data folder (file positions)
1589        - name: collectorforkubernetes-state
1590          mountPath: /data
1591        # Configuration file deployed with ConfigMap
1592        - name: collectorforkubernetes-config
1593          mountPath: /config/
1594          readOnly: true
1595        # Root filesystem to have access to logs and metrics
1596        - name: rootfs
1597          mountPath: /rootfs/
1598          readOnly: false
1599          mountPropagation: HostToContainer
1600        # correct timezone
1601        - name: localtime
1602          mountPath: /etc/localtime
1603          readOnly: true
1604      volumes:
1605      # We store state directly on host, change this location, if
1606      # your persistent volume is somewhere else
1607      - name: collectorforkubernetes-state
1608        hostPath:
1609          path: /var/lib/collectorforkubernetes/data/
1610          type: DirectoryOrCreate
1611      # Host root filesystem (for container logs, host logs and metadata)
1612      - name: rootfs
1613        hostPath:
1614          path: /
1615      # correct timezone
1616      - name: localtime
1617        hostPath:
1618          path: /etc/localtime
1619      # configuration from ConfigMap
1620      - name: collectorforkubernetes-config
1621        configMap:
1622          name: collectorforkubernetes
1623          items:
1624          - key: 001-general.conf
1625            path: 001-general.conf
1626          - key: 002-daemonset.conf
1627            path: 002-daemonset.conf
1628---
1629apiVersion: apps/v1
1630kind: DaemonSet
1631metadata:
1632  name: collectorforkubernetes-master
1633  namespace: collectorforkubernetes
1634  labels:
1635    app: collectorforkubernetes
1636spec:
1637  updateStrategy:
1638    type: RollingUpdate
1639  selector:
1640    matchLabels:
1641      daemon: collectorforkubernetes
1642  template:
1643    metadata:
1644      name: collectorforkubernetes-master
1645      labels:
1646        daemon: collectorforkubernetes
1647    spec:
1648      priorityClassName: collectorforkubernetes-critical
1649      dnsPolicy: ClusterFirstWithHostNet
1650      hostNetwork: true
1651      serviceAccountName: collectorforkubernetes
1652      affinity:
1653        nodeAffinity:
1654          requiredDuringSchedulingIgnoredDuringExecution:
1655            nodeSelectorTerms:
1656            - matchExpressions:
1657              - key: node-role.kubernetes.io/control-plane
1658                operator: Exists
1659      tolerations:
1660      - operator: "Exists"
1661        effect: "NoSchedule"
1662      - operator: "Exists"
1663        effect: "NoExecute"
1664      containers:
1665      - name: collectorforkubernetes
1666        image: docker.io/outcoldsolutions/collectorforkubernetes:26.04.1
1667        imagePullPolicy: Always
1668        securityContext:
1669          runAsUser: 0
1670          privileged: true
1671        resources:
1672          limits:
1673            cpu: 2000m
1674            memory: 1024Mi
1675          requests:
1676            cpu: 500m
1677            memory: 256Mi
1678        env:
1679        - name: KUBERNETES_NODENAME
1680          valueFrom:
1681            fieldRef:
1682              fieldPath: spec.nodeName
1683        - name: POD_NAME
1684          valueFrom:
1685            fieldRef:
1686              fieldPath: metadata.name
1687        volumeMounts:
1688        - name: collectorforkubernetes-state
1689          mountPath: /data
1690        - name: collectorforkubernetes-config
1691          mountPath: /config/
1692          readOnly: true
1693        - name: rootfs
1694          mountPath: /rootfs/
1695          readOnly: false
1696          mountPropagation: HostToContainer
1697        - name: localtime
1698          mountPath: /etc/localtime
1699          readOnly: true
1700      volumes:
1701      - name: collectorforkubernetes-state
1702        hostPath:
1703          path: /var/lib/collectorforkubernetes/data/
1704          type: DirectoryOrCreate
1705      - name: rootfs
1706        hostPath:
1707          path: /
1708      - name: localtime
1709        hostPath:
1710          path: /etc/localtime
1711      - name: collectorforkubernetes-config
1712        configMap:
1713          name: collectorforkubernetes
1714          items:
1715          - key: 001-general.conf
1716            path: 001-general.conf
1717          - key: 002-daemonset.conf
1718            path: 002-daemonset.conf
1719          - key: 003-daemonset-master.conf
1720            path: 003-daemonset-master.conf
1721---
1722apiVersion: apps/v1
1723kind: Deployment
1724metadata:
1725  name: collectorforkubernetes-addon
1726  namespace: collectorforkubernetes
1727  labels:
1728    app: collectorforkubernetes
1729spec:
1730  replicas: 1
1731  selector:
1732    matchLabels:
1733      daemon: collectorforkubernetes
1734  template:
1735    metadata:
1736      name: collectorforkubernetes-addon
1737      labels:
1738        daemon: collectorforkubernetes
1739    spec:
1740      priorityClassName: collectorforkubernetes-critical
1741      serviceAccountName: collectorforkubernetes
1742      containers:
1743      - name: collectorforkubernetes
1744        image: docker.io/outcoldsolutions/collectorforkubernetes:26.04.1
1745        imagePullPolicy: Always
1746        securityContext:
1747          runAsUser: 0
1748          privileged: true
1749        resources:
1750          limits:
1751            cpu: 1000m
1752            memory: 512Mi
1753          requests:
1754            cpu: 200m
1755            memory: 64Mi
1756        env:
1757        - name: KUBERNETES_NODENAME
1758          valueFrom:
1759            fieldRef:
1760              fieldPath: spec.nodeName
1761        - name: POD_NAME
1762          valueFrom:
1763            fieldRef:
1764              fieldPath: metadata.name
1765        volumeMounts:
1766        - name: collectorforkubernetes-state
1767          mountPath: /data
1768        - name: collectorforkubernetes-config
1769          mountPath: /config/
1770          readOnly: true
1771      volumes:
1772      - name: collectorforkubernetes-state
1773        hostPath:
1774          path: /var/lib/collectorforkubernetes/data/
1775          type: Directory
1776      - name: collectorforkubernetes-config
1777        configMap:
1778          name: collectorforkubernetes
1779          items:
1780          - key: 001-general.conf
1781            path: 001-general.conf
1782          - key: 004-addon.conf
1783            path: 004-addon.conf

Monitoring Kubernetes

Configuration reference

Download

Link

CURL

WGET

collectorforkubernetes.yaml