Monitoring Kubernetes

Configuration reference

Download

collectorforkubernetes.yaml

CURL

bash
curl -O https://www.outcoldsolutions.com/docs/monitoring-kubernetes/collectorforkubernetes.yaml

WGET

bash
wget https://www.outcoldsolutions.com/docs/monitoring-kubernetes/collectorforkubernetes.yaml

collectorforkubernetes.yaml

   1apiVersion: v1
   2kind: Namespace
   3metadata:
   4  labels:
   5    app: collectorforkubernetes
   6  name: collectorforkubernetes
   7---
   8apiVersion: apiextensions.k8s.io/v1
   9kind: CustomResourceDefinition
  10metadata:
  11  name: configurations.collectord.io
  12spec:
  13  group: collectord.io
  14  versions:
  15    - name: v1
  16      served: true
  17      storage: true
  18      schema:
  19        openAPIV3Schema:
  20          type: object
  21          properties:
  22            spec:
  23              type: object
  24              additionalProperties: true
  25            force:
  26              type: boolean
  27  scope: Cluster
  28  names:
  29    listKind: ConfigurationList
  30    plural: configurations
  31    singular: configuration
  32    kind: Configuration
  33---
  34apiVersion: apiextensions.k8s.io/v1
  35kind: CustomResourceDefinition
  36metadata:
  37  name: splunkoutputs.collectord.io
  38spec:
  39  group: collectord.io
  40  versions:
  41    - name: v1
  42      served: true
  43      storage: true
  44      schema:
  45        openAPIV3Schema:
  46          type: object
  47          properties:
  48            spec:
  49              type: object
  50              properties:
  51                url:
  52                  type: string
  53                  format: uri
  54                insecure:
  55                  type: boolean
  56                token:
  57                  type: string
  58                  description: "Plain token"
  59                tokenFromSecret:
  60                  type: object
  61                  description: "Reference to a Kubernetes Secret"
  62                  properties:
  63                    secret:
  64                      type: string
  65                    key:
  66                      type: string
  67              oneOf:
  68                - required: ["token"]
  69                - required: ["tokenFromSecret"]
  70  scope: Namespaced
  71  names:
  72    listKind: SplunkOutputList
  73    plural: splunkoutputs
  74    singular: splunkoutput
  75    kind: SplunkOutput
  76---
  77apiVersion: v1
  78kind: ServiceAccount
  79metadata:
  80  labels:
  81    app: collectorforkubernetes
  82  name: collectorforkubernetes
  83  namespace: collectorforkubernetes
  84---
  85apiVersion: scheduling.k8s.io/v1
  86kind: PriorityClass
  87metadata:
  88  name: collectorforkubernetes-critical
  89value: 1000000000
  90---
  91apiVersion: rbac.authorization.k8s.io/v1
  92kind: ClusterRole
  93metadata:
  94  labels:
  95    app: collectorforkubernetes
  96  name: collectorforkubernetes
  97rules:
  98- apiGroups: ['extensions']
  99  resources: ['podsecuritypolicies']
 100  verbs:     ['use']
 101  resourceNames:
 102  - privileged
 103- apiGroups:
 104  - ""
 105  - apps
 106  - batch
 107  - extensions
 108  - rbac.authorization.k8s.io
 109  - collectord.io
 110  - discovery.k8s.io
 111  resources:
 112  - alertmanagers
 113  - clusterroles
 114  - configmaps
 115  - configurations
 116  - cronjobs
 117  - daemonsets
 118  - deployments
 119  - endpointslices
 120  - events
 121  - jobs
 122  - namespaces
 123  - nodes
 124  - nodes/metrics
 125  - nodes/proxy
 126  - persistentvolumeclaims
 127  - pods
 128  - replicasets
 129  - replicationcontrollers
 130  - resourcequotas
 131  - scheduledjobs
 132  - secrets
 133  - services
 134  - splunkoutputs
 135  - statefulsets
 136  verbs:
 137  - get
 138  - list
 139  - watch
 140- nonResourceURLs:
 141  - /metrics
 142  verbs:
 143  - get
 144  apiGroups: []
 145  resources: []
 146---
 147apiVersion: rbac.authorization.k8s.io/v1
 148kind: ClusterRoleBinding
 149metadata:
 150  labels:
 151    app: collectorforkubernetes
 152  name: collectorforkubernetes
 153roleRef:
 154  apiGroup: rbac.authorization.k8s.io
 155  kind: ClusterRole
 156  name: collectorforkubernetes
 157subjects:
 158  - kind: ServiceAccount
 159    name: collectorforkubernetes
 160    namespace: collectorforkubernetes
 161---
 162apiVersion: v1
 163kind: ConfigMap
 164metadata:
 165  name: collectorforkubernetes
 166  namespace: collectorforkubernetes
 167  labels:
 168    app: collectorforkubernetes
 169data:
 170  001-general.conf: |
 171    # The general configuration is used for all deployments
 172    #
 173    # Run collectord with the flag -conf and specify location of the configuration files.
 174    #
 175    # You can override all the values using environment variables with the format like
 176    #   COLLECTOR__<ANYNAME>=<section>__<key>=<value>
 177    # As an example you can set dataPath in [general] section as
 178    #   COLLECTOR__DATAPATH=general__dataPath=C:\\some\\path\\data.db
 179    # This parameter can be configured using -env-override, set it to empty string to disable this feature
 180
 181    [general]
 182
 183    # Please review license https://www.outcoldsolutions.com/legal/license-agreement/
 184    # and accept license by changing the value to *true*
 185    acceptLicense = false
 186
 187    # Location for the database
 188    # Collectord stores positions of the files and internal state
 189    dataPath = ./data/
 190
 191    # log level (accepted values are trace, debug, info, warn, error, fatal)
 192    logLevel = info
 193
 194    # http server gives access to three endpoints
 195    # /healthz
 196    # /metrics/json
 197    # /metrics/prometheus
 198    # httpServerBinding = 0.0.0.0:11888
 199    httpServerBinding =
 200
 201    # log requests to the http server
 202    httpServerLog = false
 203
 204    # telemetry report endpoint, set it to empty string to disable telemetry
 205    telemetryEndpoint = https://license.outcold.solutions/telemetry/
 206
 207    # license check endpoint
 208    licenseEndpoint = https://license.outcold.solutions/license/
 209
 210    # license server through proxy
 211    # This configuration is used only for the Outcold Solutions License Server
 212    # For license server running on-premises, use configuration under [license.client]
 213    licenseServerProxyUrl =
 214
 215    # authentication with basic authorization (user:password)
 216    # This configuration is used only for the Outcold Solutions License Server
 217    # For license server running on-premises, use configuration under [license.client]
 218    licenseServerProxyBasicAuth =
 219
 220    # license key
 221    license =
 222
 223    # Environment variable $KUBERNETES_NODENAME is used by default to setup hostname
 224    # Use value below to override specific name
 225    hostname =
 226
 227    # Default output for events, logs and metrics
 228    # valid values: splunk and devnull
 229    # Use devnull by default if you don't want to redirect data
 230    defaultOutput = splunk
 231
 232    # Default buffer size for file input
 233    fileInputBufferSize = 256b
 234
 235    # Maximum size of one line the file reader can read
 236    fileInputLineMaxSize = 1mb
 237
 238    # Include custom fields to attach to every event. In the example below, every event sent to Splunk will have the
 239    # indexed field my_environment=dev. Field names should match ^[a-z][_a-z0-9]*$
 240    # Better way to configure that is to specify labels for Kubernetes Nodes.
 241    # ; fields.my_environment = dev
 242    # Identify the cluster if you are planning to monitor multiple clusters
 243    fields.kubernetes_cluster = -
 244
 245    # Include EC2 Metadata (see list of possible fields https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html)
 246    # Should be in format ec2Metadata.{desired_field_name} = {url path to read the value}
 247    # ec2Metadata.ec2_instance_id = /latest/meta-data/instance-id
 248    # ec2Metadata.ec2_instance_type = /latest/meta-data/instance-type
 249
 250    # subdomain for the annotations added to the pods, workloads, namespaces or containers, like splunk.collectord.io/..
 251    annotationsSubdomain =
 252
 253    # configure global thruput per second for forwarded logs (metrics are not included)
 254    # for example if you set `thruputPerSecond = 512Kb`, that will limit amount of logs forwarded
 255    # from the single Collectord instance to 512Kb per second.
 256    # You can configure thruput individually for the logs (including specific for container logs) below
 257    thruputPerSecond =
 258
 259    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 260    # older than 7 days
 261    tooOldEvents =
 262
 263    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 264    tooNewEvents =
 265
 266    # For input.files::X and application logs, when glob or match are configured, Collectord can automatically
 267    # detect gzipped files and skip them (based on the extensions or magic numbers)
 268    autoSkipGzipFiles = true
 269    
 270    # Multi-output async publishing. When enabled (default), events routed to
 271    # non-default outputs are published asynchronously so that a slow or down
 272    # output does not block events destined for other outputs.
 273    ; multioutput.async = true
 274    # Buffer size for the async proxy (default 100). Absorbs transient bursts.
 275    # When this buffer and the output's own queue are both full, events are
 276    # dropped immediately without blocking the pipeline.
 277    ; multioutput.asyncBufferSize = 100
 278
 279    [license.client]
 280    # point to the license located on the HTTP web server, or hosted by the Collectord running as a license server
 281    url =
 282    # basic authentication for the HTTP server
 283    basicAuth =
 284    # if SSL, ignore the certificate verification
 285    insecure = false
 286    # CA Path for the Server certificate
 287    capath =
 288    # CA Name for the Server certificate
 289    caname =
 290    # license server through proxy
 291    proxyUrl =
 292    # authentication with basic authorization (user:password)
 293    proxyBasicAuth =
 294
 295
 296    # forward internal collectord metrics
 297    [input.collectord_metrics]
 298
 299    # disable collectord internal metrics
 300    disabled = false
 301
 302    # override type
 303    type = kubernetes_prometheus
 304
 305    # how often to collect internal metrics
 306    interval = 1m
 307
 308    # set output (splunk or devnull, default is [general]defaultOutput)
 309    output =
 310
 311    # specify Splunk index
 312    index =
 313
 314    # whitelist or blacklist the metrics
 315    whitelist.1 = ^file_input_open$
 316    whitelist.2 = ^file_input_read_bytes$
 317    whitelist.3 = ^kubernetes_handlers$
 318    whitelist.4 = ^pipe$
 319    whitelist.5 = ^pipelines_num$
 320    whitelist.6 = ^splunk_post_bytes_sum.*$
 321    whitelist.7 = ^splunk_post_events_count_sum.*$
 322    whitelist.8 = ^splunk_post_failed_requests$
 323    whitelist.9 = ^splunk_post_message_max_lag_seconds_bucket.*$
 324    whitelist.10 = ^splunk_post_requests_seconds_sum.*$
 325    whitelist.11 = ^splunk_post_retries_required_sum.*$
 326
 327
 328    # connection to kubernetes api
 329    [general.kubernetes]
 330
 331    # Override service URL for Kubernetes (default is ${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT})
 332    serviceURL =
 333
 334    # Environment variable $KUBERNETES_NODENAME is used by default to setup nodeName
 335    # Use it only when you need to override it
 336    nodeName =
 337
 338    # Configuration to access the API server,
 339    # see https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod
 340    # for details
 341    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
 342    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
 343
 344    # Default timeout for http responses. The streaming/watch requests depend on this timeout.
 345    timeout = 30m
 346
 347    # How long to keep the cache for the recent calls to API server (to limit number of calls when collectord discovers new pods)
 348    metadataTTL = 30s
 349
 350    # regex to find pods
 351    podsCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?$
 352
 353    # regex to find containers in the pods
 354    containersCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?/(docker-|crio-|cri-\w+-)?[0-9a-f]{64}(\.scope)?(\/.+)?$
 355
 356    # path to the kubelet root location (use it to discover application logs for emptyDir)
 357    # the expected format is `pods/{pod-id}/volumes/kubernetes.io~empty-dir/{volume-name}/_data/`
 358    volumesRootDir = /rootfs/var/lib/kubelet/
 359
 360    # You can attach annotations as a metadata, using the format
 361    #   includeAnnotations.{key} = {regexp}
 362    # For example if you want to include all annotations that starts with `prometheus.io` or `example.com` you can include
 363    # the following format:
 364    #   includeAnnotations.1 = ^prometheus\.io.*
 365    #   includeAnnotations.2 = ^example\.com.*
 366
 367    # You can exclude labels from metadata, using the format
 368    #   excludeLabels.{key} = {regexp}
 369    # For example if you want to exclude all labels that starts with `prometheus.io` or `example.com` you can include
 370    # the following format:
 371    #   excludeLabels.1 = ^prometheus\.io.*
 372    #   excludeLabels.2 = ^example\.com.*
 373
 374    # watch for changes (annotations) in the objects
 375    watch.namespaces = v1/namespace
 376    watch.deployments = apps/v1/deployment
 377    watch.configurations = collectord.io/v1/configuration
 378
 379    # Collectord can review the assigned ClusterRole and traverse metadata for the Pods only for the Owner objects
 380    # that are defined in the ClusterRole, ignoring anything else that it does not have access to.
 381    # This way Collectord does not generate 403 requests on API Server
 382    clusterRole = collectorforkubernetes
 383
 384    # Alternative of telling Collectord about the ClusterRole is to manually list the objects.
 385    # You can define which objects Collectord should traverse when it sees Owners.
 386    ; traverseOwnership.namespaces = v1/namespace
 387
 388    # Implementation of the watch protocol.
 389    # 0 - use the default implementation (2)
 390    # 1 - use the watch implementation that is optimized for the small number of objects (just issue one watch for all objects)
 391    # 2 - use the watch implementation that is optimized for the large number of objects (paginate through the list of objects and issue watch for the last resource version)
 392    watchImplementation = 2
 393
 394    # watch for pods annotations, setup prometheus collection
 395    # for these pods
 396    # Addon listens on Pod Network
 397    # DaemonSets listen on Host Network
 398    [input.prometheus_auto]
 399
 400    # disable prometheus auto discovery for pods
 401    disabled = false
 402
 403    # override type
 404    type = kubernetes_prometheus
 405
 406    # specify Splunk index
 407    index =
 408
 409    # how often to collect prometheus metrics
 410    interval = 60s
 411
 412    # include metrics help with the events
 413    includeHelp = true
 414
 415    # http client timeout
 416    timeout = 30s
 417
 418    # set output (splunk or devnull, default is [general]defaultOutput)
 419    output =
 420
 421    # Include an Authorization header for the prometheus scraper
 422    # When configuring scraping with collectord using annotations use prometheus.1-AuthorizationKey=key1
 423    # authorization.key1 = Bearer FOO
 424
 425
 426    # Splunk output
 427    [output.splunk]
 428
 429    # Splunk HTTP Event Collector url
 430    url =
 431    # You can specify multiple Splunk URLs with
 432    #
 433    # urls.0 = https://server1:8088/services/collector/event/1.0
 434    # urls.1 = https://server1:8088/services/collector/event/1.0
 435    # urls.2 = https://server1:8088/services/collector/event/1.0
 436    #
 437    # Limitations:
 438    # * The urls cannot have different path.
 439
 440    # Specify how URL should be picked up (in case if multiple is used)
 441    # urlSelection = random|round-robin|random-with-round-robin
 442    # where:
 443    # * random - choose random url on first selection and after each failure (connection or HTTP status code >= 500)
 444    # * round-robin - choose url starting from first one and bump on each failure (connection or HTTP status code >= 500)
 445    # * random-with-round-robin - choose random url on first selection and after that in round-robin on each
 446    #                             failure (connection or HTTP status code >= 500)
 447    urlSelection = random-with-round-robin
 448
 449    # Splunk HTTP Event Collector Token
 450    token =
 451
 452    # Allow invalid SSL server certificate
 453    insecure = false
 454    # minTLSVersion = TLSv1.2
 455    # maxTLSVersion = TLSv1.3
 456
 457    # Path to CA certificate
 458    caPath =
 459
 460    # CA Name to verify
 461    caName =
 462
 463    # path for client certificate (if required)
 464    clientCertPath =
 465
 466    # path for client key (if required)
 467    clientKeyPath =
 468
 469    # Events are batched with the maximum size set by batchSize and staying in pipeline for not longer
 470    # than set by frequency
 471    frequency = 5s
 472    batchSize = 768K
 473    # limit by the number of events (0 value has no limit on the number of events)
 474    events = 50
 475
 476    # Splunk through proxy
 477    proxyUrl =
 478
 479    # authentication with basic authorization (user:password)
 480    proxyBasicAuth =
 481
 482    # Splunk acknowledgement url (.../services/collector/ack)
 483    ackUrl =
 484    # You can specify multiple Splunk URLs for ackUrl
 485    #
 486    # ackUrls.0 = https://server1:8088/services/collector/ack
 487    # ackUrls.1 = https://server1:8088/services/collector/ack
 488    # ackUrls.2 = https://server1:8088/services/collector/ack
 489    #
 490    # Make sure that they are in the same order as the urls.N entries for url, to make sure that this Splunk instance will be
 491    # able to acknowledge the payload.
 492    #
 493    # Limitations:
 494    # * The urls cannot have different path.
 495
 496    # Enable index acknowledgment
 497    ackEnabled = false
 498
 499    # Index acknowledgment timeout
 500    ackTimeout = 3m
 501
 502    # Timeout specifies a time limit for requests made by collectord.
 503    # The timeout includes connection time, any
 504    # redirects, and reading the response body.
 505    timeout = 30s
 506
 507    # in case when pipeline can post to multiple indexes, we want to avoid the possibility of blocking
 508    # all pipelines, because just some events have incorrect index
 509    dedicatedClientPerIndex = true
 510
 511    # possible values: RedirectToDefault, Drop, Retry
 512    incorrectIndexBehavior = RedirectToDefault
 513
 514    # gzip compression level (nocompression, default, 1...9)
 515    compressionLevel = default
 516
 517    # number of dedicated splunk output threads (to increase throughput above 4k events per second)
 518    threads = 2
 519    # Default algorithm between threads is roundrobin, but you can change it to weighted
 520    ; threadsAlgorithm = weighted
 521
 522    # if you want to exclude some preindexed fields from events
 523    # excludeFields.kubernetes_pod_ip = true
 524
 525    # By default if there are no indexes defined on the message, Collectord sends the event without the index, and
 526    # Splunk HTTP Event Collector going to use the default index for the Token. You can change that, and tell Collectord
 527    # to ignore all events that don't have index defined explicitly
 528    ; requireExplicitIndex = true
 529
 530    # You can define if you want to truncate messages that are larger than 1M in length (or define your own size, like 256K)
 531    ; maximumMessageLength = 1M
 532
 533    # For messages generated from logs, include unique `event_id` in the event
 534    ; includeEventID = false
 535
 536    # Dedicated queue size for the output, default is 1024, larger queue sizes will require more memory,
 537    # but will allow to handle more events in case of network issues
 538    queueSize = 1024
 539
 540    # How many digits after the decimal point to keep for timestamps (0-9)
 541    # Defaults to 3 (milliseconds)
 542    # Change to 6 for microseconds
 543    # Change to 9 for nanoseconds
 544    ; timestampPrecision = 3
 545
 546  002-daemonset.conf: |
 547    # DaemonSet configuration is used for Nodes and Masters.
 548
 549    # connection to CRIO
 550    [general.cri-o]
 551
 552    # url for CRIO API, only unix socket is supported
 553    url = unix:///rootfs/var/run/crio/crio.sock
 554
 555    # Timeout for http responses to docker client. The streaming requests depend on this timeout.
 556    timeout = 1m
 557
 558
 559    [general.containerd]
 560    # Runtime can be on /rootfs/run/containerd (depends on the Linux distribution)
 561    runtimePath = /rootfs/var/run/containerd
 562    namespace = k8s.io
 563
 564
 565    # cgroup input
 566    [input.system_stats]
 567
 568    # disable system level stats
 569    disabled.host = false
 570    disabled.cgroup = false
 571
 572    # cgroups fs location
 573    pathCgroups = /rootfs/sys/fs/cgroup
 574
 575    # proc location
 576    pathProc = /rootfs/proc
 577
 578    # how often to collect cgroup stats
 579    statsInterval = 30s
 580
 581    # override type
 582    type.host = kubernetes_stats_v2_host
 583    type.cgroup = kubernetes_stats_v2_cgroup
 584
 585    # specify Splunk index
 586    index.host =
 587    index.cgroup =
 588
 589    # set output (splunk or devnull, default is [general]defaultOutput)
 590    output.host =
 591    output.cgroup =
 592
 593
 594    # proc input
 595    [input.proc_stats]
 596
 597    # disable proc level stats
 598    disabled = false
 599
 600    # proc location
 601    pathProc = /rootfs/proc
 602
 603    # how often to collect proc stats
 604    statsInterval = 30s
 605
 606    # override type
 607    type = kubernetes_proc_stats_v2
 608
 609    # specify Splunk index
 610    index.host =
 611    index.cgroup =
 612
 613    # proc filesystem includes by default system threads (there can be over 100 of them)
 614    # these stats do not help with the observability
 615    # excluding them can reduce the size of the index, performance of the searches and usage of the collector
 616    includeSystemThreads = false
 617
 618    # set output (splunk or devnull, default is [general]defaultOutput)
 619    output.host =
 620    output.cgroup =
 621
 622    # Hide arguments for the processes, replacing with HIDDEN_ARGS(NUMBER)
 623    hideArgs = false
 624
 625
 626    # network stats
 627    [input.net_stats]
 628
 629    # disable net stats
 630    disabled = false
 631
 632    # proc path location
 633    pathProc = /rootfs/proc
 634
 635    # how often to collect net stats
 636    statsInterval = 30s
 637
 638    # override type
 639    type = kubernetes_net_stats_v2
 640
 641    # specify Splunk index
 642    index.host =
 643    index.cgroup =
 644
 645    # set output (splunk or devnull, default is [general]defaultOutput)
 646    output.host =
 647    output.cgroup =
 648
 649
 650    # network socket table
 651    [input.net_socket_table]
 652
 653    # disable net socket table
 654    disabled = false
 655
 656    # proc path location
 657    pathProc = /rootfs/proc
 658
 659    # how often to collect the net socket table
 660    statsInterval = 30s
 661
 662    # override type
 663    type = kubernetes_net_socket_table
 664
 665    # specify Splunk index
 666    index.host =
 667    index.cgroup =
 668
 669    # set output (splunk or devnull, default is [general]defaultOutput)
 670    output.host =
 671    output.cgroup =
 672
 673    # group connections by tcp_state, localAddr, remoteAddr (if localPort is not the port it is listening on)
 674    # that can significantly reduce the amount of events
 675    group = true
 676
 677    # Collectord can watch for services, node, and pod IP addresses, and lookup the names
 678    # for the IP addresses. Keeping this enabled can add a significant load on the API Server, with large number of pods.
 679    disableLookup = false
 680
 681
 682    # mount input (collects mount stats where kubelet runtime is stored)
 683    [input.mount_stats]
 684
 685    # disable mount stats
 686    disabled = false
 687
 688    # how often to collect mount stats
 689    statsInterval = 30s
 690
 691    # override type
 692    type = kubernetes_mount_stats
 693
 694    # specify Splunk index
 695    index =
 696
 697    # set output (splunk or devnull, default is [general]defaultOutput)
 698    output =
 699
 700
 701    # diskstats input (collects /proc/diskstats)
 702    [input.disk_stats]
 703
 704    # disable disk stats
 705    disabled = false
 706
 707    # how often to collect disk stats
 708    statsInterval = 30s
 709
 710    # override type
 711    type = kubernetes_disk_stats
 712
 713    # specify Splunk index
 714    index =
 715
 716    # set output (splunk or devnull, default is [general]defaultOutput)
 717    output =
 718
 719
 720    # Container Log files
 721    [input.files]
 722
 723    # disable container logs monitoring
 724    disabled = false
 725
 726    # root location of docker log files
 727    # logs are expected in standard docker format like {containerID}/{containerID}-json.log
 728    # rotated files
 729    path = /rootfs/var/lib/docker/containers/
 730    # root location of CRI-O files
 731    # logs are expected in Kubernetes format, like {podID}/{containerName}/0.log
 732    crioPath = /rootfs/var/log/pods/
 733
 734    # (obsolete) glob matching pattern for log files
 735    # glob = */*-json.log*
 736
 737    # files are read using polling schema, when reach the EOF how often to check if files got updated
 738    pollingInterval = 250ms
 739
 740    # how often to look for the new files under logs path
 741    walkingInterval = 5s
 742
 743    # include verbose fields in events (file offset)
 744    verboseFields = false
 745
 746    # override type
 747    type = kubernetes_logs
 748
 749    # specify Splunk index
 750    index =
 751
 752    # docker splits events when they are larger than 10-100k (depends on the docker version)
 753    # we join them together by default and forward to Splunk as one event
 754    joinPartialEvents = true
 755
 756    # In case if your containers report messages with terminal colors or other escape sequences
 757    # you can enable strip for all the containers in one place.
 758    # Better is to enable it only for required container with the label collectord.io/strip-terminal-escape-sequences=true
 759    stripTerminalEscapeSequences = false
 760    # Regexp used for stripping terminal colors, it does not strip all the escape sequences
 761    # Read https://man7.org/linux/man-pages/man4/console_codes.4.html for more information
 762    stripTerminalEscapeSequencesRegex = (\x1b\[\d{1,3}(;\d{1,3})*m)|(\x07)|(\x1b]\d+(\s\d)?;[^\x07]+\x07)|(.*\x1b\[K)
 763
 764    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 765    samplingPercent = -1
 766
 767    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 768    samplingKey =
 769
 770    # set output (splunk or devnull, default is [general]defaultOutput)
 771    output =
 772
 773    # configure default thruput per second for each container log
 774    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 775    # from the single container to 128Kb per second.
 776    thruputPerSecond =
 777
 778    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 779    # older than 7 days
 780    tooOldEvents =
 781
 782    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 783    tooNewEvents =
 784
 785
 786    # Application Logs
 787    [input.app_logs]
 788
 789    # disable container application logs monitoring
 790    disabled = false
 791
 792    # root location of mounts (applies to hostPath mounts only), if the hostPath differs inside container from the path on host
 793    root = /rootfs/
 794
 795    # how often to review list of available volumes
 796    syncInterval = 5s
 797
 798    # glob matching pattern for log files
 799    glob = *.log*
 800
 801    # files are read using polling schema, when reach the EOF how often to check if files got updated
 802    pollingInterval = 250ms
 803
 804    # how often to look for the new files under logs path
 805    walkingInterval = 5s
 806
 807    # include verbose fields in events (file offset)
 808    verboseFields = false
 809
 810    # override type
 811    type = kubernetes_logs
 812
 813    # specify Splunk index
 814    index =
 815
 816    # we split files using new line character, with this configuration you can specify what defines the new event
 817    # after new line
 818    eventPatternRegex = ^[^\s]
 819    # Maximum interval of messages in pipeline
 820    eventPatternMaxInterval = 100ms
 821    # Maximum time to wait for the messages in pipeline
 822    eventPatternMaxWait = 1s
 823    # Maximum message size
 824    eventPatternMaxSize = 1MB
 825
 826    # set output (splunk or devnull, default is [general]defaultOutput)
 827    output =
 828
 829    # configure default thruput per second for each container log
 830    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 831    # from the single container to 128Kb per second.
 832    thruputPerSecond =
 833
 834    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 835    # older than 7 days
 836    tooOldEvents =
 837
 838    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 839    tooNewEvents =
 840
 841    # Configure how long Collectord should keep the file descriptors open for files that have not been forwarded yet.
 842    # When using a PVC, if the pipeline is lagging behind, Collectord holding open file descriptors can cause long termination
 843    # of pods, as kubelet cannot unmount the PVC volume from the system
 844    maxHoldAfterClose = 1800s
 845
 846
 847    # Host logs. Input syslog(.\d+)? files
 848    [input.files::syslog]
 849
 850    # disable host level logs
 851    disabled = false
 852
 853    # root location of log files
 854    path = /rootfs/var/log/
 855
 856    # regex matching pattern
 857    match = ^(syslog|messages)(.\d+)?$
 858
 859    # limit search only on one level
 860    recursive = false
 861
 862    # files are read using polling schema, when reach the EOF how often to check if files got updated
 863    pollingInterval = 250ms
 864
 865    # how often to look for the new files under logs path
 866    walkingInterval = 5s
 867
 868    # include verbose fields in events (file offset)
 869    verboseFields = false
 870
 871    # override type
 872    type = kubernetes_host_logs
 873
 874    # specify Splunk index
 875    index =
 876
 877    # field extraction
 878    extraction = ^(?P<timestamp>[A-Za-z]+\s+\d+\s\d+:\d+:\d+)\s(?P<syslog_hostname>[^\s]+)\s(?P<syslog_component>[^:\[]+)(\[(?P<syslog_pid>\d+)\])?: (.+)$
 879    # extractionMessageField =
 880
 881    # timestamp field
 882    timestampField = timestamp
 883
 884    # format for timestamp
 885    # the layout defines the format by showing how the reference time `Mon Jan 2 15:04:05 -0700 MST 2006` would be displayed
 886    timestampFormat = Jan 2 15:04:05
 887
 888    # Adjust date, if month/day aren't set in format
 889    timestampSetMonth = false
 890    timestampSetDay = false
 891
 892    # timestamp location (if not defined by format)
 893    timestampLocation = Local
 894
 895    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 896    samplingPercent = -1
 897
 898    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 899    samplingKey =
 900
 901    # set output (splunk or devnull, default is [general]defaultOutput)
 902    output =
 903
 904    # configure default thruput per second for this files group
 905    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 906    # from the files in this group to 128Kb per second.
 907    thruputPerSecond =
 908
 909    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 910    # older than 7 days
 911    tooOldEvents =
 912
 913    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 914    tooNewEvents =
 915
 916    # by default every new event should start from not space symbol
 917    eventPattern = ^[^\s]
 918
 919    # Blacklisting and whitelisting the logs
 920    # whitelist.0 = ^regexp$
 921    # blacklist.0 = ^regexp$
 922
 923
 924    # Host logs. Input all *.log(.\d+)? files
 925    [input.files::logs]
 926
 927    # disable host level logs
 928    disabled = false
 929
 930    # root location of log files
 931    path = /rootfs/var/log/
 932
 933    # regex matching pattern
 934    match = ^(([\w\-.]+\.log(.[\d\-]+)?)|(docker))$
 935
 936    # files are read by polling; when EOF is reached, how often to check if files got updated
 937    pollingInterval = 250ms
 938
 939    # how often to look for new files under the logs path
 940    walkingInterval = 5s
 941
 942    # include verbose fields in events (file offset)
 943    verboseFields = false
 944
 945    # override type
 946    type = kubernetes_host_logs
 947
 948    # specify Splunk index
 949    index =
 950
 951    # field extraction
 952    extraction =
 953    extractionMessageField =
 954
 955    # timestamp field
 956    timestampField =
 957
 958    # format for timestamp
 959    # the layout defines the format by showing how the reference time `Mon Jan 2 15:04:05 -0700 MST 2006` would be displayed
 960    timestampFormat =
 961
 962    # timestamp location (if not defined by format)
 963    timestampLocation =
 964
 965    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 966    samplingPercent = -1
 967
 968    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 969    samplingKey =
 970
 971    # set output (splunk or devnull, default is [general]defaultOutput)
 972    output =
 973
 974    # configure default thruput per second for this files group
 975    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 976    # from the files in this group to 128Kb per second.
 977    thruputPerSecond =
 978
 979    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 980    # older than 7 days
 981    tooOldEvents =
 982
 983    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 984    tooNewEvents =
 985
 986    # by default every new event should start from not space symbol
 987    eventPattern = ^[^\s]
 988
 989    # Blacklisting and whitelisting the logs
 990    # whitelist.0 = ^regexp$
 991    # blacklist.0 = ^regexp$
 992
 993
 994    [input.journald]
 995
 996    # disable host level logs
 997    disabled = false
 998
 999    # root location of log files
1000    path.persistent = /rootfs/var/log/journal/
1001    path.volatile = /rootfs/run/log/journal/
1002
1003    # when the end of the journal is reached, how often to poll for new entries
1004    pollingInterval = 250ms
1005
1006    # if you don't want to forward journald from the beginning,
1007    # set the oldest event in relative value, like -14h or -30m or -30s (h/m/s supported)
1008    startFromRel =
1009
1010    # override type
1011    type = kubernetes_host_logs
1012
1013    # specify Splunk index
1014    index =
1015
1016    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
1017    samplingPercent = -1
1018
1019    # sampling key (should be regexp with the named match pattern `key`)
1020    samplingKey =
1021
1022    # how often to reopen the journald to free old files
1023    reopenInterval = 1h
1024
1025    # set output (splunk or devnull, default is [general]defaultOutput)
1026    output =
1027
1028    # configure default thruput per second for journald
1029    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
1030    # from the journald to 128Kb per second.
1031    thruputPerSecond =
1032
1033    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
1034    # older than 7 days
1035    tooOldEvents =
1036
1037    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
1038    tooNewEvents =
1039
1040    # by default every new event should start from not space symbol
1041    eventPattern = ^[^\s]
1042
1043    # Blacklisting and whitelisting the logs
1044    # whitelist.0 = ^regexp$
1045    # blacklist.0 = ^regexp$
1046
1047    # Move Journald logs reader to a separate process, to prevent process from crashing in case of corrupted log files
1048    spawnExternalProcess = false
1049
1050
1051    # Pipe to join events (container logs only)
1052    [pipe.join]
1053
1054    # disable joining event
1055    disabled = false
1056
1057    # Maximum interval of messages in pipeline
1058    maxInterval = 100ms
1059
1060    # Maximum time to wait for the messages in pipeline
1061    maxWait = 1s
1062
1063    # Maximum message size
1064    maxSize = 1MB
1065
1066    # Default pattern to indicate new message (should start not from space)
1067    patternRegex = ^[^\s]
1068
1069
1070    # (deprecated, use annotations for setting up join rules)
1071    # Define special event join patterns for matched events
1072    # Section name has the form [pipe.join::<name>]
1073    # [pipe.join::my_app]
1074    ## Set match pattern for the fields
1075    #; matchRegex.docker_container_image = my_app
1076    #; matchRegex.stream = stdout
1077    ## All events start from '[<digits>'
1078    #; patternRegex = ^\[\d+
1079
1080
1081    # You can configure global replace rules for the events, which can help to remove sensitive data
1082    # from logs before they are sent to Splunk. Those rules will be applied to all pipelines for container logs, host logs,
1083    # application logs and events.
1084    # In the following example we replace password=TEST with password=********
1085    ; [pipe.replace::name]
1086    ; patternRegex = (password=)([^\s]+)
1087    ; replace = $1********
1088    
1089    # You can configure global hash rules for the events, which can help to hide sensitive data
1090    # from logs before they are sent to outputs. Those rules will be applied to all pipelines for container logs, host logs,
1091    # application logs and events.
1092    # In the following example we hash IP addresses with fnv-1a-64
1093    ; [pipe.hash::name]
1094    ; match = (\d{1,3}\.){3}\d{1,3}
1095    ; function = fnv-1a-64
1096
1097
1098    [input.prometheus::kubelet]
1099
1100    # disable prometheus kubelet metrics
1101    disabled = false
1102
1103    # override type
1104    type = kubernetes_prometheus
1105
1106    # specify Splunk index
1107    index =
1108
1109    # override host (environment variables are supported, by default Kubernetes node name is used)
1110    host = ${KUBERNETES_NODENAME}
1111
1112    # override source
1113    source = kubelet
1114
1115    # how often to collect prometheus metrics
1116    interval = 60s
1117
1118    # request timeout
1119    timeout = 60s
1120
1121    # Prometheus endpoint, multiple values can be specified, collectord tries them in order until it finds the first
1122    # working endpoint.
1123    # First, try to get it through the API server proxy
1124    endpoint.1proxy = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/nodes/${KUBERNETES_NODENAME}/proxy/metrics
1125    # If it cannot be reached through the proxy, try localhost
1126    endpoint.2http = http://127.0.0.1:10255/metrics
1127
1128    # token for "Authorization: Bearer $(cat tokenPath)"
1129    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1130
1131    # server certificate for certificate validation
1132    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1133
1134    # client certificate for authentication
1135    clientCertPath =
1136
1137    # Allow invalid SSL server certificate
1138    insecure = true
1139
1140    # include metrics help with the events
1141    includeHelp = false
1142
1143    # set output (splunk or devnull, default is [general]defaultOutput)
1144    output =
1145
1146    whitelist.1 = ^kubernetes_build_info$
1147    whitelist.2 = ^kubelet_runtime_operations_duration_seconds_sum$
1148    whitelist.3 = ^kubelet_docker_operations_duration_seconds_sum$
1149    whitelist.4 = ^kubelet_network_plugin_operations_duration_seconds_sum$
1150    whitelist.5 = ^kubelet_cgroup_manager_duration_seconds_sum$
1151    whitelist.6 = ^storage_operation_duration_seconds_sum$
1152    whitelist.7 = ^kubelet_docker_operations_errors_total$
1153    whitelist.8 = ^kubelet_runtime_operations_errors_total$
1154    whitelist.9 = ^rest_client_requests_total$
1155    whitelist.10 = ^process_cpu_seconds_total$
1156    whitelist.11 = ^process_resident_memory_bytes$
1157    whitelist.12 = ^process_virtual_memory_bytes$
1158    whitelist.13 = ^rest_client_request_duration_seconds_sum$
1159    whitelist.14 = ^kubelet_volume_stats_.+$
1160    whitelist.15 = ^rest_client_requests_total$
1161    
1162
1163    ; # Collectord reports if entropy is low
1164    ; [diagnostics::node-entropy]
1165    ; settings.path = /rootfs/proc/sys/kernel/random/entropy_avail
1166    ; settings.interval = 1h
1167    ; settings.threshold = 800
1168
1169    # Collectord can report if node reboot is required
1170    [diagnostics::node-reboot-required]
1171    settings.path = /rootfs/var/run/reboot-required*
1172    settings.interval = 1h
1173
1174    # See https://www.kernel.org/doc/Documentation/admin-guide/hw-vuln/index.rst
1175    # And https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
1176    [diagnostics::cpu-vulnerabilities]
1177    settings.path = /rootfs/sys/devices/system/cpu/vulnerabilities/*
1178    settings.interval = 1h
1179
1180
1181  003-daemonset-master.conf: |
1182    [input.prometheus::kubernetes-api]
1183
1184    # disable prometheus kubernetes-api metrics
1185    disabled = false
1186
1187    # override type
1188    type = kubernetes_prometheus
1189
1190    # specify Splunk index
1191    index =
1192
1193    # override host (environment variables are supported, by default Kubernetes node name is used)
1194    host = ${KUBERNETES_NODENAME}
1195
1196    # override source
1197    source = kubernetes-api
1198
1199    # how often to collect prometheus metrics
1200    interval = 60s
1201
1202    # request timeout
1203    timeout = 60s
1204
1205    # prometheus endpoint
1206    # at first trying to get it from localhost (avoiding load balancer, if multiple api servers)
1207    endpoint.1localhost = https://127.0.0.1:6443/metrics
1208    # as fallback using proxy
1209    endpoint.2kubeapi = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/metrics
1210
1211    # token for "Authorization: Bearer $(cat tokenPath)"
1212    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1213
1214    # server certificate for certificate validation
1215    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1216
1217    # client certificate for authentication
1218    clientCertPath =
1219
1220    # Allow invalid SSL server certificate
1221    insecure = true
1222
1223    # include metrics help with the events
1224    includeHelp = false
1225
1226    # set output (splunk or devnull, default is [general]defaultOutput)
1227    output =
1228
1229    whitelist.1 = ^kubernetes_build_info$
1230    whitelist.2 = ^authenticated_user_requests$
1231    whitelist.3 = ^apiserver_request_total$
1232    whitelist.4 = ^process_cpu_seconds_total$
1233    whitelist.5 = ^process_resident_memory_bytes$
1234    whitelist.6 = ^process_virtual_memory_bytes$
1235    whitelist.7 = ^rest_client_request_duration_seconds_sum$
1236    whitelist.8 = ^rest_client_requests_total$
1237
1238
1239    # This configuration works if the scheduler is bound to localhost:10251
1240    [input.prometheus::scheduler]
1241
1242    # disable prometheus scheduler metrics
1243    disabled = false
1244
1245    # override type
1246    type = kubernetes_prometheus
1247
1248    # specify Splunk index
1249    index =
1250
1251    # override host
1252    host = ${KUBERNETES_NODENAME}
1253
1254    # override source
1255    source = scheduler
1256
1257    # how often to collect prometheus metrics
1258    interval = 60s
1259
1260    # request timeout
1261    timeout = 60s
1262
1263    # prometheus endpoint
1264    endpoint.https = https://:10259/metrics
1265    endpoint.http = http://127.0.0.1:10251/metrics
1266
1267    # token for "Authorization: Bearer $(cat tokenPath)"
1268    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1269
1270    # server certificate for certificate validation
1271    certPath =
1272
1273    # client certificate for authentication
1274    clientCertPath =
1275
1276    # Allow invalid SSL server certificate
1277    insecure = true
1278
1279    # include metrics help with the events
1280    includeHelp = false
1281
1282    # set output (splunk or devnull, default is [general]defaultOutput)
1283    output =
1284
1285    whitelist.1 = ^kubernetes_build_info$
1286    whitelist.2 = ^scheduler_e2e_scheduling_duration_seconds_sum$
1287    whitelist.3 = ^scheduler_binding_duration_seconds_sum$
1288    whitelist.4 = ^scheduler_scheduling_algorithm_duration_seconds_sum$
1289    whitelist.5 = ^rest_client_request_duration_seconds_sum$
1290    whitelist.6 = ^rest_client_requests_total$
1291    whitelist.7 = ^process_cpu_seconds_total$
1292    whitelist.8 = ^process_resident_memory_bytes$
1293    whitelist.9 = ^process_virtual_memory_bytes$
1294
1295
1296    # This configuration works if the controller-manager is bound to localhost:10252
1297    [input.prometheus::controller-manager]
1298
1299    # disable prometheus controller-manager metrics
1300    disabled = false
1301
1302    # override type
1303    type = kubernetes_prometheus
1304
1305    # specify Splunk index
1306    index =
1307
1308    # override host
1309    host = ${KUBERNETES_NODENAME}
1310
1311    # override source
1312    source = controller-manager
1313
1314    # how often to collect prometheus metrics
1315    interval = 60s
1316
1317    # request timeout
1318    timeout = 60s
1319
1320    # prometheus endpoint
1321    endpoint.https = https://:10257/metrics
1322    endpoint.http = http://127.0.0.1:10252/metrics
1323
1324    # token for "Authorization: Bearer $(cat tokenPath)"
1325    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1326
1327    # server certificate for certificate validation
1328    certPath =
1329
1330    # client certificate for authentication
1331    clientCertPath =
1332
1333    # Allow invalid SSL server certificate
1334    insecure = true
1335
1336    # include metrics help with the events
1337    includeHelp = false
1338
1339    # set output (splunk or devnull, default is [general]defaultOutput)
1340    output =
1341
1342    whitelist.1 = ^kubernetes_build_info$
1343    whitelist.2 = ^node_collector_zone_size$
1344    whitelist.3 = ^node_collector_zone_health$
1345    whitelist.4 = ^node_collector_unhealthy_nodes_in_zone$
1346    whitelist.5 = ^rest_client_request_duration_seconds_sum$
1347    whitelist.6 = ^rest_client_requests_total$
1348    whitelist.7 = ^process_cpu_seconds_total$
1349    whitelist.8 = ^process_resident_memory_bytes$
1350    whitelist.9 = ^process_virtual_memory_bytes$
1351    
1352
1353    [input.prometheus::etcd]
1354
1355    # disable prometheus etcd metrics
1356    disabled = false
1357
1358    # override type
1359    type = kubernetes_prometheus
1360
1361    # specify Splunk index
1362    index =
1363
1364    # override host
1365    host = ${KUBERNETES_NODENAME}
1366
1367    # override source
1368    source = etcd
1369
1370    # how often to collect prometheus metrics
1371    interval = 60s
1372
1373    # request timeout
1374    timeout = 60s
1375
1376    # prometheus endpoint
1377    endpoint.http = http://:2379/metrics
1378    endpoint.https = https://:2379/metrics
1379
1380    # token for "Authorization: Bearer $(cat tokenPath)"
1381    tokenPath =
1382
1383    # server certificate for certificate validation
1384    certPath = /rootfs/etc/kubernetes/pki/etcd/ca.crt
1385
1386    # client certificate for authentication
1387    clientCertPath = /rootfs/etc/kubernetes/pki/apiserver-etcd-client.crt
1388    clientKeyPath = /rootfs/etc/kubernetes/pki/apiserver-etcd-client.key
1389
1390    # Allow invalid SSL server certificate
1391    insecure = true
1392
1393    # include metrics help with the events
1394    includeHelp = false
1395
1396    # set output (splunk or devnull, default is [general]defaultOutput)
1397    output =
1398
1399    whitelist.1 = ^etcd_server_leader_changes_seen_total$
1400    whitelist.2 = ^etcd_server_has_leader$
1401    whitelist.3 = ^etcd_server_proposals_committed_total$
1402    whitelist.4 = ^etcd_server_proposals_applied_total$
1403    whitelist.5 = ^etcd_server_proposals_committed_total$
1404    whitelist.6 = ^etcd_server_proposals_pending$
1405    whitelist.7 = ^etcd_server_proposals_failed_total$
1406    whitelist.8 = ^etcd_disk_wal_fsync_duration_seconds_sum$
1407    whitelist.9 = ^etcd_disk_wal_fsync_duration_seconds_count$
1408    whitelist.10 = ^etcd_disk_backend_commit_duration_seconds_sum$
1409    whitelist.11 = ^etcd_disk_backend_commit_duration_seconds_count$
1410    whitelist.12 = ^etcd_network_client_grpc_.*$
1411    whitelist.13 = ^grpc_server_handled_total$
1412    whitelist.14 = ^etcd_network_peer_round_trip_time_seconds_bucket$
1413    whitelist.15 = ^process_cpu_seconds_total$
1414    whitelist.16 = ^process_resident_memory_bytes$
1415    whitelist.17 = ^process_virtual_memory_bytes$
1416    whitelist.18 = ^process_open_fds$
1417    whitelist.19 = ^process_max_fds$
1418    whitelist.20 = ^etcd_disk_backend_commit_duration_seconds_bucket$
1419    whitelist.21 = ^etcd_disk_wal_fsync_duration_seconds_bucket$
1420
1421  004-addon.conf: |
1422    [general]
1423
1424    # addons can be run in parallel with agents
1425    addon = true
1426
1427    [input.kubernetes_events]
1428
1429    # disable events
1430    disabled = false
1431
1432    # override type
1433    type = kubernetes_events
1434
1435    # specify Splunk index
1436    index =
1437
1438    # set output (splunk or devnull, default is [general]defaultOutput)
1439    output =
1440
1441    # exclude managed fields from the metadata
1442    excludeManagedFields = true
1443
1444
1445    [input.kubernetes_watch::pods]
1446
1447    # disable events
1448    disabled = false
1449
1450    # Set the timeout for how often watch request should refresh the whole list
1451    refresh = 10m
1452
1453    apiVersion = v1
1454    kind = Pod
1455    namespace =
1456
1457    # override type
1458    type = kubernetes_objects
1459
1460    # specify Splunk index
1461    index =
1462
1463    # set output (splunk or devnull, default is [general]defaultOutput)
1464    output =
1465
1466    # exclude managed fields from the metadata
1467    excludeManagedFields = true
1468
1469    # you can remove or hash some values in the events (after modifyValues you can define path in the JSON object,
1470    # and the value can be hash:{hashFunction}, or remove to remove the object )
1471    ; modifyValues.object.data.* = hash:sha256
1472    ; modifyValues.object.metadata.annotations.* = remove
1473
1474    # You can exclude events by namespace with blacklist or whitelist only required namespaces
1475    # blacklist.kubernetes_namespace = ^namespace0$
1476    # whitelist.kubernetes_namespace = ^((namespace1)|(namespace2))$
1477
1478    [input.kubernetes_watch::resourcequota]
1479    # disable events
1480    disabled = false
1481
1482    # Set the timeout for how often watch request should refresh the whole list
1483    refresh = 10m
1484
1485    apiVersion = v1
1486    kind = ResourceQuota
1487    namespace =
1488
1489    # override type
1490    type = kubernetes_objects
1491
1492    # specify Splunk index
1493    index =
1494
1495    # set output (splunk or devnull, default is [general]defaultOutput)
1496    output =
1497
1498    # exclude managed fields from the metadata
1499    excludeManagedFields = true
1500
1501    [input.kubernetes_watch::nodes]
1502    # disable events
1503    disabled = false
1504
1505    # Set the timeout for how often watch request should refresh the whole list
1506    refresh = 10m
1507
1508    apiVersion = v1
1509    kind = Node
1510    namespace =
1511
1512    # override type
1513    type = kubernetes_objects
1514
1515    # specify Splunk index
1516    index =
1517
1518    # set output (splunk or devnull, default is [general]defaultOutput)
1519    output =
1520
1521    # exclude managed fields from the metadata
1522    excludeManagedFields = true
1523
1524---
1525apiVersion: apps/v1
1526kind: DaemonSet  # Collectord agent for every node without the control-plane role label
1527metadata:
1528  name: collectorforkubernetes
1529  namespace: collectorforkubernetes
1530  labels:
1531    app: collectorforkubernetes
1532spec:
1533  # RollingUpdate replaces pods automatically whenever the pod template of this DaemonSet changes.
1534  # NOTE(review): apps/v1 already defaults to RollingUpdate (OnDelete was the default of older API versions); kept explicit.
1535  updateStrategy:
1536    type: RollingUpdate
1537  selector:
1538    matchLabels:
1539      daemon: collectorforkubernetes  # NOTE(review): the other collectorforkubernetes workloads in this file use the same selector - confirm intended
1540  template:
1541    metadata:
1542      name: collectorforkubernetes
1543      labels:
1544        daemon: collectorforkubernetes
1545    spec:
1546      priorityClassName: collectorforkubernetes-critical
1547      dnsPolicy: ClusterFirstWithHostNet
1548      hostNetwork: true
1549      serviceAccountName: collectorforkubernetes
1550      # Schedule this DaemonSet only on nodes without the control-plane role label (non-masters)
1551      affinity:
1552        nodeAffinity:
1553          requiredDuringSchedulingIgnoredDuringExecution:
1554            nodeSelectorTerms:
1555            - matchExpressions:
1556              - key: node-role.kubernetes.io/control-plane
1557                operator: DoesNotExist
1558      tolerations:  # key-less "Exists" tolerations: match every NoSchedule / NoExecute taint
1559      - operator: "Exists"
1560        effect: "NoSchedule"
1561      - operator: "Exists"
1562        effect: "NoExecute"
1563      containers:
1564      - name: collectorforkubernetes
1565        # Collectord version
1566        image: docker.io/outcoldsolutions/collectorforkubernetes:26.04.3
1567        imagePullPolicy: Always
1568        securityContext:
1569          runAsUser: 0
1570          privileged: true
1571        # Define your own resources if needed. The defaults should be fine for most clusters.
1572        # You can lower or increase them based on your hosts.
1573        resources:
1574          limits:
1575            cpu: 2000m
1576            memory: 1024Mi
1577          requests:
1578            cpu: 500m
1579            memory: 256Mi
1580        env:
1581        - name: KUBERNETES_NODENAME  # node name, referenced as ${KUBERNETES_NODENAME} in the configuration files
1582          valueFrom:
1583            fieldRef:
1584              fieldPath: spec.nodeName
1585        - name: POD_NAME
1586          valueFrom:
1587            fieldRef:
1588              fieldPath: metadata.name
1589        volumeMounts:
1590        # We store state in the /data folder (file positions)
1591        - name: collectorforkubernetes-state
1592          mountPath: /data
1593        # Configuration files deployed with the ConfigMap
1594        - name: collectorforkubernetes-config
1595          mountPath: /config/
1596          readOnly: true
1597        # Host root filesystem, mounted under /rootfs/ to have access to logs and metrics
1598        - name: rootfs
1599          mountPath: /rootfs/
1600          readOnly: false
1601          mountPropagation: HostToContainer
1602        # use the host's /etc/localtime so the container has the correct timezone
1603        - name: localtime
1604          mountPath: /etc/localtime
1605          readOnly: true
1606      volumes:
1607      # We store state directly on the host; change this location if
1608      # your persistent volume is somewhere else
1609      - name: collectorforkubernetes-state
1610        hostPath:
1611          path: /var/lib/collectorforkubernetes/data/
1612          type: DirectoryOrCreate
1613      # Host root filesystem (for container logs and metadata)
1614      - name: rootfs
1615        hostPath:
1616          path: /
1617      # correct timezone
1618      - name: localtime
1619        hostPath:
1620          path: /etc/localtime
1621      # configuration from ConfigMap (only the two files listed under items are mounted)
1622      - name: collectorforkubernetes-config
1623        configMap:
1624          name: collectorforkubernetes
1625          items:
1626          - key: 001-general.conf
1627            path: 001-general.conf
1628          - key: 002-daemonset.conf
1629            path: 002-daemonset.conf
1630---
1631apiVersion: apps/v1
1632kind: DaemonSet  # Collectord agent for nodes carrying the control-plane role label
1633metadata:
1634  name: collectorforkubernetes-master
1635  namespace: collectorforkubernetes
1636  labels:
1637    app: collectorforkubernetes
1638spec:
1639  updateStrategy:
1640    type: RollingUpdate
1641  selector:
1642    matchLabels:
1643      daemon: collectorforkubernetes  # NOTE(review): the other collectorforkubernetes workloads in this file use the same selector - confirm intended
1644  template:
1645    metadata:
1646      name: collectorforkubernetes-master
1647      labels:
1648        daemon: collectorforkubernetes
1649    spec:
1650      priorityClassName: collectorforkubernetes-critical
1651      dnsPolicy: ClusterFirstWithHostNet
1652      hostNetwork: true
1653      serviceAccountName: collectorforkubernetes
1654      affinity:  # schedule only on nodes that have the control-plane role label
1655        nodeAffinity:
1656          requiredDuringSchedulingIgnoredDuringExecution:
1657            nodeSelectorTerms:
1658            - matchExpressions:
1659              - key: node-role.kubernetes.io/control-plane
1660                operator: Exists
1661      tolerations:  # key-less "Exists" tolerations: match every NoSchedule / NoExecute taint
1662      - operator: "Exists"
1663        effect: "NoSchedule"
1664      - operator: "Exists"
1665        effect: "NoExecute"
1666      containers:
1667      - name: collectorforkubernetes
1668        image: docker.io/outcoldsolutions/collectorforkubernetes:26.04.3  # Collectord version
1669        imagePullPolicy: Always
1670        securityContext:
1671          runAsUser: 0
1672          privileged: true
1673        resources:
1674          limits:
1675            cpu: 2000m
1676            memory: 1024Mi
1677          requests:
1678            cpu: 500m
1679            memory: 256Mi
1680        env:
1681        - name: KUBERNETES_NODENAME  # node name, referenced as ${KUBERNETES_NODENAME} in the configuration files
1682          valueFrom:
1683            fieldRef:
1684              fieldPath: spec.nodeName
1685        - name: POD_NAME
1686          valueFrom:
1687            fieldRef:
1688              fieldPath: metadata.name
1689        volumeMounts:
1690        - name: collectorforkubernetes-state  # hostPath-backed state directory
1691          mountPath: /data
1692        - name: collectorforkubernetes-config  # configuration files from the ConfigMap
1693          mountPath: /config/
1694          readOnly: true
1695        - name: rootfs  # host root filesystem, mounted under /rootfs/
1696          mountPath: /rootfs/
1697          readOnly: false
1698          mountPropagation: HostToContainer
1699        - name: localtime  # host's /etc/localtime
1700          mountPath: /etc/localtime
1701          readOnly: true
1702      volumes:
1703      - name: collectorforkubernetes-state
1704        hostPath:
1705          path: /var/lib/collectorforkubernetes/data/
1706          type: DirectoryOrCreate
1707      - name: rootfs
1708        hostPath:
1709          path: /
1710      - name: localtime
1711        hostPath:
1712          path: /etc/localtime
1713      - name: collectorforkubernetes-config
1714        configMap:
1715          name: collectorforkubernetes
1716          items:  # general + daemonset configs, plus the master-only 003-daemonset-master.conf
1717          - key: 001-general.conf
1718            path: 001-general.conf
1719          - key: 002-daemonset.conf
1720            path: 002-daemonset.conf
1721          - key: 003-daemonset-master.conf
1722            path: 003-daemonset-master.conf
1723---
1724apiVersion: apps/v1
1725kind: Deployment  # single-replica Collectord add-on, configured by 004-addon.conf
1726metadata:
1727  name: collectorforkubernetes-addon
1728  namespace: collectorforkubernetes
1729  labels:
1730    app: collectorforkubernetes
1731spec:
1732  replicas: 1
1733  selector:
1734    matchLabels:
1735      daemon: collectorforkubernetes  # NOTE(review): both DaemonSets in this file use the same selector - confirm intended
1736  template:
1737    metadata:
1738      name: collectorforkubernetes-addon
1739      labels:
1740        daemon: collectorforkubernetes
1741    spec:
1742      priorityClassName: collectorforkubernetes-critical
1743      serviceAccountName: collectorforkubernetes
1744      containers:
1745      - name: collectorforkubernetes
1746        image: docker.io/outcoldsolutions/collectorforkubernetes:26.04.3  # Collectord version
1747        imagePullPolicy: Always
1748        securityContext:
1749          runAsUser: 0
1750          privileged: true  # NOTE(review): only /data and /config are mounted here - confirm privileged mode is still required
1751        resources:
1752          limits:
1753            cpu: 1000m
1754            memory: 512Mi
1755          requests:
1756            cpu: 200m
1757            memory: 64Mi
1758        env:
1759        - name: KUBERNETES_NODENAME
1760          valueFrom:
1761            fieldRef:
1762              fieldPath: spec.nodeName
1763        - name: POD_NAME
1764          valueFrom:
1765            fieldRef:
1766              fieldPath: metadata.name
1767        volumeMounts:
1768        - name: collectorforkubernetes-state  # hostPath-backed state directory
1769          mountPath: /data
1770        - name: collectorforkubernetes-config  # configuration files from the ConfigMap
1771          mountPath: /config/
1772          readOnly: true
1773      volumes:
1774      - name: collectorforkubernetes-state
1775        hostPath:
1776          path: /var/lib/collectorforkubernetes/data/
1777          type: DirectoryOrCreate  # same as the DaemonSets: do not fail the mount when the directory does not exist yet
1778      - name: collectorforkubernetes-config
1779        configMap:
1780          name: collectorforkubernetes
1781          items:  # general config plus the add-on specific 004-addon.conf
1782          - key: 001-general.conf
1783            path: 001-general.conf
1784          - key: 004-addon.conf
1785            path: 004-addon.conf