Collectord configuration

Download

collectorforkubernetes.yaml

CURL

1curl -O https://www.outcoldsolutions.com/docs/monitoring-kubernetes/collectorforkubernetes.yaml

WGET

1wget https://www.outcoldsolutions.com/docs/monitoring-kubernetes/collectorforkubernetes.yaml

collectorforkubernetes.yaml

   1apiVersion: v1
   2kind: Namespace
   3metadata:
   4  labels:
   5    app: collectorforkubernetes
   6  name: collectorforkubernetes
   7---
   8apiVersion: apiextensions.k8s.io/v1
   9kind: CustomResourceDefinition
  10metadata:
  11  name: configurations.collectord.io
  12spec:
  13  group: collectord.io
  14  versions:
  15    - name: v1
  16      served: true
  17      storage: true
  18      schema:
  19        openAPIV3Schema:
  20          type: object
  21          properties:
  22            spec:
  23              type: object
  24              additionalProperties: true
  25            force:
  26              type: boolean
  27  scope: Cluster
  28  names:
  29    listKind: ConfigurationList
  30    plural: configurations
  31    singular: configuration
  32    kind: Configuration
  33---
  34apiVersion: apiextensions.k8s.io/v1
  35kind: CustomResourceDefinition
  36metadata:
  37  name: splunkoutputs.collectord.io
  38spec:
  39  group: collectord.io
  40  versions:
  41    - name: v1
  42      served: true
  43      storage: true
  44      schema:
  45        openAPIV3Schema:
  46          type: object
  47          properties:
  48            spec:
  49              type: object
  50              properties:
  51                url:
  52                  type: string
  53                  format: uri
  54                insecure:
  55                  type: boolean
  56                token:
  57                  type: string
  58                  description: "Plain token"
  59                tokenFromSecret:
  60                  type: object
  61                  description: "Reference to a Kubernetes Secret"
  62                  properties:
  63                    secret:
  64                      type: string
  65                    key:
  66                      type: string
  67              oneOf:
  68                - required: ["token"]
  69                - required: ["tokenFromSecret"]
  70  scope: Namespaced
  71  names:
  72    listKind: SplunkOutputList
  73    plural: splunkoutputs
  74    singular: splunkoutput
  75    kind: SplunkOutput
  76---
  77apiVersion: v1
  78kind: ServiceAccount
  79metadata:
  80  labels:
  81    app: collectorforkubernetes
  82  name: collectorforkubernetes
  83  namespace: collectorforkubernetes
  84---
  85apiVersion: scheduling.k8s.io/v1
  86kind: PriorityClass
  87metadata:
  88  name: collectorforkubernetes-critical
  89value: 1000000000
  90---
  91apiVersion: rbac.authorization.k8s.io/v1
  92kind: ClusterRole
  93metadata:
  94  labels:
  95    app: collectorforkubernetes
  96  name: collectorforkubernetes
  97rules:
  98- apiGroups: ['extensions']
  99  resources: ['podsecuritypolicies']
 100  verbs:     ['use']
 101  resourceNames:
 102  - privileged
 103- apiGroups:
 104  - ""
 105  - apps
 106  - batch
 107  - extensions
 108  - collectord.io
 109  - rbac.authorization.k8s.io
 110  resources:
 111  - splunkoutputs
 112  - alertmanagers
 113  - cronjobs
 114  - daemonsets
 115  - deployments
 116  - endpoints
 117  - events
 118  - jobs
 119  - namespaces
 120  - nodes
 121  - nodes/metrics
 122  - nodes/proxy
 123  - pods
 124  - replicasets
 125  - replicationcontrollers
 126  - scheduledjobs
 127  - secrets
 128  - services
 129  - statefulsets
 130  - persistentvolumeclaims
 131  - configurations
 132  - resourcequotas
 133  - clusterroles
 134  verbs:
 135  - get
 136  - list
 137  - watch
 138- nonResourceURLs:
 139  - /metrics
 140  verbs:
 141  - get
 142  apiGroups: []
 143  resources: []
 144---
 145apiVersion: rbac.authorization.k8s.io/v1
 146kind: ClusterRoleBinding
 147metadata:
 148  labels:
 149    app: collectorforkubernetes
 150  name: collectorforkubernetes
 151roleRef:
 152  apiGroup: rbac.authorization.k8s.io
 153  kind: ClusterRole
 154  name: collectorforkubernetes
 155subjects:
 156  - kind: ServiceAccount
 157    name: collectorforkubernetes
 158    namespace: collectorforkubernetes
 159---
 160apiVersion: v1
 161kind: ConfigMap
 162metadata:
 163  name: collectorforkubernetes
 164  namespace: collectorforkubernetes
 165  labels:
 166    app: collectorforkubernetes
 167data:
 168  001-general.conf: |
 169    # The general configuration is used for all deployments
 170    #
 171    # Run collectord with the flag -conf and specify location of the configuration files.
 172    #
 173    # You can override all the values using environment variables with the format like
 174    #   COLLECTOR__<ANYNAME>=<section>__<key>=<value>
 175    # As an example you can set dataPath in [general] section as
 176    #   COLLECTOR__DATAPATH=general__dataPath=C:\\some\\path\\data.db
 177    # This parameter can be configured using -env-override, set it to empty string to disable this feature
 178
 179    [general]
 180
 181    # Please review license https://www.outcoldsolutions.com/docs/license-agreement/
 182    # and accept license by changing the value to *true*
 183    acceptLicense = false
 184
 185    # Location for the database
 186    # Collectord stores positions of the files and internal state
 187    dataPath = ./data/
 188
 189    # log level (accepted values are trace, debug, info, warn, error, fatal)
 190    logLevel = info
 191
 192    # http server gives access to two endpoints
 193    # /healthz
 194    # /metrics/json
 195    # /metrics/prometheus
 196    # httpServerBinding = 0.0.0.0:11888
 197    httpServerBinding =
 198
 199    # log requests to the http server
 200    httpServerLog = false
 201
 202    # telemetry report endpoint, set it to empty string to disable telemetry
 203    telemetryEndpoint = https://license.outcold.solutions/telemetry/
 204
 205    # license check endpoint
 206    licenseEndpoint = https://license.outcold.solutions/license/
 207
 208    # license server through proxy
 209    # This configuration is used only for the Outcold Solutions License Server
 210    # For license server running on-premises, use configuration under [license.client]
 211    licenseServerProxyUrl =
 212
 213    # authentication with basic authorization (user:password)
 214    # This configuration is used only for the Outcold Solutions License Server
 215    # For license server running on-premises, use configuration under [license.client]
 216    licenseServerProxyBasicAuth =
 217
 218    # license key
 219    license =
 220
 221    # Environment variable $KUBERNETES_NODENAME is used by default to setup hostname
 222    # Use value below to override specific name
 223    hostname =
 224
 225    # Default output for events, logs and metrics
 226    # valid values: splunk and devnull
 227    # Use devnull by default if you don't want to redirect data
 228    defaultOutput = splunk
 229
 230    # Default buffer size for file input
 231    fileInputBufferSize = 256b
 232
 233    # Maximum size of one line the file reader can read
 234    fileInputLineMaxSize = 1mb
 235
  236    # Include custom fields to attach to every event, in the example below every event sent to Splunk will have
  237    # indexed field my_environment=dev. Field names should match ^[a-z][_a-z0-9]*$
 238    # Better way to configure that is to specify labels for Kubernetes Nodes.
 239    # ; fields.my_environment = dev
 240    # Identify the cluster if you are planning to monitor multiple clusters
 241    fields.kubernetes_cluster = -
 242
 243    # Include EC2 Metadata (see list of possible fields https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html)
 244    # Should be in format ec2Metadata.{desired_field_name} = {url path to read the value}
 245    # ec2Metadata.ec2_instance_id = /latest/meta-data/instance-id
 246    # ec2Metadata.ec2_instance_type = /latest/meta-data/instance-type
 247
 248    # subdomain for the annotations added to the pods, workloads, namespaces or containers, like splunk.collectord.io/..
 249    annotationsSubdomain =
 250
 251    # configure global thruput per second for forwarded logs (metrics are not included)
 252    # for example if you set `thruputPerSecond = 512Kb`, that will limit amount of logs forwarded
 253    # from the single Collectord instance to 512Kb per second.
 254    # You can configure thruput individually for the logs (including specific for container logs) below
 255    thruputPerSecond =
 256
 257    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 258    # older than 7 days
 259    tooOldEvents =
 260
 261    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 262    tooNewEvents =
 263
 264    # For input.files::X and application logs, when glob or match are configured, Collectord can automatically
 265    # detect gzipped files and skip them (based on the extensions or magic numbers)
 266    autoSkipGzipFiles = true
 267
 268    [license.client]
 269    # point to the license located on the HTTP web server, or a hosted by the Collectord running as license server
 270    url =
 271    # basic authentication for the HTTP server
 272    basicAuth =
 273    # if SSL, ignore the certificate verification
 274    insecure = false
 275    # CA Path for the Server certificate
 276    capath =
  277    # CA Name for the Server certificate
 278    caname =
 279    # license server through proxy
 280    proxyUrl =
 281    # authentication with basic authorization (user:password)
 282    proxyBasicAuth =
 283
 284
 285    # forward internal collectord metrics
 286    [input.collectord_metrics]
 287
 288    # disable collectord internal metrics
 289    disabled = false
 290
 291    # override type
 292    type = kubernetes_prometheus
 293
 294    # how often to collect internal metrics
 295    interval = 1m
 296
 297    # set output (splunk or devnull, default is [general]defaultOutput)
 298    output =
 299
 300    # specify Splunk index
 301    index =
 302
 303    # whitelist or blacklist the metrics
 304    whitelist.1 = ^file_input_open$
 305    whitelist.2 = ^file_input_read_bytes$
 306    whitelist.3 = ^kubernetes_handlers$
 307    whitelist.4 = ^pipe$
 308    whitelist.5 = ^pipelines_num$
 309    whitelist.6 = ^splunk_post_bytes_sum.*$
 310    whitelist.7 = ^splunk_post_events_count_sum.*$
 311    whitelist.8 = ^splunk_post_failed_requests$
 312    whitelist.9 = ^splunk_post_message_max_lag_seconds_bucket.*$
 313    whitelist.10 = ^splunk_post_requests_seconds_sum.*$
 314    whitelist.11 = ^splunk_post_retries_required_sum.*$
 315
 316
 317    # connection to kubernetes api
 318    [general.kubernetes]
 319
 320    # Override service URL for Kubernetes (default is ${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT})
 321    serviceURL =
 322
 323    # Environment variable $KUBERNETES_NODENAME is used by default to setup nodeName
 324    # Use it only when you need to override it
 325    nodeName =
 326
 327    # Configuration to access the API server,
 328    # see https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod
 329    # for details
 330    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
 331    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
 332
 333    # Default timeout for http responses. The streaming/watch requests depend on this timeout.
 334    timeout = 30m
 335
 336    # How long to keep the cache for the recent calls to API server (to limit number of calls when collectord discovers new pods)
 337    metadataTTL = 30s
 338
 339    # regex to find pods
 340    podsCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?$
 341
 342    # regex to find containers in the pods
 343    containersCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?/(docker-|crio-|cri-\w+-)?[0-9a-f]{64}(\.scope)?(\/.+)?$
 344
 345    # path to the kubelet root location (use it to discover application logs for emptyDir)
 346    # the expected format is `pods/{pod-id}/volumes/kubernetes.io~empty-dir/{volume-name}/_data/`
 347    volumesRootDir = /rootfs/var/lib/kubelet/
 348
 349    # You can attach annotations as a metadata, using the format
 350    #   includeAnnotations.{key} = {regexp}
 351    # For example if you want to include all annotations that starts with `prometheus.io` or `example.com` you can include
 352    # the following format:
 353    #   includeAnnotations.1 = ^prometheus\.io.*
 354    #   includeAnnotations.2 = ^example\.com.*
 355
 356    # You can exclude labels from metadata, using the format
 357    #   excludeLabels.{key} = {regexp}
 358    # For example if you want to exclude all labels that starts with `prometheus.io` or `example.com` you can include
 359    # the following format:
 360    #   excludeLabels.1 = ^prometheus\.io.*
 361    #   excludeLabels.2 = ^example\.com.*
 362
 363    # watch for changes (annotations) in the objects
 364    watch.namespaces = v1/namespace
 365    watch.deployments = apps/v1/deployment
 366    watch.configurations = collectord.io/v1/configuration
 367
 368    # Collectord can review the assigned ClusterRole and traverse metadata for the Pods only for the Owner objects
 369    # that are defined in the ClusterRole, ignoring anything else, it does not have access to.
 370    # This way Collectord does not generate 403 requests on API Server
 371    clusterRole = collectorforkubernetes
 372
 373    # Alternative of telling Collectord about the ClusterRole is to manually list the objects.
 374    # You can define which objects Collectord should traverse when it sees Owners.
 375    ; traverseOwnership.namespaces = v1/namespace
 376
 377    # Implementation of the watch protocol.
 378    # 0 - use the default implementation (2)
 379    # 1 - use the watch implementation that is optimized for the small number of objects (just issue one watch for all objects)
 380    # 2 - use the watch implementation that is optimized for the large number of objects (paginate through the list of objects and issue watch for the last resource version)
 381    watchImplementation = 2
 382
 383    # watch for pods annotations, setup prometheus collection
 384    # for these pods
 385    # Addon listens on Pod Network
 386    # DaemonSets listen on Host Network
 387    [input.prometheus_auto]
 388
 389    # disable prometheus auto discovery for pods
 390    disabled = false
 391
 392    # override type
 393    type = kubernetes_prometheus
 394
 395    # specify Splunk index
 396    index =
 397
 398    # how often to collect prometheus metrics
 399    interval = 60s
 400
 401    # include metrics help with the events
 402    includeHelp = true
 403
 404    # http client timeout
 405    timeout = 30s
 406
 407    # set output (splunk or devnull, default is [general]defaultOutput)
 408    output =
 409
 410    # Include an Authorization header for the prometheus scrapper
 411    # When configuring scrapping with collectord using annotations use prometheus.1-AuthorizationKey=key1
 412    # authorization.key1 = Bearer FOO
 413
 414
 415    # Splunk output
 416    [output.splunk]
 417
 418    # Splunk HTTP Event Collector url
 419    url =
  420    # You can specify multiple splunk URLs with
 421    #
  422    # urls.0 = https://server1:8088/services/collector/event/1.0
  423    # urls.1 = https://server2:8088/services/collector/event/1.0
  424    # urls.2 = https://server3:8088/services/collector/event/1.0
 425    #
 426    # Limitations:
 427    # * The urls cannot have different path.
 428
 429    # Specify how URL should be picked up (in case if multiple is used)
 430    # urlSelection = random|round-robin|random-with-round-robin
 431    # where:
 432    # * random - choose random url on first selection and after each failure (connection or HTTP status code >= 500)
 433    # * round-robin - choose url starting from first one and bump on each failure (connection or HTTP status code >= 500)
 434    # * random-with-round-robin - choose random url on first selection and after that in round-robin on each
 435    #                             failure (connection or HTTP status code >= 500)
 436    urlSelection = random-with-round-robin
 437
 438    # Splunk HTTP Event Collector Token
 439    token =
 440
 441    # Allow invalid SSL server certificate
 442    insecure = false
 443    # minTLSVersion = TLSv1.2
 444    # maxTLSVersion = TLSv1.3
 445
  446    # Path to CA certificate
 447    caPath =
 448
 449    # CA Name to verify
 450    caName =
 451
 452    # path for client certificate (if required)
 453    clientCertPath =
 454
 455    # path for client key (if required)
 456    clientKeyPath =
 457
 458    # Events are batched with the maximum size set by batchSize and staying in pipeline for not longer
 459    # than set by frequency
 460    frequency = 5s
 461    batchSize = 768K
 462    # limit by the number of events (0 value has no limit on the number of events)
 463    events = 50
 464
 465    # Splunk through proxy
 466    proxyUrl =
 467
 468    # authentication with basic authorization (user:password)
 469    proxyBasicAuth =
 470
 471    # Splunk acknowledgement url (.../services/collector/ack)
 472    ackUrl =
  473    # You can specify multiple splunk URLs for ackUrl
 474    #
  475    # ackUrls.0 = https://server1:8088/services/collector/ack
  476    # ackUrls.1 = https://server2:8088/services/collector/ack
  477    # ackUrls.2 = https://server3:8088/services/collector/ack
 478    #
  479    # Make sure that they are in the same order as urls for url, to make sure that this Splunk instance will be
 480    # able to acknowledge the payload.
 481    #
 482    # Limitations:
 483    # * The urls cannot have different path.
 484
 485    # Enable index acknowledgment
 486    ackEnabled = false
 487
 488    # Index acknowledgment timeout
 489    ackTimeout = 3m
 490
 491    # Timeout specifies a time limit for requests made by collectord.
 492    # The timeout includes connection time, any
 493    # redirects, and reading the response body.
 494    timeout = 30s
 495
  496    # in case when the pipeline can post to multiple indexes, we want to avoid the possibility of blocking
 497    # all pipelines, because just some events have incorrect index
 498    dedicatedClientPerIndex = true
 499
 500    # possible values: RedirectToDefault, Drop, Retry
 501    incorrectIndexBehavior = RedirectToDefault
 502
 503    # gzip compression level (nocompression, default, 1...9)
 504    compressionLevel = default
 505
 506    # number of dedicated splunk output threads (to increase throughput above 4k events per second)
 507    threads = 2
 508    # Default algorithm between threads is roundrobin, but you can change it to weighted
 509    ; threadsAlgorithm = weighted
 510
 511    # if you want to exclude some preindexed fields from events
 512    # excludeFields.kubernetes_pod_ip = true
 513
 514    # By default if there are no indexes defined on the message, Collectord sends the event without the index, and
 515    # Splunk HTTP Event Collector going to use the default index for the Token. You can change that, and tell Collectord
 516    # to ignore all events that don't have index defined explicitly
 517    ; requireExplicitIndex = true
 518
 519    # You can define if you want to truncate messages that are larger than 1M in length (or define your own size, like 256K)
 520    ; maximumMessageLength = 1M
 521
 522    # For messages generated from logs, include unique `event_id` in the event
 523    ; includeEventID = false
 524
 525    # Dedicated queue size for the output, default is 1024, larger queue sizes will require more memory,
 526    # but will allow to handle more events in case of network issues
 527    queueSize = 1024
 528
 529    # How many digits after the decimal point to keep for timestamps (0-9)
 530    # Defaults to 3 (milliseconds)
 531    # Change to 6 for microseconds
 532    # Change to 9 for nanoseconds
 533    ; timestampPrecision = 3
 534
 535  002-daemonset.conf: |
 536    # DaemonSet configuration is used for Nodes and Masters.
 537
  538    # connection to CRIO
 539    [general.cri-o]
 540
 541    # url for CRIO API, only unix socket is supported
 542    url = unix:///rootfs/var/run/crio/crio.sock
 543
 544    # Timeout for http responses to docker client. The streaming requests depend on this timeout.
 545    timeout = 1m
 546
 547
 548    [general.containerd]
 549    # Runtime can be on /rootfs/run/containerd (depends on the Linux distribution)
 550    runtimePath = /rootfs/var/run/containerd
 551    namespace = k8s.io
 552
 553
 554    # cgroup input
 555    [input.system_stats]
 556
 557    # disable system level stats
 558    disabled.host = false
 559    disabled.cgroup = false
 560
 561    # cgroups fs location
 562    pathCgroups = /rootfs/sys/fs/cgroup
 563
 564    # proc location
 565    pathProc = /rootfs/proc
 566
 567    # how often to collect cgroup stats
 568    statsInterval = 30s
 569
 570    # override type
 571    type.host = kubernetes_stats_v2_host
 572    type.cgroup = kubernetes_stats_v2_cgroup
 573
 574    # specify Splunk index
 575    index.host =
 576    index.cgroup =
 577
 578    # set output (splunk or devnull, default is [general]defaultOutput)
 579    output.host =
 580    output.cgroup =
 581
 582
 583    # proc input
 584    [input.proc_stats]
 585
 586    # disable proc level stats
 587    disabled = false
 588
 589    # proc location
 590    pathProc = /rootfs/proc
 591
 592    # how often to collect proc stats
 593    statsInterval = 30s
 594
 595    # override type
 596    type = kubernetes_proc_stats_v2
 597
 598    # specify Splunk index
 599    index.host =
 600    index.cgroup =
 601
 602    # proc filesystem includes by default system threads (there can be over 100 of them)
 603    # these stats do not help with the observability
 604    # excluding them can reduce the size of the index, performance of the searches and usage of the collector
 605    includeSystemThreads = false
 606
 607    # set output (splunk or devnull, default is [general]defaultOutput)
 608    output.host =
 609    output.cgroup =
 610
 611    # Hide arguments for the processes, replacing with HIDDEN_ARGS(NUMBER)
 612    hideArgs = false
 613
 614
 615    # network stats
 616    [input.net_stats]
 617
 618    # disable net stats
 619    disabled = false
 620
 621    # proc path location
 622    pathProc = /rootfs/proc
 623
 624    # how often to collect net stats
 625    statsInterval = 30s
 626
 627    # override type
 628    type = kubernetes_net_stats_v2
 629
 630    # specify Splunk index
 631    index.host =
 632    index.cgroup =
 633
 634    # set output (splunk or devnull, default is [general]defaultOutput)
 635    output.host =
 636    output.cgroup =
 637
 638
 639    # network socket table
 640    [input.net_socket_table]
 641
 642    # disable net stats
 643    disabled = false
 644
 645    # proc path location
 646    pathProc = /rootfs/proc
 647
 648    # how often to collect net stats
 649    statsInterval = 30s
 650
 651    # override type
 652    type = kubernetes_net_socket_table
 653
 654    # specify Splunk index
 655    index.host =
 656    index.cgroup =
 657
 658    # set output (splunk or devnull, default is [general]defaultOutput)
 659    output.host =
 660    output.cgroup =
 661
 662    # group connections by tcp_state, localAddr, remoteAddr (if localPort is not the port it is listening on)
  663    # that can significantly reduce the amount of events
 664    group = true
 665
 666    # Collectord can watch for services, node, and pod IP addresses, and lookup the names
 667    # for the IP addresses. Keeping this enabled can add a significant load on the API Server, with large number of pods.
 668    disableLookup = false
 669
 670
 671    # mount input (collects mount stats where kubelet runtime is stored)
 672    [input.mount_stats]
 673
 674    # disable system level stats
 675    disabled = false
 676
 677    # how often to collect mount stats
 678    statsInterval = 30s
 679
 680    # override type
 681    type = kubernetes_mount_stats
 682
 683    # specify Splunk index
 684    index =
 685
 686    # set output (splunk or devnull, default is [general]defaultOutput)
 687    output =
 688
 689
 690    # diskstats input (collects /proc/diskstats)
 691    [input.disk_stats]
 692
 693    # disable system level stats
 694    disabled = false
 695
 696    # how often to collect mount stats
 697    statsInterval = 30s
 698
 699    # override type
 700    type = kubernetes_disk_stats
 701
 702    # specify Splunk index
 703    index =
 704
 705    # set output (splunk or devnull, default is [general]defaultOutput)
 706    output =
 707
 708
 709    # Container Log files
 710    [input.files]
 711
 712    # disable container logs monitoring
 713    disabled = false
 714
 715    # root location of docker log files
 716    # logs are expected in standard docker format like {containerID}/{containerID}-json.log
 717    # rotated files
 718    path = /rootfs/var/lib/docker/containers/
 719    # root location of CRI-O files
 720    # logs are expected in Kubernetes format, like {podID}/{containerName}/0.log
 721    crioPath = /rootfs/var/log/pods/
 722
 723    # (obsolete) glob matching pattern for log files
 724    # glob = */*-json.log*
 725
 726    # files are read using polling schema, when reach the EOF how often to check if files got updated
 727    pollingInterval = 250ms
 728
 729    # how often to look for the new files under logs path
 730    walkingInterval = 5s
 731
 732    # include verbose fields in events (file offset)
 733    verboseFields = false
 734
 735    # override type
 736    type = kubernetes_logs
 737
 738    # specify Splunk index
 739    index =
 740
 741    # docker splits events when they are larger than 10-100k (depends on the docker version)
 742    # we join them together by default and forward to Splunk as one event
 743    joinPartialEvents = true
 744
 745    # In case if your containers report messages with terminal colors or other escape sequences
 746    # you can enable strip for all the containers in one place.
 747    # Better is to enable it only for required container with the label collectord.io/strip-terminal-escape-sequences=true
 748    stripTerminalEscapeSequences = false
  749    # Regexp used for stripping terminal colors, it does not strip all the escape sequences
 750    # Read http://man7.org/linux/man-pages/man4/console_codes.4.html for more information
 751    stripTerminalEscapeSequencesRegex = (\x1b\[\d{1,3}(;\d{1,3})*m)|(\x07)|(\x1b]\d+(\s\d)?;[^\x07]+\x07)|(.*\x1b\[K)
 752
 753    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 754    samplingPercent = -1
 755
 756    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 757    samplingKey =
 758
 759    # set output (splunk or devnull, default is [general]defaultOutput)
 760    output =
 761
  762    # configure default thruput per second for each container log
 763    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 764    # from the single container to 128Kb per second.
 765    thruputPerSecond =
 766
 767    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 768    # older than 7 days
 769    tooOldEvents =
 770
 771    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 772    tooNewEvents =
 773
 774
 775    # Application Logs
 776    [input.app_logs]
 777
 778    # disable container application logs monitoring
 779    disabled = false
 780
 781    # root location of mounts (applies to hostPath mounts only), if the hostPath differs inside container from the path on host
 782    root = /rootfs/
 783
 784    # how often to review list of available volumes
 785    syncInterval = 5s
 786
 787    # glob matching pattern for log files
 788    glob = *.log*
 789
 790    # files are read using polling schema, when reach the EOF how often to check if files got updated
 791    pollingInterval = 250ms
 792
 793    # how often to look for the new files under logs path
 794    walkingInterval = 5s
 795
 796    # include verbose fields in events (file offset)
 797    verboseFields = false
 798
 799    # override type
 800    type = kubernetes_logs
 801
 802    # specify Splunk index
 803    index =
 804
 805    # we split files using new line character, with this configuration you can specify what defines the new event
 806    # after new line
 807    eventPatternRegex = ^[^\s]
 808    # Maximum interval of messages in pipeline
 809    eventPatternMaxInterval = 100ms
 810    # Maximum time to wait for the messages in pipeline
 811    eventPatternMaxWait = 1s
 812    # Maximum message size
 813    eventPatternMaxSize = 1MB
 814
 815    # set output (splunk or devnull, default is [general]defaultOutput)
 816    output =
 817
  818    # configure default thruput per second for each container log
 819    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 820    # from the single container to 128Kb per second.
 821    thruputPerSecond =
 822
 823    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 824    # older than 7 days
 825    tooOldEvents =
 826
 827    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 828    tooNewEvents =
 829
 830    # Configure how long Collectord should keep the file descriptors open for files, that has not been forwarded yet
  831    # When using PVC, and if pipeline is lagging behind, Collectord holding open fd for files can cause long termination
 832    # of pods, as kubelet cannot unmount the PVC volume from the system
 833    maxHoldAfterClose = 1800s
 834
 835
 836    # Host logs. Input syslog(.\d+)? files
 837    [input.files::syslog]
 838
 839    # disable host level logs
 840    disabled = false
 841
 842    # root location of docker files
 843    path = /rootfs/var/log/
 844
 845    # regex matching pattern
 846    match = ^(syslog|messages)(.\d+)?$
 847
 848    # limit search only on one level
 849    recursive = false
 850
 851    # files are read using polling schema, when reach the EOF how often to check if files got updated
 852    pollingInterval = 250ms
 853
  854    # how often to look for the new files under logs path
 855    walkingInterval = 5s
 856
 857    # include verbose fields in events (file offset)
 858    verboseFields = false
 859
 860    # override type
 861    type = kubernetes_host_logs
 862
 863    # specify Splunk index
 864    index =
 865
 866    # field extraction
 867    extraction = ^(?P<timestamp>[A-Za-z]+\s+\d+\s\d+:\d+:\d+)\s(?P<syslog_hostname>[^\s]+)\s(?P<syslog_component>[^:\[]+)(\[(?P<syslog_pid>\d+)\])?: (.+)$
 868    # extractionMessageField =
 869
 870    # timestamp field
 871    timestampField = timestamp
 872
 873    # format for timestamp
 874    # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`
 875    timestampFormat = Jan 2 15:04:05
 876
 877    # Adjust date, if month/day aren't set in format
 878    timestampSetMonth = false
 879    timestampSetDay = false
 880
 881    # timestamp location (if not defined by format)
 882    timestampLocation = Local
 883
 884    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 885    samplingPercent = -1
 886
 887    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 888    samplingKey =
 889
 890    # set output (splunk or devnull, default is [general]defaultOutput)
 891    output =
 892
 893    # configure default thruput per second for this files group
 894    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 895    # from the files in this group to 128Kb per second.
 896    thruputPerSecond =
 897
 898    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 899    # older than 7 days
 900    tooOldEvents =
 901
 902    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 903    tooNewEvents =
 904
 905    # by default every new event should start from not space symbol
 906    eventPattern = ^[^\s]
 907
 908    # Blacklisting and whitelisting the logs
 909    # whitelist.0 = ^regexp$
 910    # blacklist.0 = ^regexp$
 911
 912
 913    # Host logs. Input all *.log(.\d+)? files
 914    [input.files::logs]
 915
 916    # disable host level logs
 917    disabled = false
 918
 919    # root location of log files
 920    path = /rootfs/var/log/
 921
 922    # regex matching pattern
 923    match = ^(([\w\-.]+\.log(.[\d\-]+)?)|(docker))$
 924
 925    # files are read using polling schema, when reach the EOF how often to check if files got updated
 926    pollingInterval = 250ms
 927
    # how often to look for new files under the logs path
 929    walkingInterval = 5s
 930
 931    # include verbose fields in events (file offset)
 932    verboseFields = false
 933
 934    # override type
 935    type = kubernetes_host_logs
 936
 937    # specify Splunk index
 938    index =
 939
 940    # field extraction
 941    extraction =
 942    extractionMessageField =
 943
 944    # timestamp field
 945    timestampField =
 946
 947    # format for timestamp
 948    # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`
 949    timestampFormat =
 950
 951    # timestamp location (if not defined by format)
 952    timestampLocation =
 953
 954    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 955    samplingPercent = -1
 956
 957    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 958    samplingKey =
 959
 960    # set output (splunk or devnull, default is [general]defaultOutput)
 961    output =
 962
 963    # configure default thruput per second for this files group
 964    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 965    # from the files in this group to 128Kb per second.
 966    thruputPerSecond =
 967
 968    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 969    # older than 7 days
 970    tooOldEvents =
 971
 972    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 973    tooNewEvents =
 974
 975    # by default every new event should start from not space symbol
 976    eventPattern = ^[^\s]
 977
 978    # Blacklisting and whitelisting the logs
 979    # whitelist.0 = ^regexp$
 980    # blacklist.0 = ^regexp$
 981
 982
 983    [input.journald]
 984
 985    # disable host level logs
 986    disabled = false
 987
 988    # root location of log files
 989    path.persistent = /rootfs/var/log/journal/
 990    path.volatile = /rootfs/run/log/journal/
 991
 992    # when reach end of journald, how often to pull
 993    pollingInterval = 250ms
 994
 995    # if you don't want to forward journald from the beginning,
 996    # set the oldest event in relative value, like -14h or -30m or -30s (h/m/s supported)
 997    startFromRel =
 998
 999    # override type
1000    type = kubernetes_host_logs
1001
1002    # specify Splunk index
1003    index =
1004
1005    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
1006    samplingPercent = -1
1007
1008    # sampling key (should be regexp with the named match pattern `key`)
1009    samplingKey =
1010
1011    # how often to reopen the journald to free old files
1012    reopenInterval = 1h
1013
1014    # set output (splunk or devnull, default is [general]defaultOutput)
1015    output =
1016
1017    # configure default thruput per second for journald
1018    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
1019    # from the journald to 128Kb per second.
1020    thruputPerSecond =
1021
1022    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
1023    # older than 7 days
1024    tooOldEvents =
1025
1026    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
1027    tooNewEvents =
1028
1029    # by default every new event should start from not space symbol
1030    eventPattern = ^[^\s]
1031
1032    # Blacklisting and whitelisting the logs
1033    # whitelist.0 = ^regexp$
1034    # blacklist.0 = ^regexp$
1035
1036    # Move Journald logs reader to a separate process, to prevent process from crashing in case of corrupted log files
1037    spawnExternalProcess = false
1038
1039
1040    # Pipe to join events (container logs only)
1041    [pipe.join]
1042
1043    # disable joining event
1044    disabled = false
1045
1046    # Maximum interval of messages in pipeline
1047    maxInterval = 100ms
1048
1049    # Maximum time to wait for the messages in pipeline
1050    maxWait = 1s
1051
1052    # Maximum message size
1053    maxSize = 1MB
1054
1055    # Default pattern to indicate new message (should start not from space)
1056    patternRegex = ^[^\s]
1057
1058
    # (deprecated, use annotations for setting up join rules)
1060    # Define special event join patterns for matched events
1061    # Section consist of [pipe.join::<name>]
1062    # [pipe.join::my_app]
1063    ## Set match pattern for the fields
1064    #; matchRegex.docker_container_image = my_app
1065    #; matchRegex.stream = stdout
1066    ## All events start from '[<digits>'
1067    #; patternRegex = ^\[\d+
1068
1069
1070    # You can configure global replace rules for the events, which can help to remove sensitive data
1071    # from logs before they are sent to Splunk. Those rules will be applied to all pipelines for container logs, host logs,
1072    # application logs and events.
1073    # In the following example we replace password=TEST with password=********
1074    ; [pipe.replace::name]
1075    ; patternRegex = (password=)([^\s]+)
1076    ; replace = $1********
1077    
1078    # You can configure global hash rules for the events, which can help to hide sensitive data
1079    # from logs before they are sent to outputs. Those rules will be applied to all pipelines for container logs, host logs,
1080    # application logs and events.
1081    # In the following example we hash IP addresses with fnv-1a-64
1082    ; [pipe.hash::name]
    ; match = (\d{1,3}\.){3}\d{1,3}
1084    ; function = fnv-1a-64
1085
1086
1087    [input.prometheus::kubelet]
1088
1089    # disable prometheus kubelet metrics
1090    disabled = false
1091
1092    # override type
1093    type = kubernetes_prometheus
1094
1095    # specify Splunk index
1096    index =
1097
1098    # override host (environment variables are supported, by default Kubernetes node name is used)
1099    host = ${KUBERNETES_NODENAME}
1100
1101    # override source
1102    source = kubelet
1103
1104    # how often to collect prometheus metrics
1105    interval = 60s
1106
1107    # request timeout
1108    timeout = 60s
1109
1110    # Prometheus endpoint, multiple values can be specified, collectord tries them in order till finding the first
1111    # working endpoint.
1112    # At first trying to get it through proxy
1113    endpoint.1proxy = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/nodes/${KUBERNETES_NODENAME}/proxy/metrics
1114    # In case if cannot get it through proxy, trying localhost
1115    endpoint.2http = http://127.0.0.1:10255/metrics
1116
1117    # token for "Authorization: Bearer $(cat tokenPath)"
1118    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1119
1120    # server certificate for certificate validation
1121    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1122
1123    # client certificate for authentication
1124    clientCertPath =
1125
1126    # Allow invalid SSL server certificate
1127    insecure = true
1128
1129    # include metrics help with the events
1130    includeHelp = false
1131
1132    # set output (splunk or devnull, default is [general]defaultOutput)
1133    output =
1134
1135    whitelist.1 = ^kubernetes_build_info$
1136    whitelist.2 = ^kubelet_runtime_operations_duration_seconds_sum$
1137    whitelist.3 = ^kubelet_docker_operations_duration_seconds_sum$
1138    whitelist.4 = ^kubelet_network_plugin_operations_duration_seconds_sum$
1139    whitelist.5 = ^kubelet_cgroup_manager_duration_seconds_sum$
1140    whitelist.6 = ^storage_operation_duration_seconds_sum$
1141    whitelist.7 = ^kubelet_docker_operations_errors_total$
1142    whitelist.8 = ^kubelet_runtime_operations_errors_total$
1143    whitelist.9 = ^rest_client_requests_total$
1144    whitelist.10 = ^process_cpu_seconds_total$
1145    whitelist.11 = ^process_resident_memory_bytes$
1146    whitelist.12 = ^process_virtual_memory_bytes$
1147    whitelist.13 = ^rest_client_request_duration_seconds_sum$
1148    whitelist.14 = ^kubelet_volume_stats_.+$
1149    whitelist.15 = ^rest_client_requests_total$
1150    
1151
1152    ; # Collectord reports if entropy is low
1153    ; [diagnostics::node-entropy]
1154    ; settings.path = /rootfs/proc/sys/kernel/random/entropy_avail
1155    ; settings.interval = 1h
1156    ; settings.threshold = 800
1157
1158    # Collectord can report if node reboot is required
1159    [diagnostics::node-reboot-required]
1160    settings.path = /rootfs/var/run/reboot-required*
1161    settings.interval = 1h
1162
1163    # See https://www.kernel.org/doc/Documentation/admin-guide/hw-vuln/index.rst
1164    # And https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
1165    [diagnostics::cpu-vulnerabilities]
1166    settings.path = /rootfs/sys/devices/system/cpu/vulnerabilities/*
1167    settings.interval = 1h
1168
1169
1170  003-daemonset-master.conf: |
1171    [input.prometheus::kubernetes-api]
1172
1173    # disable prometheus kubernetes-api metrics
1174    disabled = false
1175
1176    # override type
1177    type = kubernetes_prometheus
1178
1179    # specify Splunk index
1180    index =
1181
1182    # override host (environment variables are supported, by default Kubernetes node name is used)
1183    host = ${KUBERNETES_NODENAME}
1184
1185    # override source
1186    source = kubernetes-api
1187
1188    # how often to collect prometheus metrics
1189    interval = 60s
1190
1191    # request timeout
1192    timeout = 60s
1193
1194    # prometheus endpoint
1195    # at first trying to get it from localhost (avoiding load balancer, if multiple api servers)
1196    endpoint.1localhost = https://127.0.0.1:6443/metrics
1197    # as fallback using proxy
1198    endpoint.2kubeapi = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/metrics
1199
1200    # token for "Authorization: Bearer $(cat tokenPath)"
1201    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1202
1203    # server certificate for certificate validation
1204    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1205
1206    # client certificate for authentication
1207    clientCertPath =
1208
1209    # Allow invalid SSL server certificate
1210    insecure = true
1211
1212    # include metrics help with the events
1213    includeHelp = false
1214
1215    # set output (splunk or devnull, default is [general]defaultOutput)
1216    output =
1217
1218    whitelist.1 = ^kubernetes_build_info$
1219    whitelist.2 = ^authenticated_user_requests$
1220    whitelist.3 = ^apiserver_request_total$
1221    whitelist.4 = ^process_cpu_seconds_total$
1222    whitelist.5 = ^process_resident_memory_bytes$
1223    whitelist.6 = ^process_virtual_memory_bytes$
1224    whitelist.7 = ^rest_client_request_duration_seconds_sum$
1225    whitelist.8 = ^rest_client_requests_total$
1226
1227
    # This configuration works if the scheduler is bound to localhost:10251
1229    [input.prometheus::scheduler]
1230
1231    # disable prometheus scheduler metrics
1232    disabled = false
1233
1234    # override type
1235    type = kubernetes_prometheus
1236
1237    # specify Splunk index
1238    index =
1239
1240    # override host
1241    host = ${KUBERNETES_NODENAME}
1242
1243    # override source
1244    source = scheduler
1245
1246    # how often to collect prometheus metrics
1247    interval = 60s
1248
1249    # request timeout
1250    timeout = 60s
1251
1252    # prometheus endpoint
1253    endpoint.https = https://:10259/metrics
1254    endpoint.http = http://127.0.0.1:10251/metrics
1255
1256    # token for "Authorization: Bearer $(cat tokenPath)"
1257    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1258
1259    # server certificate for certificate validation
1260    certPath =
1261
1262    # client certificate for authentication
1263    clientCertPath =
1264
1265    # Allow invalid SSL server certificate
1266    insecure = true
1267
1268    # include metrics help with the events
1269    includeHelp = false
1270
1271    # set output (splunk or devnull, default is [general]defaultOutput)
1272    output =
1273
1274    whitelist.1 = ^kubernetes_build_info$
1275    whitelist.2 = ^scheduler_e2e_scheduling_duration_seconds_sum$
1276    whitelist.3 = ^scheduler_binding_duration_seconds_sum$
1277    whitelist.4 = ^scheduler_scheduling_algorithm_duration_seconds_sum$
1278    whitelist.5 = ^rest_client_request_duration_seconds_sum$
1279    whitelist.6 = ^rest_client_requests_total$
1280    whitelist.7 = ^process_cpu_seconds_total$
1281    whitelist.8 = ^process_resident_memory_bytes$
1282    whitelist.9 = ^process_virtual_memory_bytes$
1283
1284
    # This configuration works if the controller-manager is bound to localhost:10252
1286    [input.prometheus::controller-manager]
1287
1288    # disable prometheus controller-manager metrics
1289    disabled = false
1290
1291    # override type
1292    type = kubernetes_prometheus
1293
1294    # specify Splunk index
1295    index =
1296
1297    # override host
1298    host = ${KUBERNETES_NODENAME}
1299
1300    # override source
1301    source = controller-manager
1302
1303    # how often to collect prometheus metrics
1304    interval = 60s
1305
1306    # request timeout
1307    timeout = 60s
1308
1309    # prometheus endpoint
1310    endpoint.https = https://:10257/metrics
1311    endpoint.http = http://127.0.0.1:10252/metrics
1312
1313    # token for "Authorization: Bearer $(cat tokenPath)"
1314    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1315
1316    # server certificate for certificate validation
1317    certPath =
1318
1319    # client certificate for authentication
1320    clientCertPath =
1321
1322    # Allow invalid SSL server certificate
1323    insecure = true
1324
1325    # include metrics help with the events
1326    includeHelp = false
1327
1328    # set output (splunk or devnull, default is [general]defaultOutput)
1329    output =
1330
1331    whitelist.1 = ^kubernetes_build_info$
1332    whitelist.2 = ^node_collector_zone_size$
1333    whitelist.3 = ^node_collector_zone_health$
1334    whitelist.4 = ^node_collector_unhealthy_nodes_in_zone$
1335    whitelist.5 = ^rest_client_request_duration_seconds_sum$
1336    whitelist.6 = ^rest_client_requests_total$
1337    whitelist.7 = ^process_cpu_seconds_total$
1338    whitelist.8 = ^process_resident_memory_bytes$
1339    whitelist.9 = ^process_virtual_memory_bytes$
1340    
1341
1342    [input.prometheus::etcd]
1343
1344    # disable prometheus etcd metrics
1345    disabled = false
1346
1347    # override type
1348    type = kubernetes_prometheus
1349
1350    # specify Splunk index
1351    index =
1352
1353    # override host
1354    host = ${KUBERNETES_NODENAME}
1355
1356    # override source
1357    source = etcd
1358
    # how often to collect prometheus metrics
1360    interval = 60s
1361
1362    # request timeout
1363    timeout = 60s
1364
1365    # prometheus endpoint
1366    endpoint.http = http://:2379/metrics
1367    endpoint.https = https://:2379/metrics
1368
1369    # token for "Authorization: Bearer $(cat tokenPath)"
1370    tokenPath =
1371
1372    # server certificate for certificate validation
1373    certPath = /rootfs/etc/kubernetes/pki/etcd/ca.crt
1374
1375    # client certificate for authentication
1376    clientCertPath = /rootfs/etc/kubernetes/pki/apiserver-etcd-client.crt
1377    clientKeyPath = /rootfs/etc/kubernetes/pki/apiserver-etcd-client.key
1378
1379    # Allow invalid SSL server certificate
1380    insecure = true
1381
1382    # include metrics help with the events
1383    includeHelp = false
1384
1385    # set output (splunk or devnull, default is [general]defaultOutput)
1386    output =
1387
1388    whitelist.1 = ^etcd_server_leader_changes_seen_total$
1389    whitelist.2 = ^etcd_server_has_leader$
1390    whitelist.3 = ^etcd_server_proposals_committed_total$
1391    whitelist.4 = ^etcd_server_proposals_applied_total$
1392    whitelist.5 = ^etcd_server_proposals_committed_total$
1393    whitelist.6 = ^etcd_server_proposals_pending$
1394    whitelist.7 = ^etcd_server_proposals_failed_total$
1395    whitelist.8 = ^etcd_disk_wal_fsync_duration_seconds_sum$
1396    whitelist.9 = ^etcd_disk_wal_fsync_duration_seconds_count$
1397    whitelist.10 = ^etcd_disk_backend_commit_duration_seconds_sum$
1398    whitelist.11 = ^etcd_disk_backend_commit_duration_seconds_count$
1399    whitelist.12 = ^etcd_network_client_grpc_.*$
1400    whitelist.13 = ^grpc_server_handled_total$
1401    whitelist.14 = ^etcd_network_peer_round_trip_time_seconds_bucket$
1402    whitelist.15 = ^process_cpu_seconds_total$
1403    whitelist.16 = ^process_resident_memory_bytes$
1404    whitelist.17 = ^process_virtual_memory_bytes$
1405    whitelist.18 = ^process_open_fds$
1406    whitelist.19 = ^process_max_fds$
1407    whitelist.20 = ^etcd_disk_backend_commit_duration_seconds_bucket$
1408    whitelist.21 = ^etcd_disk_wal_fsync_duration_seconds_bucket$
1409
1410  004-addon.conf: |
1411    [general]
1412
1413    # addons can be run in parallel with agents
1414    addon = true
1415
1416    [input.kubernetes_events]
1417
1418    # disable events
1419    disabled = false
1420
1421    # override type
1422    type = kubernetes_events
1423
1424    # specify Splunk index
1425    index =
1426
1427    # set output (splunk or devnull, default is [general]defaultOutput)
1428    output =
1429
1430    # exclude managed fields from the metadata
1431    excludeManagedFields = true
1432
1433
1434    [input.kubernetes_watch::pods]
1435
1436    # disable events
1437    disabled = false
1438
1439    # Set the timeout for how often watch request should refresh the whole list
1440    refresh = 10m
1441
1442    apiVersion = v1
1443    kind = Pod
1444    namespace =
1445
1446    # override type
1447    type = kubernetes_objects
1448
1449    # specify Splunk index
1450    index =
1451
1452    # set output (splunk or devnull, default is [general]defaultOutput)
1453    output =
1454
1455    # exclude managed fields from the metadata
1456    excludeManagedFields = true
1457
1458    # you can remove or hash some values in the events (after modifyValues you can define path in the JSON object,
1459    # and the value can be hash:{hashFunction}, or remove to remove the object )
1460    ; modifyValues.object.data.* = hash:sha256
1461    ; modifyValues.object.metadata.annotations.* = remove
1462
1463    # You can exclude events by namespace with blacklist or whitelist only required namespaces
1464    # blacklist.kubernetes_namespace = ^namespace0$
1465    # whitelist.kubernetes_namespace = ^((namespace1)|(namespace2))$
1466
1467    [input.kubernetes_watch::resourcequota]
1468    # disable events
1469    disabled = false
1470
1471    # Set the timeout for how often watch request should refresh the whole list
1472    refresh = 10m
1473
1474    apiVersion = v1
1475    kind = ResourceQuota
1476    namespace =
1477
1478    # override type
1479    type = kubernetes_objects
1480
1481    # specify Splunk index
1482    index =
1483
1484    # set output (splunk or devnull, default is [general]defaultOutput)
1485    output =
1486
1487    # exclude managed fields from the metadata
1488    excludeManagedFields = true
1489
1490    [input.kubernetes_watch::nodes]
1491    # disable events
1492    disabled = false
1493
1494    # Set the timeout for how often watch request should refresh the whole list
1495    refresh = 10m
1496
1497    apiVersion = v1
1498    kind = Node
1499    namespace =
1500
1501    # override type
1502    type = kubernetes_objects
1503
1504    # specify Splunk index
1505    index =
1506
1507    # set output (splunk or devnull, default is [general]defaultOutput)
1508    output =
1509
1510    # exclude managed fields from the metadata
1511    excludeManagedFields = true
1512
1513---
1514apiVersion: apps/v1
1515kind: DaemonSet
1516metadata:
1517  name: collectorforkubernetes
1518  namespace: collectorforkubernetes
1519  labels:
1520    app: collectorforkubernetes
1521spec:
1522  # Default updateStrategy is OnDelete. For collectord RollingUpdate is suitable
1523  # When you update configuration
1524  updateStrategy:
1525    type: RollingUpdate
1526  selector:
1527    matchLabels:
1528      daemon: collectorforkubernetes
1529  template:
1530    metadata:
1531      name: collectorforkubernetes
1532      labels:
1533        daemon: collectorforkubernetes
1534    spec:
1535      priorityClassName: collectorforkubernetes-critical
1536      dnsPolicy: ClusterFirstWithHostNet
1537      hostNetwork: true
1538      serviceAccountName: collectorforkubernetes
1539      # We run this DaemonSet only for Non-Masters
1540      affinity:
1541        nodeAffinity:
1542          requiredDuringSchedulingIgnoredDuringExecution:
1543            nodeSelectorTerms:
1544            - matchExpressions:
1545              - key: node-role.kubernetes.io/control-plane
1546                operator: DoesNotExist
1547      tolerations:
1548      - operator: "Exists"
1549        effect: "NoSchedule"
1550      - operator: "Exists"
1551        effect: "NoExecute"
1552      containers:
1553      - name: collectorforkubernetes
1554        # Collectord version
1555        image: docker.io/outcoldsolutions/collectorforkubernetes:25.10.3
1556        imagePullPolicy: Always
1557        securityContext:
1558          runAsUser: 0
1559          privileged: true
1560        # Define your resources if you need. Defaults should be fine for most.
1561        # You can lower or increase based on your hosts.
1562        resources:
1563          limits:
1564            cpu: 2000m
1565            memory: 1024Mi
1566          requests:
1567            cpu: 500m
1568            memory: 256Mi
1569        env:
1570        - name: KUBERNETES_NODENAME
1571          valueFrom:
1572            fieldRef:
1573              fieldPath: spec.nodeName
1574        - name: POD_NAME
1575          valueFrom:
1576            fieldRef:
1577              fieldPath: metadata.name
1578        volumeMounts:
1579        # We store state in /data folder (file positions)
1580        - name: collectorforkubernetes-state
1581          mountPath: /data
1582        # Configuration file deployed with ConfigMap
1583        - name: collectorforkubernetes-config
1584          mountPath: /config/
1585          readOnly: true
1586        # Root filesystem to have access to logs and metrics
1587        - name: rootfs
1588          mountPath: /rootfs/
1589          readOnly: false
1590          mountPropagation: HostToContainer
1591        # correct timezone
1592        - name: localtime
1593          mountPath: /etc/localtime
1594          readOnly: true
1595      volumes:
1596      # We store state directly on host, change this location, if
1597      # your persistent volume is somewhere else
1598      - name: collectorforkubernetes-state
1599        hostPath:
1600          path: /var/lib/collectorforkubernetes/data/
1601          type: DirectoryOrCreate
1602      # Location of docker root (for container logs and metadata)
1603      - name: rootfs
1604        hostPath:
1605          path: /
1606      # correct timezone
1607      - name: localtime
1608        hostPath:
1609          path: /etc/localtime
1610      # configuration from ConfigMap
1611      - name: collectorforkubernetes-config
1612        configMap:
1613          name: collectorforkubernetes
1614          items:
1615          - key: 001-general.conf
1616            path: 001-general.conf
1617          - key: 002-daemonset.conf
1618            path: 002-daemonset.conf
1619---
1620apiVersion: apps/v1
1621kind: DaemonSet
1622metadata:
1623  name: collectorforkubernetes-master
1624  namespace: collectorforkubernetes
1625  labels:
1626    app: collectorforkubernetes
1627spec:
1628  updateStrategy:
1629    type: RollingUpdate
1630  selector:
1631    matchLabels:
1632      daemon: collectorforkubernetes
1633  template:
1634    metadata:
1635      name: collectorforkubernetes-master
1636      labels:
1637        daemon: collectorforkubernetes
1638    spec:
1639      priorityClassName: collectorforkubernetes-critical
1640      dnsPolicy: ClusterFirstWithHostNet
1641      hostNetwork: true
1642      serviceAccountName: collectorforkubernetes
1643      affinity:
1644        nodeAffinity:
1645          requiredDuringSchedulingIgnoredDuringExecution:
1646            nodeSelectorTerms:
1647            - matchExpressions:
1648              - key: node-role.kubernetes.io/control-plane
1649                operator: Exists
1650      tolerations:
1651      - operator: "Exists"
1652        effect: "NoSchedule"
1653      - operator: "Exists"
1654        effect: "NoExecute"
1655      containers:
1656      - name: collectorforkubernetes
1657        image: docker.io/outcoldsolutions/collectorforkubernetes:25.10.3
1658        imagePullPolicy: Always
1659        securityContext:
1660          runAsUser: 0
1661          privileged: true
1662        resources:
1663          limits:
1664            cpu: 2000m
1665            memory: 1024Mi
1666          requests:
1667            cpu: 500m
1668            memory: 256Mi
1669        env:
1670        - name: KUBERNETES_NODENAME
1671          valueFrom:
1672            fieldRef:
1673              fieldPath: spec.nodeName
1674        - name: POD_NAME
1675          valueFrom:
1676            fieldRef:
1677              fieldPath: metadata.name
1678        volumeMounts:
1679        - name: collectorforkubernetes-state
1680          mountPath: /data
1681        - name: collectorforkubernetes-config
1682          mountPath: /config/
1683          readOnly: true
1684        - name: rootfs
1685          mountPath: /rootfs/
1686          readOnly: false
1687          mountPropagation: HostToContainer
1688        - name: localtime
1689          mountPath: /etc/localtime
1690          readOnly: true
1691      volumes:
1692      - name: collectorforkubernetes-state
1693        hostPath:
1694          path: /var/lib/collectorforkubernetes/data/
1695          type: DirectoryOrCreate
1696      - name: rootfs
1697        hostPath:
1698          path: /
1699      - name: localtime
1700        hostPath:
1701          path: /etc/localtime
1702      - name: collectorforkubernetes-config
1703        configMap:
1704          name: collectorforkubernetes
1705          items:
1706          - key: 001-general.conf
1707            path: 001-general.conf
1708          - key: 002-daemonset.conf
1709            path: 002-daemonset.conf
1710          - key: 003-daemonset-master.conf
1711            path: 003-daemonset-master.conf
1712---
1713apiVersion: apps/v1
1714kind: Deployment
1715metadata:
1716  name: collectorforkubernetes-addon
1717  namespace: collectorforkubernetes
1718  labels:
1719    app: collectorforkubernetes
1720spec:
1721  replicas: 1
1722  selector:
1723    matchLabels:
1724      daemon: collectorforkubernetes
1725  template:
1726    metadata:
1727      name: collectorforkubernetes-addon
1728      labels:
1729        daemon: collectorforkubernetes
1730    spec:
1731      priorityClassName: collectorforkubernetes-critical
1732      serviceAccountName: collectorforkubernetes
1733      containers:
1734      - name: collectorforkubernetes
1735        image: docker.io/outcoldsolutions/collectorforkubernetes:25.10.3
1736        imagePullPolicy: Always
1737        securityContext:
1738          runAsUser: 0
1739          privileged: true
1740        resources:
1741          limits:
1742            cpu: 1000m
1743            memory: 512Mi
1744          requests:
1745            cpu: 200m
1746            memory: 64Mi
1747        env:
1748        - name: KUBERNETES_NODENAME
1749          valueFrom:
1750            fieldRef:
1751              fieldPath: spec.nodeName
1752        - name: POD_NAME
1753          valueFrom:
1754            fieldRef:
1755              fieldPath: metadata.name
1756        volumeMounts:
1757        - name: collectorforkubernetes-state
1758          mountPath: /data
1759        - name: collectorforkubernetes-config
1760          mountPath: /config/
1761          readOnly: true
1762      volumes:
1763      - name: collectorforkubernetes-state
1764        hostPath:
1765          path: /var/lib/collectorforkubernetes/data/
1766          type: Directory
1767      - name: collectorforkubernetes-config
1768        configMap:
1769          name: collectorforkubernetes
1770          items:
1771          - key: 001-general.conf
1772            path: 001-general.conf
1773          - key: 004-addon.conf
1774            path: 004-addon.conf

About Outcold Solutions

Outcold Solutions provides solutions for monitoring Kubernetes, OpenShift and Docker clusters in Splunk Enterprise and Splunk Cloud. We offer certified Splunk applications, which give you insights across all container environments. We are helping businesses reduce complexity related to logging and monitoring by providing easy-to-use and easy-to-deploy solutions for Linux and Windows containers. We deliver applications, which help developers monitor their applications and help operators keep their clusters healthy. With the power of Splunk Enterprise and Splunk Cloud, we offer one solution to help you keep all the metrics and logs in one place, allowing you to quickly address complex questions on container performance.

Red Hat
Splunk
AWS