Collectord configuration

Download

collectorforkubernetes.yaml

CURL

1curl -O https://www.outcoldsolutions.com/docs/monitoring-kubernetes/collectorforkubernetes.yaml

WGET

1wget https://www.outcoldsolutions.com/docs/monitoring-kubernetes/collectorforkubernetes.yaml

collectorforkubernetes.yaml

   1apiVersion: v1
   2kind: Namespace
   3metadata:
   4  labels:
   5    app: collectorforkubernetes
   6  name: collectorforkubernetes
   7---
   8apiVersion: apiextensions.k8s.io/v1
   9kind: CustomResourceDefinition
  10metadata:
  11  name: configurations.collectord.io
  12spec:
  13  group: collectord.io
  14  versions:
  15    - name: v1
  16      served: true
  17      storage: true
  18      schema:
  19        openAPIV3Schema:
  20          type: object
  21          properties:
  22            spec:
  23              type: object
  24              additionalProperties: true
  25            force:
  26              type: boolean
  27  scope: Cluster
  28  names:
  29    listKind: ConfigurationList
  30    plural: configurations
  31    singular: configuration
  32    kind: Configuration
  33---
  34apiVersion: apiextensions.k8s.io/v1
  35kind: CustomResourceDefinition
  36metadata:
  37  name: splunkoutputs.collectord.io
  38spec:
  39  group: collectord.io
  40  versions:
  41    - name: v1
  42      served: true
  43      storage: true
  44      schema:
  45        openAPIV3Schema:
  46          type: object
  47          properties:
  48            spec:
  49              type: object
  50              properties:
  51                url:
  52                  type: string
  53                  format: uri
  54                insecure:
  55                  type: boolean
  56                token:
  57                  type: string
  58                  description: "Plain token"
  59                tokenFromSecret:
  60                  type: object
  61                  description: "Reference to a Kubernetes Secret"
  62                  properties:
  63                    secret:
  64                      type: string
  65                    key:
  66                      type: string
  67              oneOf:
  68                - required: ["token"]
  69                - required: ["tokenFromSecret"]
  70  scope: Namespaced
  71  names:
  72    listKind: SplunkOutputList
  73    plural: splunkoutputs
  74    singular: splunkoutput
  75    kind: SplunkOutput
  76---
  77apiVersion: v1
  78kind: ServiceAccount
  79metadata:
  80  labels:
  81    app: collectorforkubernetes
  82  name: collectorforkubernetes
  83  namespace: collectorforkubernetes
  84---
  85apiVersion: scheduling.k8s.io/v1
  86kind: PriorityClass
  87metadata:
  88  name: collectorforkubernetes-critical
  89value: 1000000000
  90---
  91apiVersion: rbac.authorization.k8s.io/v1
  92kind: ClusterRole
  93metadata:
  94  labels:
  95    app: collectorforkubernetes
  96  name: collectorforkubernetes
  97rules:
  98- apiGroups: ['extensions']
  99  resources: ['podsecuritypolicies']
 100  verbs:     ['use']
 101  resourceNames:
 102  - privileged
 103- apiGroups:
 104  - ""
 105  - apps
 106  - batch
 107  - extensions
 108  - collectord.io
 109  - rbac.authorization.k8s.io
 110  resources:
 111  - splunkoutputs
 112  - alertmanagers
 113  - cronjobs
 114  - daemonsets
 115  - deployments
 116  - endpoints
 117  - events
 118  - jobs
 119  - namespaces
 120  - nodes
 121  - nodes/metrics
 122  - nodes/proxy
 123  - pods
 124  - replicasets
 125  - replicationcontrollers
 126  - scheduledjobs
 127  - secrets
 128  - services
 129  - statefulsets
 130  - persistentvolumeclaims
 131  - configurations
 132  - resourcequotas
 133  - clusterroles
 134  verbs:
 135  - get
 136  - list
 137  - watch
 138- nonResourceURLs:
 139  - /metrics
 140  verbs:
 141  - get
 142  apiGroups: []
 143  resources: []
 144---
 145apiVersion: rbac.authorization.k8s.io/v1
 146kind: ClusterRoleBinding
 147metadata:
 148  labels:
 149    app: collectorforkubernetes
 150  name: collectorforkubernetes
 151roleRef:
 152  apiGroup: rbac.authorization.k8s.io
 153  kind: ClusterRole
 154  name: collectorforkubernetes
 155subjects:
 156  - kind: ServiceAccount
 157    name: collectorforkubernetes
 158    namespace: collectorforkubernetes
 159---
 160apiVersion: v1
 161kind: ConfigMap
 162metadata:
 163  name: collectorforkubernetes
 164  namespace: collectorforkubernetes
 165  labels:
 166    app: collectorforkubernetes
 167data:
 168  001-general.conf: |
 169    # The general configuration is used for all deployments
 170    #
 171    # Run collectord with the flag -conf and specify location of the configuration files.
 172    #
 173    # You can override all the values using environment variables with the format like
 174    #   COLLECTOR__<ANYNAME>=<section>__<key>=<value>
 175    # As an example you can set dataPath in [general] section as
 176    #   COLLECTOR__DATAPATH=general__dataPath=C:\\some\\path\\data.db
 177    # This parameter can be configured using -env-override, set it to empty string to disable this feature
 178
 179    [general]
 180
 181    # Please review license https://www.outcoldsolutions.com/docs/license-agreement/
 182    # and accept license by changing the value to *true*
 183    acceptLicense = false
 184
 185    # Location for the database
 186    # Collectord stores positions of the files and internal state
 187    dataPath = ./data/
 188
 189    # log level (accepted values are trace, debug, info, warn, error, fatal)
 190    logLevel = info
 191
 192    # http server gives access to two endpoints
 193    # /healthz
 194    # /metrics/json
 195    # /metrics/prometheus
 196    # httpServerBinding = 0.0.0.0:11888
 197    httpServerBinding =
 198
 199    # log requests to the http server
 200    httpServerLog = false
 201
 202    # telemetry report endpoint, set it to empty string to disable telemetry
 203    telemetryEndpoint = https://license.outcold.solutions/telemetry/
 204
 205    # license check endpoint
 206    licenseEndpoint = https://license.outcold.solutions/license/
 207
 208    # license server through proxy
 209    # This configuration is used only for the Outcold Solutions License Server
 210    # For license server running on-premises, use configuration under [license.client]
 211    licenseServerProxyUrl =
 212
 213    # authentication with basic authorization (user:password)
 214    # This configuration is used only for the Outcold Solutions License Server
 215    # For license server running on-premises, use configuration under [license.client]
 216    licenseServerProxyBasicAuth =
 217
 218    # license key
 219    license =
 220
 221    # Environment variable $KUBERNETES_NODENAME is used by default to setup hostname
 222    # Use value below to override specific name
 223    hostname =
 224
 225    # Default output for events, logs and metrics
 226    # valid values: splunk and devnull
 227    # Use devnull by default if you don't want to redirect data
 228    defaultOutput = splunk
 229
 230    # Default buffer size for file input
 231    fileInputBufferSize = 256b
 232
 233    # Maximum size of one line the file reader can read
 234    fileInputLineMaxSize = 1mb
 235
  236    # Include custom fields to attach to every event, in the example below every event sent to Splunk will have
  237    # indexed field my_environment=dev. Field names should match ^[a-z][_a-z0-9]*$
 238    # Better way to configure that is to specify labels for Kubernetes Nodes.
 239    # ; fields.my_environment = dev
 240    # Identify the cluster if you are planning to monitor multiple clusters
 241    fields.kubernetes_cluster = -
 242
 243    # Include EC2 Metadata (see list of possible fields https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html)
 244    # Should be in format ec2Metadata.{desired_field_name} = {url path to read the value}
 245    # ec2Metadata.ec2_instance_id = /latest/meta-data/instance-id
 246    # ec2Metadata.ec2_instance_type = /latest/meta-data/instance-type
 247
 248    # subdomain for the annotations added to the pods, workloads, namespaces or containers, like splunk.collectord.io/..
 249    annotationsSubdomain =
 250
 251    # configure global thruput per second for forwarded logs (metrics are not included)
 252    # for example if you set `thruputPerSecond = 512Kb`, that will limit amount of logs forwarded
 253    # from the single Collectord instance to 512Kb per second.
 254    # You can configure thruput individually for the logs (including specific for container logs) below
 255    thruputPerSecond =
 256
 257    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 258    # older than 7 days
 259    tooOldEvents =
 260
 261    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 262    tooNewEvents =
 263
 264    # For input.files::X and application logs, when glob or match are configured, Collectord can automatically
 265    # detect gzipped files and skip them (based on the extensions or magic numbers)
 266    autoSkipGzipFiles = true
 267
 268    [license.client]
 269    # point to the license located on the HTTP web server, or a hosted by the Collectord running as license server
 270    url =
 271    # basic authentication for the HTTP server
 272    basicAuth =
 273    # if SSL, ignore the certificate verification
 274    insecure = false
 275    # CA Path for the Server certificate
 276    capath =
  277    # CA Name for the Server certificate
 278    caname =
 279    # license server through proxy
 280    proxyUrl =
 281    # authentication with basic authorization (user:password)
 282    proxyBasicAuth =
 283
 284
 285    # forward internal collectord metrics
 286    [input.collectord_metrics]
 287
 288    # disable collectord internal metrics
 289    disabled = false
 290
 291    # override type
 292    type = kubernetes_prometheus
 293
 294    # how often to collect internal metrics
 295    interval = 1m
 296
 297    # set output (splunk or devnull, default is [general]defaultOutput)
 298    output =
 299
 300    # specify Splunk index
 301    index =
 302
 303    # whitelist or blacklist the metrics
 304    whitelist.1 = ^file_input_open$
 305    whitelist.2 = ^file_input_read_bytes$
 306    whitelist.3 = ^kubernetes_handlers$
 307    whitelist.4 = ^pipe$
 308    whitelist.5 = ^pipelines_num$
 309    whitelist.6 = ^splunk_post_bytes_sum.*$
 310    whitelist.7 = ^splunk_post_events_count_sum.*$
 311    whitelist.8 = ^splunk_post_failed_requests$
 312    whitelist.9 = ^splunk_post_message_max_lag_seconds_bucket.*$
 313    whitelist.10 = ^splunk_post_requests_seconds_sum.*$
 314    whitelist.11 = ^splunk_post_retries_required_sum.*$
 315
 316
 317    # connection to kubernetes api
 318    [general.kubernetes]
 319
 320    # Override service URL for Kubernetes (default is ${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT})
 321    serviceURL =
 322
 323    # Environment variable $KUBERNETES_NODENAME is used by default to setup nodeName
 324    # Use it only when you need to override it
 325    nodeName =
 326
 327    # Configuration to access the API server,
 328    # see https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod
 329    # for details
 330    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
 331    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
 332
 333    # Default timeout for http responses. The streaming/watch requests depend on this timeout.
 334    timeout = 30m
 335
 336    # How long to keep the cache for the recent calls to API server (to limit number of calls when collectord discovers new pods)
 337    metadataTTL = 30s
 338
 339    # regex to find pods
 340    podsCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?$
 341
 342    # regex to find containers in the pods
 343    containersCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?/(docker-|crio-|cri-\w+-)?[0-9a-f]{64}(\.scope)?(\/.+)?$
 344
 345    # path to the kubelet root location (use it to discover application logs for emptyDir)
 346    # the expected format is `pods/{pod-id}/volumes/kubernetes.io~empty-dir/{volume-name}/_data/`
 347    volumesRootDir = /rootfs/var/lib/kubelet/
 348
 349    # You can attach annotations as a metadata, using the format
 350    #   includeAnnotations.{key} = {regexp}
 351    # For example if you want to include all annotations that starts with `prometheus.io` or `example.com` you can include
 352    # the following format:
 353    #   includeAnnotations.1 = ^prometheus\.io.*
 354    #   includeAnnotations.2 = ^example\.com.*
 355
 356    # You can exclude labels from metadata, using the format
 357    #   excludeLabels.{key} = {regexp}
 358    # For example if you want to exclude all labels that starts with `prometheus.io` or `example.com` you can include
 359    # the following format:
 360    #   excludeLabels.1 = ^prometheus\.io.*
 361    #   excludeLabels.2 = ^example\.com.*
 362
 363    # watch for changes (annotations) in the objects
 364    watch.namespaces = v1/namespace
 365    watch.deployments = apps/v1/deployment
 366    watch.configurations = collectord.io/v1/configuration
 367
 368    # Collectord can review the assigned ClusterRole and traverse metadata for the Pods only for the Owner objects
 369    # that are defined in the ClusterRole, ignoring anything else, it does not have access to.
 370    # This way Collectord does not generate 403 requests on API Server
 371    clusterRole = collectorforkubernetes
 372
 373    # Alternative of telling Collectord about the ClusterRole is to manually list the objects.
 374    # You can define which objects Collectord should traverse when it sees Owners.
 375    ; traverseOwnership.namespaces = v1/namespace
 376
 377    # Implementation of the watch protocol.
 378    # 0 - use the default implementation (2)
 379    # 1 - use the watch implementation that is optimized for the small number of objects (just issue one watch for all objects)
 380    # 2 - use the watch implementation that is optimized for the large number of objects (paginate through the list of objects and issue watch for the last resource version)
 381    watchImplementation = 2
 382
 383    # watch for pods annotations, setup prometheus collection
 384    # for these pods
 385    # Addon listens on Pod Network
 386    # DaemonSets listen on Host Network
 387    [input.prometheus_auto]
 388
 389    # disable prometheus auto discovery for pods
 390    disabled = false
 391
 392    # override type
 393    type = kubernetes_prometheus
 394
 395    # specify Splunk index
 396    index =
 397
 398    # how often to collect prometheus metrics
 399    interval = 60s
 400
 401    # include metrics help with the events
 402    includeHelp = true
 403
 404    # http client timeout
 405    timeout = 30s
 406
 407    # set output (splunk or devnull, default is [general]defaultOutput)
 408    output =
 409
 410    # Include an Authorization header for the prometheus scrapper
 411    # When configuring scrapping with collectord using annotations use prometheus.1-AuthorizationKey=key1
 412    # authorization.key1 = Bearer FOO
 413
 414
 415    # Splunk output
 416    [output.splunk]
 417
 418    # Splunk HTTP Event Collector url
 419    url =
  420    # You can specify multiple splunk URLs with
 421    #
  422    # urls.0 = https://server1:8088/services/collector/event/1.0
  423    # urls.1 = https://server2:8088/services/collector/event/1.0
  424    # urls.2 = https://server3:8088/services/collector/event/1.0
 425    #
 426    # Limitations:
 427    # * The urls cannot have different path.
 428
 429    # Specify how URL should be picked up (in case if multiple is used)
 430    # urlSelection = random|round-robin|random-with-round-robin
 431    # where:
 432    # * random - choose random url on first selection and after each failure (connection or HTTP status code >= 500)
 433    # * round-robin - choose url starting from first one and bump on each failure (connection or HTTP status code >= 500)
 434    # * random-with-round-robin - choose random url on first selection and after that in round-robin on each
 435    #                             failure (connection or HTTP status code >= 500)
 436    urlSelection = random-with-round-robin
 437
 438    # Splunk HTTP Event Collector Token
 439    token =
 440
 441    # Allow invalid SSL server certificate
 442    insecure = false
 443    # minTLSVersion = TLSv1.2
 444    # maxTLSVersion = TLSv1.3
 445
  446    # Path to CA certificate
 447    caPath =
 448
 449    # CA Name to verify
 450    caName =
 451
 452    # path for client certificate (if required)
 453    clientCertPath =
 454
 455    # path for client key (if required)
 456    clientKeyPath =
 457
 458    # Events are batched with the maximum size set by batchSize and staying in pipeline for not longer
 459    # than set by frequency
 460    frequency = 5s
 461    batchSize = 768K
 462    # limit by the number of events (0 value has no limit on the number of events)
 463    events = 50
 464
 465    # Splunk through proxy
 466    proxyUrl =
 467
 468    # authentication with basic authorization (user:password)
 469    proxyBasicAuth =
 470
 471    # Splunk acknowledgement url (.../services/collector/ack)
 472    ackUrl =
  473    # You can specify multiple splunk URLs for ackUrl
 474    #
  475    # ackUrls.0 = https://server1:8088/services/collector/ack
  476    # ackUrls.1 = https://server2:8088/services/collector/ack
  477    # ackUrls.2 = https://server3:8088/services/collector/ack
 478    #
  479    # Make sure that they are in the same order as urls for url, to make sure that this Splunk instance will be
 480    # able to acknowledge the payload.
 481    #
 482    # Limitations:
 483    # * The urls cannot have different path.
 484
 485    # Enable index acknowledgment
 486    ackEnabled = false
 487
 488    # Index acknowledgment timeout
 489    ackTimeout = 3m
 490
 491    # Timeout specifies a time limit for requests made by collectord.
 492    # The timeout includes connection time, any
 493    # redirects, and reading the response body.
 494    timeout = 30s
 495
  496    # in case when the pipeline can post to multiple indexes, we want to avoid the possibility of blocking
 497    # all pipelines, because just some events have incorrect index
 498    dedicatedClientPerIndex = true
 499
 500    # possible values: RedirectToDefault, Drop, Retry
 501    incorrectIndexBehavior = RedirectToDefault
 502
 503    # gzip compression level (nocompression, default, 1...9)
 504    compressionLevel = default
 505
 506    # number of dedicated splunk output threads (to increase throughput above 4k events per second)
 507    threads = 2
 508    # Default algorithm between threads is roundrobin, but you can change it to weighted
 509    ; threadsAlgorithm = weighted
 510
 511    # if you want to exclude some preindexed fields from events
 512    # excludeFields.kubernetes_pod_ip = true
 513
 514    # By default if there are no indexes defined on the message, Collectord sends the event without the index, and
 515    # Splunk HTTP Event Collector going to use the default index for the Token. You can change that, and tell Collectord
 516    # to ignore all events that don't have index defined explicitly
 517    ; requireExplicitIndex = true
 518
 519    # You can define if you want to truncate messages that are larger than 1M in length (or define your own size, like 256K)
 520    ; maximumMessageLength = 1M
 521
 522    # For messages generated from logs, include unique `event_id` in the event
 523    ; includeEventID = false
 524
 525    # Dedicated queue size for the output, default is 1024, larger queue sizes will require more memory,
 526    # but will allow to handle more events in case of network issues
 527    queueSize = 1024
 528
 529    # How many digits after the decimal point to keep for timestamps (0-9)
 530    # Defaults to 3 (milliseconds)
 531    # Change to 6 for microseconds
 532    # Change to 9 for nanoseconds
 533    ; timestampPrecision = 3
 534
 535  002-daemonset.conf: |
 536    # DaemonSet configuration is used for Nodes and Masters.
 537
  538    # connection to CRIO
 539    [general.cri-o]
 540
 541    # url for CRIO API, only unix socket is supported
 542    url = unix:///rootfs/var/run/crio/crio.sock
 543
 544    # Timeout for http responses to docker client. The streaming requests depend on this timeout.
 545    timeout = 1m
 546
 547
 548    [general.containerd]
 549    # Runtime can be on /rootfs/run/containerd (depends on the Linux distribution)
 550    runtimePath = /rootfs/var/run/containerd
 551    namespace = k8s.io
 552
 553
 554    # cgroup input
 555    [input.system_stats]
 556
 557    # disable system level stats
 558    disabled.host = false
 559    disabled.cgroup = false
 560
 561    # cgroups fs location
 562    pathCgroups = /rootfs/sys/fs/cgroup
 563
 564    # proc location
 565    pathProc = /rootfs/proc
 566
 567    # how often to collect cgroup stats
 568    statsInterval = 30s
 569
 570    # override type
 571    type.host = kubernetes_stats_v2_host
 572    type.cgroup = kubernetes_stats_v2_cgroup
 573
 574    # specify Splunk index
 575    index.host =
 576    index.cgroup =
 577
 578    # set output (splunk or devnull, default is [general]defaultOutput)
 579    output.host =
 580    output.cgroup =
 581
 582
 583    # proc input
 584    [input.proc_stats]
 585
 586    # disable proc level stats
 587    disabled = false
 588
 589    # proc location
 590    pathProc = /rootfs/proc
 591
 592    # how often to collect proc stats
 593    statsInterval = 30s
 594
 595    # override type
 596    type = kubernetes_proc_stats_v2
 597
 598    # specify Splunk index
 599    index.host =
 600    index.cgroup =
 601
 602    # proc filesystem includes by default system threads (there can be over 100 of them)
 603    # these stats do not help with the observability
 604    # excluding them can reduce the size of the index, performance of the searches and usage of the collector
 605    includeSystemThreads = false
 606
 607    # set output (splunk or devnull, default is [general]defaultOutput)
 608    output.host =
 609    output.cgroup =
 610
 611    # Hide arguments for the processes, replacing with HIDDEN_ARGS(NUMBER)
 612    hideArgs = false
 613
 614
 615    # network stats
 616    [input.net_stats]
 617
 618    # disable net stats
 619    disabled = false
 620
 621    # proc path location
 622    pathProc = /rootfs/proc
 623
 624    # how often to collect net stats
 625    statsInterval = 30s
 626
 627    # override type
 628    type = kubernetes_net_stats_v2
 629
 630    # specify Splunk index
 631    index.host =
 632    index.cgroup =
 633
 634    # set output (splunk or devnull, default is [general]defaultOutput)
 635    output.host =
 636    output.cgroup =
 637
 638
 639    # network socket table
 640    [input.net_socket_table]
 641
 642    # disable net stats
 643    disabled = false
 644
 645    # proc path location
 646    pathProc = /rootfs/proc
 647
 648    # how often to collect net stats
 649    statsInterval = 30s
 650
 651    # override type
 652    type = kubernetes_net_socket_table
 653
 654    # specify Splunk index
 655    index.host =
 656    index.cgroup =
 657
 658    # set output (splunk or devnull, default is [general]defaultOutput)
 659    output.host =
 660    output.cgroup =
 661
 662    # group connections by tcp_state, localAddr, remoteAddr (if localPort is not the port it is listening on)
  663    # that can significantly reduce the amount of events
 664    group = true
 665
 666    # Collectord can watch for services, node, and pod IP addresses, and lookup the names
 667    # for the IP addresses. Keeping this enabled can add a significant load on the API Server, with large number of pods.
 668    disableLookup = false
 669
 670
 671    # mount input (collects mount stats where kubelet runtime is stored)
 672    [input.mount_stats]
 673
 674    # disable system level stats
 675    disabled = false
 676
 677    # how often to collect mount stats
 678    statsInterval = 30s
 679
 680    # override type
 681    type = kubernetes_mount_stats
 682
 683    # specify Splunk index
 684    index =
 685
 686    # set output (splunk or devnull, default is [general]defaultOutput)
 687    output =
 688
 689
 690    # diskstats input (collects /proc/diskstats)
 691    [input.disk_stats]
 692
 693    # disable system level stats
 694    disabled = false
 695
 696    # how often to collect mount stats
 697    statsInterval = 30s
 698
 699    # override type
 700    type = kubernetes_disk_stats
 701
 702    # specify Splunk index
 703    index =
 704
 705    # set output (splunk or devnull, default is [general]defaultOutput)
 706    output =
 707
 708
 709    # Container Log files
 710    [input.files]
 711
 712    # disable container logs monitoring
 713    disabled = false
 714
 715    # root location of docker log files
 716    # logs are expected in standard docker format like {containerID}/{containerID}-json.log
 717    # rotated files
 718    path = /rootfs/var/lib/docker/containers/
 719    # root location of CRI-O files
 720    # logs are expected in Kubernetes format, like {podID}/{containerName}/0.log
 721    crioPath = /rootfs/var/log/pods/
 722
 723    # (obsolete) glob matching pattern for log files
 724    # glob = */*-json.log*
 725
 726    # files are read using polling schema, when reach the EOF how often to check if files got updated
 727    pollingInterval = 250ms
 728
 729    # how often to look for the new files under logs path
 730    walkingInterval = 5s
 731
 732    # include verbose fields in events (file offset)
 733    verboseFields = false
 734
 735    # override type
 736    type = kubernetes_logs
 737
 738    # specify Splunk index
 739    index =
 740
 741    # docker splits events when they are larger than 10-100k (depends on the docker version)
 742    # we join them together by default and forward to Splunk as one event
 743    joinPartialEvents = true
 744
 745    # In case if your containers report messages with terminal colors or other escape sequences
 746    # you can enable strip for all the containers in one place.
 747    # Better is to enable it only for required container with the label collectord.io/strip-terminal-escape-sequences=true
 748    stripTerminalEscapeSequences = false
  749    # Regexp used for stripping terminal colors, it does not strip all the escape sequences
 750    # Read http://man7.org/linux/man-pages/man4/console_codes.4.html for more information
 751    stripTerminalEscapeSequencesRegex = (\x1b\[\d{1,3}(;\d{1,3})*m)|(\x07)|(\x1b]\d+(\s\d)?;[^\x07]+\x07)|(.*\x1b\[K)
 752
 753    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 754    samplingPercent = -1
 755
 756    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 757    samplingKey =
 758
 759    # set output (splunk or devnull, default is [general]defaultOutput)
 760    output =
 761
  762    # configure default thruput per second for each container log
 763    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 764    # from the single container to 128Kb per second.
 765    thruputPerSecond =
 766
 767    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 768    # older than 7 days
 769    tooOldEvents =
 770
 771    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 772    tooNewEvents =
 773
 774
 775    # Application Logs
 776    [input.app_logs]
 777
 778    # disable container application logs monitoring
 779    disabled = false
 780
 781    # root location of mounts (applies to hostPath mounts only), if the hostPath differs inside container from the path on host
 782    root = /rootfs/
 783
 784    # how often to review list of available volumes
 785    syncInterval = 5s
 786
 787    # glob matching pattern for log files
 788    glob = *.log*
 789
 790    # files are read using polling schema, when reach the EOF how often to check if files got updated
 791    pollingInterval = 250ms
 792
 793    # how often to look for the new files under logs path
 794    walkingInterval = 5s
 795
 796    # include verbose fields in events (file offset)
 797    verboseFields = false
 798
 799    # override type
 800    type = kubernetes_logs
 801
 802    # specify Splunk index
 803    index =
 804
 805    # we split files using new line character, with this configuration you can specify what defines the new event
 806    # after new line
 807    eventPatternRegex = ^[^\s]
 808    # Maximum interval of messages in pipeline
 809    eventPatternMaxInterval = 100ms
 810    # Maximum time to wait for the messages in pipeline
 811    eventPatternMaxWait = 1s
 812    # Maximum message size
 813    eventPatternMaxSize = 1MB
 814
 815    # set output (splunk or devnull, default is [general]defaultOutput)
 816    output =
 817
  818    # configure default thruput per second for each container log
 819    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 820    # from the single container to 128Kb per second.
 821    thruputPerSecond =
 822
 823    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 824    # older than 7 days
 825    tooOldEvents =
 826
 827    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 828    tooNewEvents =
 829
 830    # Configure how long Collectord should keep the file descriptors open for files, that has not been forwarded yet
  831    # When using PVC, and if pipeline is lagging behind, Collectord holding open fd for files can cause long termination
 832    # of pods, as kubelet cannot unmount the PVC volume from the system
 833    maxHoldAfterClose = 1800s
 834
 835
 836    # Host logs. Input syslog(.\d+)? files
 837    [input.files::syslog]
 838
 839    # disable host level logs
 840    disabled = false
 841
 842    # root location of docker files
 843    path = /rootfs/var/log/
 844
 845    # regex matching pattern
 846    match = ^(syslog|messages)(.\d+)?$
 847
 848    # limit search only on one level
 849    recursive = false
 850
 851    # files are read using polling schema, when reach the EOF how often to check if files got updated
 852    pollingInterval = 250ms
 853
  854    # how often to look for the new files under logs path
 855    walkingInterval = 5s
 856
 857    # include verbose fields in events (file offset)
 858    verboseFields = false
 859
 860    # override type
 861    type = kubernetes_host_logs
 862
 863    # specify Splunk index
 864    index =
 865
 866    # field extraction
 867    extraction = ^(?P<timestamp>[A-Za-z]+\s+\d+\s\d+:\d+:\d+)\s(?P<syslog_hostname>[^\s]+)\s(?P<syslog_component>[^:\[]+)(\[(?P<syslog_pid>\d+)\])?: (.+)$
 868    # extractionMessageField =
 869
 870    # timestamp field
 871    timestampField = timestamp
 872
 873    # format for timestamp
 874    # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`
 875    timestampFormat = Jan 2 15:04:05
 876
 877    # Adjust date, if month/day aren't set in format
 878    timestampSetMonth = false
 879    timestampSetDay = false
 880
 881    # timestamp location (if not defined by format)
 882    timestampLocation = Local
 883
 884    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 885    samplingPercent = -1
 886
 887    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 888    samplingKey =
 889
 890    # set output (splunk or devnull, default is [general]defaultOutput)
 891    output =
 892
 893    # configure default thruput per second for this files group
 894    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 895    # from the files in this group to 128Kb per second.
 896    thruputPerSecond =
 897
 898    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 899    # older than 7 days
 900    tooOldEvents =
 901
 902    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 903    tooNewEvents =
 904
 905    # by default every new event should start from not space symbol
 906    eventPattern = ^[^\s]
 907
 908    # Blacklisting and whitelisting the logs
 909    # whitelist.0 = ^regexp$
 910    # blacklist.0 = ^regexp$
 911
 912
 913    # Host logs. Input all *.log(.\d+)? files
 914    [input.files::logs]
 915
 916    # disable host level logs
 917    disabled = false
 918
 919    # root location of log files
 920    path = /rootfs/var/log/
 921
 922    # regex matching pattern
 923    match = ^(([\w\-.]+\.log(.[\d\-]+)?)|(docker))$
 924
 925    # files are read using polling schema, when reach the EOF how often to check if files got updated
 926    pollingInterval = 250ms
 927
    # how often to look for new files under the logs path
 929    walkingInterval = 5s
 930
 931    # include verbose fields in events (file offset)
 932    verboseFields = false
 933
 934    # override type
 935    type = kubernetes_host_logs
 936
 937    # specify Splunk index
 938    index =
 939
 940    # field extraction
 941    extraction =
 942    extractionMessageField =
 943
 944    # timestamp field
 945    timestampField =
 946
 947    # format for timestamp
 948    # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`
 949    timestampFormat =
 950
 951    # timestamp location (if not defined by format)
 952    timestampLocation =
 953
 954    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 955    samplingPercent = -1
 956
 957    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 958    samplingKey =
 959
 960    # set output (splunk or devnull, default is [general]defaultOutput)
 961    output =
 962
 963    # configure default thruput per second for this files group
 964    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 965    # from the files in this group to 128Kb per second.
 966    thruputPerSecond =
 967
 968    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 969    # older than 7 days
 970    tooOldEvents =
 971
 972    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 973    tooNewEvents =
 974
 975    # by default every new event should start from not space symbol
 976    eventPattern = ^[^\s]
 977
 978    # Blacklisting and whitelisting the logs
 979    # whitelist.0 = ^regexp$
 980    # blacklist.0 = ^regexp$
 981
 982
 983    [input.journald]
 984
 985    # disable host level logs
 986    disabled = false
 987
 988    # root location of log files
 989    path.persistent = /rootfs/var/log/journal/
 990    path.volatile = /rootfs/run/log/journal/
 991
 992    # when reach end of journald, how often to pull
 993    pollingInterval = 250ms
 994
 995    # if you don't want to forward journald from the beginning,
 996    # set the oldest event in relative value, like -14h or -30m or -30s (h/m/s supported)
 997    startFromRel =
 998
 999    # override type
1000    type = kubernetes_host_logs
1001
1002    # specify Splunk index
1003    index =
1004
1005    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
1006    samplingPercent = -1
1007
1008    # sampling key (should be regexp with the named match pattern `key`)
1009    samplingKey =
1010
1011    # how often to reopen the journald to free old files
1012    reopenInterval = 1h
1013
1014    # set output (splunk or devnull, default is [general]defaultOutput)
1015    output =
1016
1017    # configure default thruput per second for journald
1018    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
1019    # from the journald to 128Kb per second.
1020    thruputPerSecond =
1021
1022    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
1023    # older than 7 days
1024    tooOldEvents =
1025
1026    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
1027    tooNewEvents =
1028
1029    # by default every new event should start from not space symbol
1030    eventPattern = ^[^\s]
1031
1032    # Blacklisting and whitelisting the logs
1033    # whitelist.0 = ^regexp$
1034    # blacklist.0 = ^regexp$
1035
1036    # Move Journald logs reader to a separate process, to prevent process from crashing in case of corrupted log files
1037    spawnExternalProcess = false
1038
1039
1040    # Pipe to join events (container logs only)
1041    [pipe.join]
1042
1043    # disable joining event
1044    disabled = false
1045
1046    # Maximum interval of messages in pipeline
1047    maxInterval = 100ms
1048
1049    # Maximum time to wait for the messages in pipeline
1050    maxWait = 1s
1051
1052    # Maximum message size
1053    maxSize = 1MB
1054
1055    # Default pattern to indicate new message (should start not from space)
1056    patternRegex = ^[^\s]
1057
1058
    # (deprecated, use annotations for setting up join rules)
1060    # Define special event join patterns for matched events
1061    # Section consist of [pipe.join::<name>]
1062    # [pipe.join::my_app]
1063    ## Set match pattern for the fields
1064    #; matchRegex.docker_container_image = my_app
1065    #; matchRegex.stream = stdout
1066    ## All events start from '[<digits>'
1067    #; patternRegex = ^\[\d+
1068
1069
1070    # You can configure global replace rules for the events, which can help to remove sensitive data
1071    # from logs before they are sent to Splunk. Those rules will be applied to all pipelines for container logs, host logs,
1072    # application logs and events.
1073    # In the following example we replace password=TEST with password=********
1074    ; [pipe.replace::name]
1075    ; patternRegex = (password=)([^\s]+)
1076    ; replace = $1********
1077    
1078    # You can configure global hash rules for the events, which can help to hide sensitive data
1079    # from logs before they are sent to outputs. Those rules will be applied to all pipelines for container logs, host logs,
1080    # application logs and events.
1081    # In the following example we hash IP addresses with fnv-1a-64
1082    ; [pipe.hash::name]
    ; match = (\d{1,3}\.){3}\d{1,3}
1084    ; function = fnv-1a-64
1085
1086
1087    [input.prometheus::kubelet]
1088
1089    # disable prometheus kubelet metrics
1090    disabled = false
1091
1092    # override type
1093    type = kubernetes_prometheus
1094
1095    # specify Splunk index
1096    index =
1097
1098    # override host (environment variables are supported, by default Kubernetes node name is used)
1099    host = ${KUBERNETES_NODENAME}
1100
1101    # override source
1102    source = kubelet
1103
1104    # how often to collect prometheus metrics
1105    interval = 60s
1106
1107    # request timeout
1108    timeout = 60s
1109
1110    # Prometheus endpoint, multiple values can be specified, collectord tries them in order till finding the first
1111    # working endpoint.
1112    # At first trying to get it through proxy
1113    endpoint.1proxy = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/nodes/${KUBERNETES_NODENAME}/proxy/metrics
1114    # In case if cannot get it through proxy, trying localhost
1115    endpoint.2http = http://127.0.0.1:10255/metrics
1116
1117    # token for "Authorization: Bearer $(cat tokenPath)"
1118    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1119
1120    # server certificate for certificate validation
1121    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1122
1123    # client certificate for authentication
1124    clientCertPath =
1125
1126    # Allow invalid SSL server certificate
1127    insecure = true
1128
1129    # include metrics help with the events
1130    includeHelp = false
1131
1132    # set output (splunk or devnull, default is [general]defaultOutput)
1133    output =
1134
1135    whitelist.1 = ^kubernetes_build_info$
1136    whitelist.2 = ^kubelet_runtime_operations_duration_seconds_sum$
1137    whitelist.3 = ^kubelet_docker_operations_duration_seconds_sum$
1138    whitelist.4 = ^kubelet_network_plugin_operations_duration_seconds_sum$
1139    whitelist.5 = ^kubelet_cgroup_manager_duration_seconds_sum$
1140    whitelist.6 = ^storage_operation_duration_seconds_sum$
1141    whitelist.7 = ^kubelet_docker_operations_errors_total$
1142    whitelist.8 = ^kubelet_runtime_operations_errors_total$
1143    whitelist.9 = ^rest_client_requests_total$
1144    whitelist.10 = ^process_cpu_seconds_total$
1145    whitelist.11 = ^process_resident_memory_bytes$
1146    whitelist.12 = ^process_virtual_memory_bytes$
1147    whitelist.13 = ^rest_client_request_duration_seconds_sum$
1148    whitelist.14 = ^kubelet_volume_stats_.+$
1149    whitelist.15 = ^rest_client_requests_total$
1150    
1151
1152    ; # Collectord reports if entropy is low
1153    ; [diagnostics::node-entropy]
1154    ; settings.path = /rootfs/proc/sys/kernel/random/entropy_avail
1155    ; settings.interval = 1h
1156    ; settings.threshold = 800
1157
1158    # Collectord can report if node reboot is required
1159    [diagnostics::node-reboot-required]
1160    settings.path = /rootfs/var/run/reboot-required*
1161    settings.interval = 1h
1162
1163    # See https://www.kernel.org/doc/Documentation/admin-guide/hw-vuln/index.rst
1164    # And https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
1165    [diagnostics::cpu-vulnerabilities]
1166    settings.path = /rootfs/sys/devices/system/cpu/vulnerabilities/*
1167    settings.interval = 1h
1168
1169
1170  003-daemonset-master.conf: |
1171    [input.prometheus::kubernetes-api]
1172
1173    # disable prometheus kubernetes-api metrics
1174    disabled = false
1175
1176    # override type
1177    type = kubernetes_prometheus
1178
1179    # specify Splunk index
1180    index =
1181
1182    # override host (environment variables are supported, by default Kubernetes node name is used)
1183    host = ${KUBERNETES_NODENAME}
1184
1185    # override source
1186    source = kubernetes-api
1187
1188    # how often to collect prometheus metrics
1189    interval = 60s
1190
1191    # request timeout
1192    timeout = 60s
1193
1194    # prometheus endpoint
1195    # at first trying to get it from localhost (avoiding load balancer, if multiple api servers)
1196    endpoint.1localhost = https://127.0.0.1:6443/metrics
1197    # as fallback using proxy
1198    endpoint.2kubeapi = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/metrics
1199
1200    # token for "Authorization: Bearer $(cat tokenPath)"
1201    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1202
1203    # server certificate for certificate validation
1204    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1205
1206    # client certificate for authentication
1207    clientCertPath =
1208
1209    # Allow invalid SSL server certificate
1210    insecure = true
1211
1212    # include metrics help with the events
1213    includeHelp = false
1214
1215    # set output (splunk or devnull, default is [general]defaultOutput)
1216    output =
1217
1218    whitelist.1 = ^kubernetes_build_info$
1219    whitelist.2 = ^authenticated_user_requests$
1220    whitelist.3 = ^apiserver_request_total$
1221    whitelist.4 = ^process_cpu_seconds_total$
1222    whitelist.5 = ^process_resident_memory_bytes$
1223    whitelist.6 = ^process_virtual_memory_bytes$
1224    whitelist.7 = ^rest_client_request_duration_seconds_sum$
1225    whitelist.8 = ^rest_client_requests_total$
1226
1227
    # This configuration works if the scheduler is bound to localhost:10251
1229    [input.prometheus::scheduler]
1230
1231    # disable prometheus scheduler metrics
1232    disabled = false
1233
1234    # override type
1235    type = kubernetes_prometheus
1236
1237    # specify Splunk index
1238    index =
1239
1240    # override host
1241    host = ${KUBERNETES_NODENAME}
1242
1243    # override source
1244    source = scheduler
1245
1246    # how often to collect prometheus metrics
1247    interval = 60s
1248
1249    # request timeout
1250    timeout = 60s
1251
1252    # prometheus endpoint
1253    endpoint.https = https://:10259/metrics
1254    endpoint.http = http://127.0.0.1:10251/metrics
1255
1256    # token for "Authorization: Bearer $(cat tokenPath)"
1257    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1258
1259    # server certificate for certificate validation
1260    certPath =
1261
1262    # client certificate for authentication
1263    clientCertPath =
1264
1265    # Allow invalid SSL server certificate
1266    insecure = true
1267
1268    # include metrics help with the events
1269    includeHelp = false
1270
1271    # set output (splunk or devnull, default is [general]defaultOutput)
1272    output =
1273
1274    whitelist.1 = ^kubernetes_build_info$
1275    whitelist.2 = ^scheduler_e2e_scheduling_duration_seconds_sum$
1276    whitelist.3 = ^scheduler_binding_duration_seconds_sum$
1277    whitelist.4 = ^scheduler_scheduling_algorithm_duration_seconds_sum$
1278    whitelist.5 = ^rest_client_request_duration_seconds_sum$
1279    whitelist.6 = ^rest_client_requests_total$
1280    whitelist.7 = ^process_cpu_seconds_total$
1281    whitelist.8 = ^process_resident_memory_bytes$
1282    whitelist.9 = ^process_virtual_memory_bytes$
1283
1284
    # This configuration works if the controller-manager is bound to localhost:10252
1286    [input.prometheus::controller-manager]
1287
1288    # disable prometheus controller-manager metrics
1289    disabled = false
1290
1291    # override type
1292    type = kubernetes_prometheus
1293
1294    # specify Splunk index
1295    index =
1296
1297    # override host
1298    host = ${KUBERNETES_NODENAME}
1299
1300    # override source
1301    source = controller-manager
1302
1303    # how often to collect prometheus metrics
1304    interval = 60s
1305
1306    # request timeout
1307    timeout = 60s
1308
1309    # prometheus endpoint
1310    endpoint.https = https://:10257/metrics
1311    endpoint.http = http://127.0.0.1:10252/metrics
1312
1313    # token for "Authorization: Bearer $(cat tokenPath)"
1314    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1315
1316    # server certificate for certificate validation
1317    certPath =
1318
1319    # client certificate for authentication
1320    clientCertPath =
1321
1322    # Allow invalid SSL server certificate
1323    insecure = true
1324
1325    # include metrics help with the events
1326    includeHelp = false
1327
1328    # set output (splunk or devnull, default is [general]defaultOutput)
1329    output =
1330
1331    whitelist.1 = ^kubernetes_build_info$
1332    whitelist.2 = ^node_collector_zone_size$
1333    whitelist.3 = ^node_collector_zone_health$
1334    whitelist.4 = ^node_collector_unhealthy_nodes_in_zone$
1335    whitelist.5 = ^rest_client_request_duration_seconds_sum$
1336    whitelist.6 = ^rest_client_requests_total$
1337    whitelist.7 = ^process_cpu_seconds_total$
1338    whitelist.8 = ^process_resident_memory_bytes$
1339    whitelist.9 = ^process_virtual_memory_bytes$
1340    
1341
1342    [input.prometheus::etcd]
1343
1344    # disable prometheus etcd metrics
1345    disabled = false
1346
1347    # override type
1348    type = kubernetes_prometheus
1349
1350    # specify Splunk index
1351    index =
1352
1353    # override host
1354    host = ${KUBERNETES_NODENAME}
1355
1356    # override source
1357    source = etcd
1358
    # how often to collect prometheus metrics
1360    interval = 60s
1361
1362    # request timeout
1363    timeout = 60s
1364
1365    # prometheus endpoint
1366    endpoint.http = http://:2379/metrics
1367    endpoint.https = https://:2379/metrics
1368
1369    # token for "Authorization: Bearer $(cat tokenPath)"
1370    tokenPath =
1371
1372    # server certificate for certificate validation
1373    certPath = /rootfs/etc/kubernetes/pki/etcd/ca.crt
1374
1375    # client certificate for authentication
1376    clientCertPath = /rootfs/etc/kubernetes/pki/apiserver-etcd-client.crt
1377    clientKeyPath = /rootfs/etc/kubernetes/pki/apiserver-etcd-client.key
1378
1379    # Allow invalid SSL server certificate
1380    insecure = true
1381
1382    # include metrics help with the events
1383    includeHelp = false
1384
1385    # set output (splunk or devnull, default is [general]defaultOutput)
1386    output =
1387
1388    whitelist.1 = ^etcd_server_leader_changes_seen_total$
1389    whitelist.2 = ^etcd_server_has_leader$
1390    whitelist.3 = ^etcd_server_proposals_committed_total$
1391    whitelist.4 = ^etcd_server_proposals_applied_total$
1392    whitelist.5 = ^etcd_server_proposals_committed_total$
1393    whitelist.6 = ^etcd_server_proposals_pending$
1394    whitelist.7 = ^etcd_server_proposals_failed_total$
1395    whitelist.8 = ^etcd_disk_wal_fsync_duration_seconds_sum$
1396    whitelist.9 = ^etcd_disk_wal_fsync_duration_seconds_count$
1397    whitelist.10 = ^etcd_disk_backend_commit_duration_seconds_sum$
1398    whitelist.11 = ^etcd_disk_backend_commit_duration_seconds_count$
1399    whitelist.12 = ^etcd_network_client_grpc_.*$
1400    whitelist.13 = ^grpc_server_handled_total$
1401    whitelist.14 = ^etcd_network_peer_round_trip_time_seconds_bucket$
1402    whitelist.15 = ^process_cpu_seconds_total$
1403    whitelist.16 = ^process_resident_memory_bytes$
1404    whitelist.17 = ^process_virtual_memory_bytes$
1405    whitelist.18 = ^process_open_fds$
1406    whitelist.19 = ^process_max_fds$
1407    whitelist.20 = ^etcd_disk_backend_commit_duration_seconds_bucket$
1408    whitelist.21 = ^etcd_disk_wal_fsync_duration_seconds_bucket$
1409
1410  004-addon.conf: |
1411    [general]
1412
1413    # addons can be run in parallel with agents
1414    addon = true
1415
1416    [input.kubernetes_events]
1417
1418    # disable events
1419    disabled = false
1420
1421    # override type
1422    type = kubernetes_events
1423
1424    # specify Splunk index
1425    index =
1426
1427    # set output (splunk or devnull, default is [general]defaultOutput)
1428    output =
1429
1430    # exclude managed fields from the metadata
1431    excludeManagedFields = true
1432
1433
1434    [input.kubernetes_watch::pods]
1435
1436    # disable events
1437    disabled = false
1438
1439    # Set the timeout for how often watch request should refresh the whole list
1440    refresh = 10m
1441
1442    apiVersion = v1
1443    kind = Pod
1444    namespace =
1445
1446    # override type
1447    type = kubernetes_objects
1448
1449    # specify Splunk index
1450    index =
1451
1452    # set output (splunk or devnull, default is [general]defaultOutput)
1453    output =
1454
1455    # exclude managed fields from the metadata
1456    excludeManagedFields = true
1457
1458    # you can remove or hash some values in the events (after modifyValues you can define path in the JSON object,
1459    # and the value can be hash:{hashFunction}, or remove to remove the object )
1460    ; modifyValues.object.data.* = hash:sha256
1461    ; modifyValues.object.metadata.annotations.* = remove
1462
1463    # You can exclude events by namespace with blacklist or whitelist only required namespaces
1464    # blacklist.kubernetes_namespace = ^namespace0$
1465    # whitelist.kubernetes_namespace = ^((namespace1)|(namespace2))$
1466
1467    [input.kubernetes_watch::resourcequota]
1468    # disable events
1469    disabled = false
1470
1471    # Set the timeout for how often watch request should refresh the whole list
1472    refresh = 10m
1473
1474    apiVersion = v1
1475    kind = ResourceQuota
1476    namespace =
1477
1478    # override type
1479    type = kubernetes_objects
1480
1481    # specify Splunk index
1482    index =
1483
1484    # set output (splunk or devnull, default is [general]defaultOutput)
1485    output =
1486
1487    # exclude managed fields from the metadata
1488    excludeManagedFields = true
1489
1490    [input.kubernetes_watch::nodes]
1491    # disable events
1492    disabled = false
1493
1494    # Set the timeout for how often watch request should refresh the whole list
1495    refresh = 10m
1496
1497    apiVersion = v1
1498    kind = Node
1499    namespace =
1500
1501    # override type
1502    type = kubernetes_objects
1503
1504    # specify Splunk index
1505    index =
1506
1507    # set output (splunk or devnull, default is [general]defaultOutput)
1508    output =
1509
1510    # exclude managed fields from the metadata
1511    excludeManagedFields = true
1512
1513---
1514apiVersion: apps/v1
1515kind: DaemonSet
1516metadata:
1517  name: collectorforkubernetes
1518  namespace: collectorforkubernetes
1519  labels:
1520    app: collectorforkubernetes
1521spec:
1522  # Default updateStrategy is OnDelete. For collectord RollingUpdate is suitable
1523  # When you update configuration
1524  updateStrategy:
1525    type: RollingUpdate
1526  selector:
1527    matchLabels:
1528      daemon: collectorforkubernetes
1529  template:
1530    metadata:
1531      name: collectorforkubernetes
1532      labels:
1533        daemon: collectorforkubernetes
1534    spec:
1535      priorityClassName: collectorforkubernetes-critical
1536      dnsPolicy: ClusterFirstWithHostNet
1537      hostNetwork: true
1538      serviceAccountName: collectorforkubernetes
1539      # We run this DaemonSet only for Non-Masters
1540      affinity:
1541        nodeAffinity:
1542          requiredDuringSchedulingIgnoredDuringExecution:
1543            nodeSelectorTerms:
1544            - matchExpressions:
1545              - key: node-role.kubernetes.io/control-plane
1546                operator: DoesNotExist
1547      tolerations:
1548      - operator: "Exists"
1549        effect: "NoSchedule"
1550      - operator: "Exists"
1551        effect: "NoExecute"
1552      containers:
1553      - name: collectorforkubernetes
1554        # Collectord version
1555        image: docker.io/outcoldsolutions/collectorforkubernetes:25.10.3
1556        imagePullPolicy: Always
1557        securityContext:
1558          runAsUser: 0
1559          privileged: true
1560        # Define your resources if you need. Defaults should be fine for most.
1561        # You can lower or increase based on your hosts.
1562        resources:
1563          limits:
1564            cpu: 2000m
1565            memory: 1024Mi
1566          requests:
1567            cpu: 500m
1568            memory: 256Mi
1569        env:
1570        - name: KUBERNETES_NODENAME
1571          valueFrom:
1572            fieldRef:
1573              fieldPath: spec.nodeName
1574        - name: POD_NAME
1575          valueFrom:
1576            fieldRef:
1577              fieldPath: metadata.name
1578        volumeMounts:
1579        # We store state in /data folder (file positions)
1580        - name: collectorforkubernetes-state
1581          mountPath: /data
1582        # Configuration file deployed with ConfigMap
1583        - name: collectorforkubernetes-config
1584          mountPath: /config/
1585          readOnly: true
1586        # Root filesystem to have access to logs and metrics
1587        - name: rootfs
1588          mountPath: /rootfs/
1589          readOnly: false
1590          mountPropagation: HostToContainer
1591        # correct timezone
1592        - name: localtime
1593          mountPath: /etc/localtime
1594          readOnly: true
1595      volumes:
1596      # We store state directly on host, change this location, if
1597      # your persistent volume is somewhere else
1598      - name: collectorforkubernetes-state
1599        hostPath:
1600          path: /var/lib/collectorforkubernetes/data/
1601          type: DirectoryOrCreate
1602      # Location of docker root (for container logs and metadata)
1603      - name: rootfs
1604        hostPath:
1605          path: /
1606      # correct timezone
1607      - name: localtime
1608        hostPath:
1609          path: /etc/localtime
1610      # configuration from ConfigMap
1611      - name: collectorforkubernetes-config
1612        configMap:
1613          name: collectorforkubernetes
1614          items:
1615          - key: 001-general.conf
1616            path: 001-general.conf
1617          - key: 002-daemonset.conf
1618            path: 002-daemonset.conf
1619---
1620apiVersion: apps/v1
1621kind: DaemonSet
1622metadata:
1623  name: collectorforkubernetes-master
1624  namespace: collectorforkubernetes
1625  labels:
1626    app: collectorforkubernetes
1627spec:
1628  updateStrategy:
1629    type: RollingUpdate
1630  selector:
1631    matchLabels:
1632      daemon: collectorforkubernetes
1633  template:
1634    metadata:
1635      name: collectorforkubernetes-master
1636      labels:
1637        daemon: collectorforkubernetes
1638    spec:
1639      priorityClassName: collectorforkubernetes-critical
1640      dnsPolicy: ClusterFirstWithHostNet
1641      hostNetwork: true
1642      serviceAccountName: collectorforkubernetes
1643      affinity:
1644        nodeAffinity:
1645          requiredDuringSchedulingIgnoredDuringExecution:
1646            nodeSelectorTerms:
1647            - matchExpressions:
1648              - key: node-role.kubernetes.io/control-plane
1649                operator: Exists
1650      tolerations:
1651      - operator: "Exists"
1652        effect: "NoSchedule"
1653      - operator: "Exists"
1654        effect: "NoExecute"
1655      containers:
1656      - name: collectorforkubernetes
1657        image: docker.io/outcoldsolutions/collectorforkubernetes:25.10.3
1658        imagePullPolicy: Always
1659        securityContext:
1660          runAsUser: 0
1661          privileged: true
1662        resources:
1663          limits:
1664            cpu: 2000m
1665            memory: 1024Mi
1666          requests:
1667            cpu: 500m
1668            memory: 256Mi
1669        env:
1670        - name: KUBERNETES_NODENAME
1671          valueFrom:
1672            fieldRef:
1673              fieldPath: spec.nodeName
1674        - name: POD_NAME
1675          valueFrom:
1676            fieldRef:
1677              fieldPath: metadata.name
1678        volumeMounts:
1679        - name: collectorforkubernetes-state
1680          mountPath: /data
1681        - name: collectorforkubernetes-config
1682          mountPath: /config/
1683          readOnly: true
1684        - name: rootfs
1685          mountPath: /rootfs/
1686          readOnly: false
1687          mountPropagation: HostToContainer
1688        - name: localtime
1689          mountPath: /etc/localtime
1690          readOnly: true
1691      volumes:
1692      - name: collectorforkubernetes-state
1693        hostPath:
1694          path: /var/lib/collectorforkubernetes/data/
1695          type: DirectoryOrCreate
1696      - name: rootfs
1697        hostPath:
1698          path: /
1699      - name: localtime
1700        hostPath:
1701          path: /etc/localtime
1702      - name: collectorforkubernetes-config
1703        configMap:
1704          name: collectorforkubernetes
1705          items:
1706          - key: 001-general.conf
1707            path: 001-general.conf
1708          - key: 002-daemonset.conf
1709            path: 002-daemonset.conf
1710          - key: 003-daemonset-master.conf
1711            path: 003-daemonset-master.conf
1712---
1713apiVersion: apps/v1
1714kind: Deployment
1715metadata:
1716  name: collectorforkubernetes-addon
1717  namespace: collectorforkubernetes
1718  labels:
1719    app: collectorforkubernetes
1720spec:
1721  replicas: 1
1722  selector:
1723    matchLabels:
1724      daemon: collectorforkubernetes
1725  template:
1726    metadata:
1727      name: collectorforkubernetes-addon
1728      labels:
1729        daemon: collectorforkubernetes
1730    spec:
1731      priorityClassName: collectorforkubernetes-critical
1732      serviceAccountName: collectorforkubernetes
1733      containers:
1734      - name: collectorforkubernetes
1735        image: docker.io/outcoldsolutions/collectorforkubernetes:25.10.3
1736        imagePullPolicy: Always
1737        securityContext:
1738          runAsUser: 0
1739          privileged: true
1740        resources:
1741          limits:
1742            cpu: 1000m
1743            memory: 512Mi
1744          requests:
1745            cpu: 200m
1746            memory: 64Mi
1747        env:
1748        - name: KUBERNETES_NODENAME
1749          valueFrom:
1750            fieldRef:
1751              fieldPath: spec.nodeName
1752        - name: POD_NAME
1753          valueFrom:
1754            fieldRef:
1755              fieldPath: metadata.name
1756        volumeMounts:
1757        - name: collectorforkubernetes-state
1758          mountPath: /data
1759        - name: collectorforkubernetes-config
1760          mountPath: /config/
1761          readOnly: true
1762      volumes:
1763      - name: collectorforkubernetes-state
1764        hostPath:
1765          path: /var/lib/collectorforkubernetes/data/
1766          type: Directory
1767      - name: collectorforkubernetes-config
1768        configMap:
1769          name: collectorforkubernetes
1770          items:
1771          - key: 001-general.conf
1772            path: 001-general.conf
1773          - key: 004-addon.conf
1774            path: 004-addon.conf

About Outcold Solutions

Outcold Solutions provides solutions for monitoring Kubernetes, OpenShift and Docker clusters in Splunk Enterprise and Splunk Cloud. We offer certified Splunk applications, which give you insights across all container environments. We are helping businesses reduce complexity related to logging and monitoring by providing easy-to-use and easy-to-deploy solutions for Linux and Windows containers. We deliver applications, which help developers monitor their applications and help operators keep their clusters healthy. With the power of Splunk Enterprise and Splunk Cloud, we offer one solution to help you keep all the metrics and logs in one place, allowing you to quickly address complex questions on container performance.

Red Hat
Splunk
AWS