Collectord configuration

Download

collectorforopenshift.yaml

CURL

1curl -O https://www.outcoldsolutions.com/docs/monitoring-openshift/collectorforopenshift-rhel.yaml

WGET

1wget https://www.outcoldsolutions.com/docs/monitoring-openshift/collectorforopenshift-rhel.yaml

collectorforopenshift.yaml

   1apiVersion: project.openshift.io/v1
   2kind: Project
   3metadata:
   4  labels:
   5    app: collectorforopenshift
   6  name: collectorforopenshift
   7  annotations:
   8    openshift.io/node-selector: ''
   9    openshift.io/description: 'Monitoring OpenShift in Splunk, built by Outcold Solutions'
  10    openshift.io/display-name: 'Collectord for OpenShift'
  11---
  12apiVersion: apiextensions.k8s.io/v1
  13kind: CustomResourceDefinition
  14metadata:
  15  name: configurations.collectord.io
  16spec:
  17  group: collectord.io
  18  versions:
  19    - name: v1
  20      served: true
  21      storage: true
  22      schema:
  23        openAPIV3Schema:
  24          type: object
  25          properties:
  26            spec:
  27              type: object
  28              additionalProperties: true
  29            force:
  30              type: boolean
  31  scope: Cluster
  32  names:
  33    listKind: ConfigurationList
  34    plural: configurations
  35    singular: configuration
  36    kind: Configuration
  37---
  38apiVersion: apiextensions.k8s.io/v1
  39kind: CustomResourceDefinition
  40metadata:
  41  name: splunkoutputs.collectord.io
  42spec:
  43  group: collectord.io
  44  versions:
  45    - name: v1
  46      served: true
  47      storage: true
  48      schema:
  49        openAPIV3Schema:
  50          type: object
  51          properties:
  52            spec:
  53              type: object
  54              properties:
  55                url:
  56                  type: string
  57                  format: uri
  58                insecure:
  59                  type: boolean
  60                token:
  61                  type: string
  62                  description: "Plain token"
  63                tokenFromSecret:
  64                  type: object
  65                  description: "Reference to a Kubernetes Secret"
  66                  properties:
  67                    secret:
  68                      type: string
  69                    key:
  70                      type: string
  71              oneOf:
  72                - required: ["token"]
  73                - required: ["tokenFromSecret"]
  74  scope: Namespaced
  75  names:
  76    listKind: SplunkOutputList
  77    plural: splunkoutputs
  78    singular: splunkoutput
  79    kind: SplunkOutput
  80---
  81apiVersion: scheduling.k8s.io/v1
  82kind: PriorityClass
  83metadata:
  84  name: collectorforopenshift-critical
  85value: 1000000000
  86---
  87kind: SecurityContextConstraints
  88apiVersion: security.openshift.io/v1
  89metadata:
  90  name: collectorforopenshift
  91allowHostDirVolumePlugin: true
  92allowHostIPC: true
  93allowHostNetwork: true
  94allowHostPID: true
  95allowHostPorts: true
  96allowPrivilegeEscalation: true
  97allowPrivilegedContainer: true
  98readOnlyRootFilesystem: false
  99allowedCapabilities:
 100  - '*'
 101allowedUnsafeSysctls:
 102  - '*'
 103fsGroup:
 104  type: RunAsAny
 105runAsUser:
 106  type: RunAsAny
 107seLinuxContext:
 108  type: RunAsAny
 109supplementalGroups:
 110  type: RunAsAny
 111seccompProfiles:
 112  - '*'
 113users:
 114  - system:serviceaccount:collectorforopenshift:collectorforopenshift
 115volumes:
 116  - '*'
 117---
 118apiVersion: v1
 119kind: ServiceAccount
 120metadata:
 121  labels:
 122    app: collectorforopenshift
 123  name: collectorforopenshift
 124  namespace: collectorforopenshift
 125---
 126apiVersion: rbac.authorization.k8s.io/v1
 127kind: ClusterRole
 128metadata:
 129  labels:
 130    app: collectorforopenshift
 131  name: collectorforopenshift
 132rules:
 133- apiGroups:
 134    - ""
 135    - apps
 136    - batch
 137    - extensions
 138    - collectord.io
 139    - apps.openshift.io
 140    - build.openshift.io
 141    - authorization.openshift.io
 142    - template.openshift.io
 143    - quota.openshift.io
 144  resources:
 145    - splunkoutputs
 146    - alertmanagers
 147    - cronjobs
 148    - daemonsets
 149    - deployments
 150    - endpoints
 151    - events
 152    - jobs
 153    - namespaces
 154    - nodes
 155    - nodes/metrics
 156    - nodes/proxy
 157    - pods
 158    - replicasets
 159    - replicationcontrollers
 160    - scheduledjobs
 161    - secrets
 162    - services
 163    - statefulsets
 164    - persistentvolumeclaims
 165    - configurations
 166    - resourcequotas
 167    - deploymentconfigs
 168    - clusterroles
 169    - clusterresourcequotas
 170  verbs:
 171  - get
 172  - list
 173  - watch
 174- nonResourceURLs:
 175  - /metrics
 176  verbs:
 177  - get
 178  apiGroups: []
 179  resources: []
 180---
 181apiVersion: rbac.authorization.k8s.io/v1
 182kind: ClusterRoleBinding
 183metadata:
 184  labels:
 185    app: collectorforopenshift
 186  name: collectorforopenshift
 187roleRef:
 188  kind: ClusterRole
 189  name: collectorforopenshift
 190  apiGroup: rbac.authorization.k8s.io
 191subjects:
 192  - kind: ServiceAccount
 193    name: collectorforopenshift
 194    namespace: collectorforopenshift
 195---
 196apiVersion: v1
 197kind: ConfigMap
 198metadata:
 199  name: collectorforopenshift
 200  namespace: collectorforopenshift
 201  labels:
 202    app: collectorforopenshift
 203data:
 204  001-general.conf: |
 205    # The general configuration is used for all deployments
 206    #
 207    # Run collectord with the flag `-conf` and specify location of the configuration files.
 208    #
 209    # You can override all the values using environment variables with the format like
 210    #   COLLECTOR__<ANYNAME>=<section>__<key>=<value>
 211    # As an example you can set `dataPath` in the `[general]` section as
 212    #   COLLECTOR__DATAPATH=general__dataPath=C:\\some\\path\\data.db
 213    # This parameter can be configured using -env-override, set it to empty string to disable this feature
 214
 215    [general]
 216
 217    # Please review license https://www.outcoldsolutions.com/docs/license-agreement/
 218    # and accept license by changing the value to *true*
 219    acceptLicense = false
 220
 221    # Location for the database
 222    # Collectord stores positions of the files and internal state
 223    dataPath = ./data/
 224
 225    # log level (accepted values are trace, debug, info, warn, error, fatal)
 226    logLevel = info
 227
 228    # http server gives access to two endpoints
 229    # /healthz
 230    # /metrics/json
 231    # /metrics/prometheus
 232    # httpServerBinding = 0.0.0.0:11888
 233    httpServerBinding =
 234
 235    # log requests to the http server
 236    httpServerLog = false
 237
 238    # telemetry report endpoint, set it to empty string to disable telemetry
 239    telemetryEndpoint = https://license.outcold.solutions/telemetry/
 240
 241    # license check endpoint
 242    licenseEndpoint = https://license.outcold.solutions/license/
 243
 244    # license server through proxy
 245    # This configuration is used only for the Outcold Solutions License Server
 246    # For license server running on-premises, use configuration under [license.client]
 247    licenseServerProxyUrl =
 248
 249    # authentication with basic authorization (user:password)
 250    # This configuration is used only for the Outcold Solutions License Server
 251    # For license server running on-premises, use configuration under [license.client]
 252    licenseServerProxyBasicAuth =
 253
 254    # license key
 255    license =
 256
 257    # Environment variable $KUBERNETES_NODENAME is used by default to setup hostname
 258    # Use value below to override specific name
 259    hostname =
 260
 261    # Default output for events, logs and metrics
 262    # valid values: splunk and devnull
 263    # Use devnull by default if you don't want to redirect data
 264    defaultOutput = splunk
 265
 266    # Default buffer size for file input
 267    fileInputBufferSize = 256b
 268
 269    # Maximum size of one line the file reader can read
 270    fileInputLineMaxSize = 1mb
 271
  272    # Include custom fields to attach to every event, in example below every event sent to Splunk will have
  273    # indexed field my_environment=dev. Field names should match to ^[a-z][_a-z0-9]*$
 274    # Better way to configure that is to specify labels for OpenShift Nodes.
 275    # ; fields.my_environment = dev
 276    # Identify the cluster if you are planning to monitor multiple clusters
 277    fields.openshift_cluster = -
 278
 279    # Include EC2 Metadata (see list of possible fields https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html)
 280    # Should be in format ec2Metadata.{desired_field_name} = {url path to read the value}
 281    # ec2Metadata.ec2_instance_id = /latest/meta-data/instance-id
 282    # ec2Metadata.ec2_instance_type = /latest/meta-data/instance-type
 283
 284    # subdomain for the annotations added to the pods, workloads, namespaces or containers, like splunk.collectord.io/..
 285    annotationsSubdomain =
 286
 287    # configure global thruput per second for forwarded logs (metrics are not included)
 288    # for example if you set `thruputPerSecond = 512Kb`, that will limit amount of logs forwarded
 289    # from the single Collectord instance to 512Kb per second.
 290    # You can configure thruput individually for the logs (including specific for container logs) below
 291    thruputPerSecond =
 292
 293    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 294    # older than 7 days
 295    tooOldEvents =
 296
 297    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 298    tooNewEvents =
 299
 300    # For input.files::X and application logs, when glob or match are configured, Collectord can automatically
 301    # detect gzipped files and skip them (based on the extensions or magic numbers)
 302    autoSkipGzipFiles = true
 303
 304    [license.client]
 305    # point to the license located on the HTTP web server, or a hosted by the Collectord running as license server
 306    url =
 307    # basic authentication for the HTTP server
 308    basicAuth =
 309    # if SSL, ignore the certificate verification
 310    insecure = false
 311    # CA Path for the Server certificate
 312    capath =
  314    # CA Name for the Server certificate
 314    caname =
 315    # license server through proxy
 316    proxyUrl =
 317    # authentication with basic authorization (user:password)
 318    proxyBasicAuth =
 319
 320    # forward internal collectord metrics
 321    [input.collectord_metrics]
 322
 323    # disable collectord internal metrics
 324    disabled = false
 325
 326    # override type
 327    type = openshift_prometheus
 328
 329    # how often to collect internal metrics
 330    interval = 1m
 331
 332    # set output (splunk or devnull, default is [general]defaultOutput)
 333    output =
 334
 335    # specify Splunk index
 336    index =
 337
 338    # whitelist or blacklist the metrics
 339    whitelist.1 = ^file_input_open$
 340    whitelist.2 = ^file_input_read_bytes$
 341    whitelist.3 = ^openshift_handlers$
 342    whitelist.4 = ^pipe$
 343    whitelist.5 = ^pipelines_num$
 344    whitelist.6 = ^splunk_post_bytes_sum.*$
 345    whitelist.7 = ^splunk_post_events_count_sum.*$
 346    whitelist.8 = ^splunk_post_failed_requests$
 347    whitelist.9 = ^splunk_post_message_max_lag_seconds_bucket.*$
 348    whitelist.10 = ^splunk_post_requests_seconds_sum.*$
 349    whitelist.11 = ^splunk_post_retries_required_sum.*$
 350
 351
 352    # connection to kubernetes api
 353    [general.kubernetes]
 354
 355    # Override service URL for Kubernetes (default is ${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT})
 356    serviceURL =
 357
 358    # Environment variable $KUBERNETES_NODENAME is used by default to setup nodeName
 359    # Use it only when you need to override it
 360    nodeName =
 361
 362    # Configuration to access the API server,
 363    # see https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod
 364    # for details
 365    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
 366    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
 367
 368    # Default timeout for http responses. The streaming/watch requests depend on this timeout.
 369    timeout = 30m
 370
 371    # How long to keep the cache for the recent calls to API server (to limit number of calls when collectord discovers new pods)
 372    metadataTTL = 30s
 373
 374    # regex to find pods
 375    podsCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?$
 376
 377    # regex to find containers in the pods
 378    containersCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?/(docker-|crio-|cri-\w+-)?[0-9a-f]{64}(\.scope)?(\/.+)?$
 379
 380    # path to the kubelet root location (use it to discover application logs for emptyDir)
 381    # the expected format is `pods/{pod-id}/volumes/kubernetes.io~empty-dir/{volume-name}/_data/`
 382    volumesRootDir = /rootfs/var/lib/kubelet/
 383
 384    # You can attach annotations as a metadata, using the format
 385    #   includeAnnotations.{key} = {regexp}
 386    # For example if you want to include all annotations that starts with `prometheus.io` or `example.com` you can include
 387    # the following format:
 388    #   includeAnnotations.1 = ^prometheus\.io.*
 389    #   includeAnnotations.2 = ^example\.com.*
 390
 391    # You can exclude labels from metadata, using the format
 392    #   excludeLabels.{key} = {regexp}
 393    # For example if you want to exclude all labels that starts with `prometheus.io` or `example.com` you can include
 394    # the following format:
 395    #   excludeLabels.1 = ^prometheus\.io.*
 396    #   excludeLabels.2 = ^example\.com.*
 397
 398    # watch for changes (annotations) in the objects
 399    watch.namespaces = v1/namespace
 400    watch.deploymentconfigs = apps.openshift.io/v1/deploymentconfig
 401    watch.configurations = collectord.io/v1/configuration
 402
 403    # Collectord can review the assigned ClusterRole and traverse metadata for the Pods only for the Owner objects
 404    # that are defined in the ClusterRole, ignoring anything else, it does not have access to.
 405    # This way Collectord does not generate 403 requests on API Server
 406    clusterRole = collectorforopenshift
 407
 408    # Alternative of telling Collectord about the ClusterRole is to manually list the objects.
 409    # You can define which objects Collectord should traverse when it sees Owners.
 410    ; traverseOwnership.namespaces = v1/namespace
 411
 412    # Implementation of the watch protocol.
 413    # 0 - use the default implementation (2)
 414    # 1 - use the watch implementation that is optimized for the small number of objects (just issue one watch for all objects)
 415    # 2 - use the watch implementation that is optimized for the large number of objects (paginate through the list of objects and issue watch for the last resource version)
 416    watchImplementation = 2
 417
 418    # watch for pods annotations, setup prometheus collection
 419    # for these pods
 420    # Addon listens on Pod Network
 421    # DaemonSets listen on Host Network
 422    [input.prometheus_auto]
 423
 424    # disable prometheus auto discovery for pods
 425    disabled = false
 426
 427    # override type
 428    type = openshift_prometheus
 429
 430    # specify Splunk index
 431    index =
 432
 433    # how often to collect prometheus metrics
 434    interval = 60s
 435
 436    # request timeout
 437    timeout = 60s
 438
 439    # include metrics help with the events
 440    includeHelp = true
 441
  442    # http client timeout (NOTE: duplicate `timeout` key in this section — `timeout = 60s` is set above; the later value wins, so remove one of them)
 443    timeout = 30s
 444
 445    # set output (splunk or devnull, default is [general]defaultOutput)
 446    output =
 447
 448    # Include an Authorization header for the prometheus scrapper
 449    # When configuring scrapping with collectord using annotations use prometheus.1-AuthorizationKey=key1
 450    # authorization.key1 = Bearer FOO
 451
 452
 453    # Splunk output
 454    [output.splunk]
 455
 456    # Splunk HTTP Event Collector url
 457    url =
  458    # You can specify multiple Splunk URLs with
 459    #
 460    # urls.0 = https://server1:8088/services/collector/event/1.0
 461    # urls.1 = https://server1:8088/services/collector/event/1.0
 462    # urls.2 = https://server1:8088/services/collector/event/1.0
 463    #
 464    # Limitations:
 465    # * The urls cannot have different path.
 466
 467    # Specify how URL should be picked up (in case if multiple is used)
 468    # urlSelection = random|round-robin|random-with-round-robin
 469    # where:
 470    # * random - choose random url on first selection and after each failure (connection or HTTP status code >= 500)
 471    # * round-robin - choose url starting from first one and bump on each failure (connection or HTTP status code >= 500)
 472    # * random-with-round-robin - choose random url on first selection and after that in round-robin on each
 473    #                             failure (connection or HTTP status code >= 500)
 474    urlSelection = random-with-round-robin
 475
 476    # Splunk HTTP Event Collector Token
 477    token =
 478
 479    # Allow invalid SSL server certificate
 480    insecure = false
 481    # minTLSVersion = TLSv1.2
 482    # maxTLSVersion = TLSv1.3
 483
  484    # Path to CA certificate
 485    caPath =
 486
 487    # CA Name to verify
 488    caName =
 489
 490    # path for client certificate (if required)
 491    clientCertPath =
 492
 493    # path for client key (if required)
 494    clientKeyPath =
 495
 496    # Events are batched with the maximum size set by batchSize and staying in pipeline for not longer
 497    # than set by frequency
 498    frequency = 5s
 499    batchSize = 768K
 500    # limit by the number of events (0 value has no limit on the number of events)
 501    events = 50
 502
 503    # Splunk through proxy
 504    proxyUrl =
 505
 506    # authentication with basic authorization (user:password)
 507    proxyBasicAuth =
 508
 509    # Splunk acknowledgement url (.../services/collector/ack)
 510    ackUrl =
  511    # You can specify multiple Splunk URLs for ackUrl
 512    #
 513    # ackUrls.0 = https://server1:8088/services/collector/ack
 514    # ackUrls.1 = https://server1:8088/services/collector/ack
 515    # ackUrls.2 = https://server1:8088/services/collector/ack
 516    #
  517    # Make sure that they are in the same order as urls for url, to make sure that this Splunk instance will be
 518    # able to acknowledge the payload.
 519    #
 520    # Limitations:
 521    # * The urls cannot have different path.
 522
 523    # Enable index acknowledgment
 524    ackEnabled = false
 525
 526    # Index acknowledgment timeout
 527    ackTimeout = 3m
 528
 529    # Timeout specifies a time limit for requests made by collectord.
 530    # The timeout includes connection time, any
 531    # redirects, and reading the response body.
 532    timeout = 30s
 533
  534    # in case when pipeline can post to multiple indexes, we want to avoid possibility of blocking
 535    # all pipelines, because just some events have incorrect index
 536    dedicatedClientPerIndex = true
 537
 538    # possible values: RedirectToDefault, Drop, Retry
 539    incorrectIndexBehavior = RedirectToDefault
 540
 541    # gzip compression level (nocompression, default, 1...9)
 542    compressionLevel = default
 543
 544    # number of dedicated splunk output threads (to increase throughput above 4k events per second)
 545    threads = 2
 546    # Default algorithm between threads is roundrobin, but you can change it to weighted
 547    ; threadsAlgorithm = weighted
 548
 549    # if you want to exclude some preindexed fields from events
 550    # excludeFields.openshift_pod_ip = true
 551
 552    # By default if there are no indexes defined on the message, Collectord sends the event without the index, and
 553    # Splunk HTTP Event Collector going to use the default index for the Token. You can change that, and tell Collectord
 554    # to ignore all events that don't have index defined explicitly
 555    ; requireExplicitIndex = true
 556
 557    # You can define if you want to truncate messages that are larger than 1M in length (or define your own size, like 256K)
 558    ; maximumMessageLength = 1M
 559
 560    # For messages generated from logs, include unique `event_id` in the event
 561    ; includeEventID = false
 562
 563    # Dedicated queue size for the output, default is 1024, larger queue sizes will require more memory,
 564    # but will allow to handle more events in case of network issues
 565    queueSize = 1024
 566
 567    # How many digits after the decimal point to keep for timestamps (0-9)
 568    # Defaults to 3 (milliseconds)
 569    # Change to 6 for microseconds
 570    # Change to 9 for nanoseconds
 571    ; timestampPrecision = 3
 572
 573  002-daemonset.conf: |
 574    # DaemonSet configuration is used for Nodes and Masters.
 575
 576    # connection to CRIO
 577    [general.cri-o]
 578
 579    # url for CRIO API, only unix socket is supported
 580    url = unix:///rootfs/var/run/crio/crio.sock
 581
 582    # Timeout for http responses to docker client. The streaming requests depend on this timeout.
 583    timeout = 1m
 584
 585    # cgroup input
 586    [input.system_stats]
 587
 588    # disable system level stats
 589    disabled.host = false
 590    disabled.cgroup = false
 591
 592    # cgroups fs location
 593    pathCgroups = /rootfs/sys/fs/cgroup
 594
 595    # proc location
 596    pathProc = /rootfs/proc
 597
 598    # how often to collect cgroup stats
 599    statsInterval = 30s
 600
 601    # override type
 602    type.host = openshift_stats_v2_host
 603    type.cgroup = openshift_stats_v2_cgroup
 604
 605    # specify Splunk index
 606    index.host =
 607    index.cgroup =
 608
 609    # set output (splunk or devnull, default is [general]defaultOutput)
 610    output.host =
 611    output.cgroup =
 612
 613
 614    # proc input
 615    [input.proc_stats]
 616
 617    # disable proc level stats
 618    disabled = false
 619
 620    # proc location
 621    pathProc = /rootfs/proc
 622
 623    # how often to collect proc stats
 624    statsInterval = 60s
 625
 626    # override type
 627    type = openshift_proc_stats_v2
 628
 629    # specify Splunk index
 630    index.host =
 631    index.cgroup =
 632
 633    # proc filesystem includes by default system threads (there can be over 100 of them)
 634    # these stats do not help with the observability
 635    # excluding them can reduce the size of the index, performance of the searches and usage of the collector
 636    includeSystemThreads = false
 637
 638    # set output (splunk or devnull, default is [general]defaultOutput)
 639    output.host =
 640    output.cgroup =
 641
 642    # Hide arguments for the processes, replacing with HIDDEN_ARGS(NUMBER)
 643    hideArgs = false
 644
 645
 646    # network stats
 647    [input.net_stats]
 648
 649    # disable net stats
 650    disabled = false
 651
 652    # proc path location
 653    pathProc = /rootfs/proc
 654
 655    # how often to collect net stats
 656    statsInterval = 30s
 657
 658    # override type
 659    type = openshift_net_stats_v2
 660
 661    # specify Splunk index
 662    index.host =
 663    index.cgroup =
 664
 665    # set output (splunk or devnull, default is [general]defaultOutput)
 666    output.host =
 667    output.cgroup =
 668
 669
 670    # network socket table
 671    [input.net_socket_table]
 672
 673    # disable net stats
 674    disabled = false
 675
 676    # proc path location
 677    pathProc = /rootfs/proc
 678
 679    # how often to collect net stats
 680    statsInterval = 30s
 681
 682    # override type
 683    type = openshift_net_socket_table
 684
 685    # specify Splunk index
 686    index.host =
 687    index.cgroup =
 688
 689    # set output (splunk or devnull, default is [general]defaultOutput)
 690    output.host =
 691    output.cgroup =
 692
 693    # group connections by tcp_state, localAddr, remoteAddr (if localPort is not the port it is listening on)
  694    # that can significantly reduce the amount of events
 695    group = true
 696
 697    # Collectord can watch for services, node, and pod IP addresses, and lookup the names
 698    # for the IP addresses. Keeping this enabled can add a significant load on the API Server, with large number of pods.
 699    disableLookup = false
 700
 701
 702    # mount input (collects mount stats where kubelet runtime is stored)
 703    [input.mount_stats]
 704
 705    # disable system level stats
 706    disabled = false
 707
 708    # how often to collect mount stats
 709    statsInterval = 30s
 710
 711    # override type
 712    type = openshift_mount_stats
 713
 714    # specify Splunk index
 715    index =
 716
 717    # set output (splunk or devnull, default is [general]defaultOutput)
 718    output =
 719
 720
 721    # diskstats input (collects /proc/diskstats)
 722    [input.disk_stats]
 723
 724    # disable system level stats
 725    disabled = false
 726
 727    # how often to collect mount stats
 728    statsInterval = 30s
 729
 730    # override type
 731    type = openshift_disk_stats
 732
 733    # specify Splunk index
 734    index =
 735
 736    # set output (splunk or devnull, default is [general]defaultOutput)
 737    output =
 738
 739
 740    # Container Log files
 741    [input.files]
 742
 743    # disable container logs monitoring
 744    disabled = false
 745
 746    # root location of docker log files
 747    # logs are expected in standard docker format like {containerID}/{containerID}-json.log
 748    # rotated files
 749    path = /rootfs/var/lib/docker/containers/
 750    # root location of CRI-O files
 751    # logs are expected in Kubernetes format, like {podID}/{containerName}/0.log
 752    crioPath = /rootfs/var/log/pods/
 753
 754    # (obsolete) glob matching pattern for log files
 755    # glob = */*-json.log*
 756
 757    # files are read using polling schema, when reach the EOF how often to check if files got updated
 758    pollingInterval = 250ms
 759
 760    # how often to look for the new files under logs path
 761    walkingInterval = 5s
 762
 763    # include verbose fields in events (file offset)
 764    verboseFields = false
 765
 766    # override type
 767    type = openshift_logs
 768
 769    # specify Splunk index
 770    index =
 771
 772    # docker splits events when they are larger than 10-100k (depends on the docker version)
 773    # we join them together by default and forward to Splunk as one event
 774    joinPartialEvents = true
 775
 776    # In case if your containers report messages with terminal colors or other escape sequences
 777    # you can enable strip for all the containers in one place.
 778    # Better is to enable it only for required container with the label collectord.io/strip-terminal-escape-sequences=true
 779    stripTerminalEscapeSequences = false
  780    # Regexp used for stripping terminal colors, it does not strip all the escape sequences
 781    # Read http://man7.org/linux/man-pages/man4/console_codes.4.html for more information
 782    stripTerminalEscapeSequencesRegex = (\x1b\[\d{1,3}(;\d{1,3})*m)|(\x07)|(\x1b]\d+(\s\d)?;[^\x07]+\x07)|(.*\x1b\[K)
 783
 784    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 785    samplingPercent = -1
 786
 787    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 788    samplingKey =
 789
 790    # set output (splunk or devnull, default is [general]defaultOutput)
 791    output =
 792
  793    # configure default thruput per second for each container log
 794    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 795    # from the single container to 128Kb per second.
 796    thruputPerSecond =
 797
 798    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 799    # older than 7 days
 800    tooOldEvents =
 801
 802    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 803    tooNewEvents =
 804
 805
 806    # Application Logs
 807    [input.app_logs]
 808
 809    # disable container application logs monitoring
 810    disabled = false
 811
 812    # root location of mounts (applies to hostPath mounts only), if the hostPath differs inside container from the path on host
 813    root = /rootfs/
 814
 815    # how often to review list of available volumes
 816    syncInterval = 5s
 817
 818    # glob matching pattern for log files
 819    glob = *.log*
 820
 821    # files are read using polling schema, when reach the EOF how often to check if files got updated
 822    pollingInterval = 250ms
 823
 824    # how often to look for the new files under logs path
 825    walkingInterval = 5s
 826
 827    # include verbose fields in events (file offset)
 828    verboseFields = false
 829
 830    # override type
 831    type = openshift_logs
 832
 833    # specify Splunk index
 834    index =
 835
 836    # we split files using new line character, with this configuration you can specify what defines the new event
 837    # after new line
 838    eventPatternRegex = ^[^\s]
 839    # Maximum interval of messages in pipeline
 840    eventPatternMaxInterval = 100ms
 841    # Maximum time to wait for the messages in pipeline
 842    eventPatternMaxWait = 1s
 843    # Maximum message size
 844    eventPatternMaxSize = 1MB
 845
 846    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 847    samplingPercent = -1
 848
 849    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
 850    samplingKey =
 851
 852    # set output (splunk or devnull, default is [general]defaultOutput)
 853    output =
 854
  855    # configure default thruput per second for each container log
 856    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 857    # from the single container to 128Kb per second.
 858    thruputPerSecond =
 859
 860    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 861    # older than 7 days
 862    tooOldEvents =
 863
 864    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 865    tooNewEvents =
 866
 867    # Configure how long Collectord should keep the file descriptors open for files, that has not been forwarded yet
 868    # When using PVC, and if pipeline is lagging behind, Collectord holding open fd for files, can cause long termination
 869    # of pods, as kubelet cannot unmount the PVC volume from the system
 870    maxHoldAfterClose = 1800s
 871
 872
 873    [input.journald]
 874
 875    # disable host level logs
 876    disabled = false
 877
 878    # root location of log files
 879    path.persistent = /rootfs/var/log/journal/
 880    # only if required
 881    # path.volatile = /rootfs/run/log/journal/
 882
 883    # when reach end of journald, how often to pull
 884    pollingInterval = 250ms
 885
 886    # if you don't want to forward journald from the beginning,
 887    # set the oldest event in relative value, like -14h or -30m or -30s (h/m/s supported)
 888    startFromRel =
 889
 890    # override type
 891    type = openshift_host_logs
 892
 893    # specify Splunk index
 894    index =
 895
 896    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
 897    samplingPercent = -1
 898
 899    # sampling key (should be regexp with the named match pattern `key`)
 900    samplingKey =
 901
 902    # how often to reopen the journald to free old files
 903    reopenInterval = 1h
 904
 905    # set output (splunk or devnull, default is [general]defaultOutput)
 906    output =
 907
 908    # configure default thruput per second for this files group
 909    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
 910    # from the files in this group to 128Kb per second.
 911    thruputPerSecond =
 912
 913    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
 914    # older than 7 days
 915    tooOldEvents =
 916
 917    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
 918    tooNewEvents =
 919
 920    # by default every new event should start from not space symbol
 921    eventPattern = ^[^\s]
 922
 923    # By default ignoring verbose hyperkube logs (all INFO messages)
 924    blacklist.0 = ^I\d+.*$
 925    # whitelist.0 = ^regexp$
 926    # blacklist.1 = ^regexp$
 927
 928    # Move Journald logs reader to a separate process, to prevent process from crashing in case of corrupted log files
 929    spawnExternalProcess = false
 930
 931    # Pipe to join events (container logs only)
 932    [pipe.join]
 933
 934    # disable joining event
 935    disabled = false
 936
 937    # Maximum interval of messages in pipeline
 938    maxInterval = 100ms
 939
 940    # Maximum time to wait for the messages in pipeline
 941    maxWait = 1s
 942
 943    # Maximum message size
 944    maxSize = 1MB
 945
 946    # Default pattern to indicate new message (should start not from space)
 947    patternRegex = ^[^\s]
 948
 949    # (deprecated, use annotations for setting up join rules)
 950    # Define special event join patterns for matched events
 951    # Section consist of [pipe.join::<name>]
 952    # [pipe.join::my_app]
 953    ## Set match pattern for the fields
 954    #; matchRegex.docker_container_image = my_app
 955    #; matchRegex.stream = stdout
 956    ## All events start from '[<digits>'
 957    #; patternRegex = ^\[\d+
 958
 959    # You can configure global replace rules for the events, which can help to remove sensitive data
 960    # from logs before they are sent to Splunk. Those rules will be applied to all pipelines for container logs, host logs,
 961    # application logs and events.
 962    # In the following example we replace password=TEST with password=********
 963    ; [pipe.replace::name]
 964    ; patternRegex = (password=)([^\s]+)
 965    ; replace = $1********
 966
 967    # You can configure global hash rules for the events, which can help to hide sensitive data
 968    # from logs before they are sent to outputs. Those rules will be applied to all pipelines for container logs, host logs,
 969    # application logs and events.
 970    # In the following example we hash IP addresses with fnv-1a-64
 971    ; [pipe.hash::name]
 972    ; match = (\d{1,3}\.){3}\d{1,3}
 973    ; function = fnv-1a-64
 974
 975
 976    [input.prometheus::kubelet]
 977
 978    # disable prometheus kubelet metrics
 979    disabled = false
 980
 981    # override type
 982    type = openshift_prometheus
 983
 984    # specify Splunk index
 985    index =
 986
 987    # Override host (environment variables are supported)
 988    host = ${KUBERNETES_NODENAME}
 989
 990    # Override source
 991    source = kubelet
 992
 993    # how often to collect prometheus metrics
 994    interval = 60s
 995
 996    # request timeout
 997    timeout = 60s
 998
 999    # prometheus endpoint
1000    endpoint = https://127.0.0.1:10250/metrics
1001
1002    # token for "Authorization: Bearer $(cat tokenPath)"
1003    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1004
1005    # server certificate for certificate validation
1006    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1007
1008    # client certificate for authentication
1009    clientCertPath =
1010
1011    # Allow invalid SSL server certificate
1012    insecure = true
1013
1014    # include metrics help with the events
1015    # can be useful to explore prometheus metrics
1016    includeHelp = false
1017
1018    # set output (splunk or devnull, default is [general]defaultOutput)
1019    output =
1020
1021    # filter only metrics used by dashboards
1022    whitelist.1 = ^(kubernetes|openshift)_build_info$
1023    whitelist.2 = ^kubelet_runtime_operations_duration_seconds_sum$
1024    whitelist.3 = ^kubelet_docker_operations_duration_seconds_sum$
1025    whitelist.4 = ^kubelet_network_plugin_operations_duration_seconds_sum$
1026    whitelist.5 = ^kubelet_cgroup_manager_duration_seconds_sum$
1027    whitelist.6 = ^storage_operation_duration_seconds_sum$
1028    whitelist.7 = ^kubelet_docker_operations_errors_total$
1029    whitelist.8 = ^kubelet_runtime_operations_errors_total$
1030    whitelist.9 = ^rest_client_requests_total$
1031    whitelist.10 = ^process_cpu_seconds_total$
1032    whitelist.11 = ^process_resident_memory_bytes$
1033    whitelist.12 = ^process_virtual_memory_bytes$
1034    whitelist.13 = ^kubelet_volume_stats_.+$
1035
1036    ; # Collectord reports if entropy is low (uncomment to use it)
1037    ; [diagnostics::node-entropy]
1038    ; settings.path = /rootfs/proc/sys/kernel/random/entropy_avail
1039    ; settings.interval = 1h
1040    ; settings.threshold = 800
1041
1042    # Collectord can report if node reboot is required (uncomment to use it)
1043    [diagnostics::node-reboot-required]
1044    settings.path = /rootfs/var/run/reboot-required*
1045    settings.interval = 1h
1046
1047    # See https://www.kernel.org/doc/Documentation/admin-guide/hw-vuln/index.rst
1048    # And https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
1049    [diagnostics::cpu-vulnerabilities]
1050    settings.path = /rootfs/sys/devices/system/cpu/vulnerabilities/*
1051    settings.interval = 1h
1052
1053  003-daemonset-master.conf: |
1054    [input.prometheus::kubernetes-api]
1055
1056    # disable prometheus kubernetes-api input
1057    disabled = false
1058
1059    # override type
1060    type = openshift_prometheus
1061
1062    # specify Splunk index
1063    index =
1064
1065    # override host
1066    host = ${KUBERNETES_NODENAME}
1067
1068    # override source
1069    source = kubernetes-api
1070
1071    # how often to collect prometheus metrics
1072    interval = 60s
1073
1074    # request timeout
1075    timeout = 60s
1076
1077    # prometheus endpoint
1078    # at first trying to get it from localhost (that way avoiding load balancer, if multiple)
1079    # as fallback using proxy
1080    endpoint.1localhost = https://127.0.0.1:8443/metrics
1081    endpoint.2kubeapi = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/metrics
1082
1083    # token for "Authorization: Bearer $(cat tokenPath)"
1084    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1085
1086    # server certificate for certificate validation
1087    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1088
1089    # client certificate for authentication
1090    clientCertPath =
1091
1092    # Allow invalid SSL server certificate
1093    insecure = true
1094
1095    # include metrics help with the events
1096    includeHelp = false
1097
1098    # set output (splunk or devnull, default is [general]defaultOutput)
1099    output =
1100
1101    # filter only metrics used by dashboards
1102    whitelist.1 = ^(kubernetes|openshift)_build_info$
1103    whitelist.2 = ^authenticated_user_requests$
1104    whitelist.3 = ^apiserver_request_total$
1105    whitelist.4 = ^process_cpu_seconds_total$
1106    whitelist.5 = ^process_resident_memory_bytes$
1107    whitelist.6 = ^process_virtual_memory_bytes$
1108
1109
1110    [input.prometheus::controller]
1111
1112    # disable prometheus controller metrics
1113    disabled = false
1114
1115    # override type
1116    type = openshift_prometheus
1117
1118    # specify Splunk index
1119    index =
1120
1121    # override host
1122    host = ${KUBERNETES_NODENAME}
1123
1124    # override source
1125    source = controller
1126
1127    # how often to collect prometheus metrics
1128    interval = 60s
1129
1130    # request timeout
1131    timeout = 60s
1132
1133    # prometheus endpoint
1134    endpoint.https1 = https://:10257/metrics
1135    endpoint.https2 = https://:8444/metrics
1136
1137    # token for "Authorization: Bearer $(cat tokenPath)"
1138    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1139
1140    # server certificate for certificate validation
1141    certPath =
1142
1143    # client certificate for authentication
1144    clientCertPath =
1145    clientKeyPath =
1146
1147    # Allow invalid SSL server certificate
1148    insecure = true
1149
1150    # include metrics help with the events
1151    includeHelp = false
1152
1153    # set output (splunk or devnull, default is [general]defaultOutput)
1154    output =
1155
1156    # filter only metrics used by dashboards
1157    whitelist.1 = ^(kubernetes|openshift)_build_info$
1158    whitelist.2 = ^process_cpu_seconds_total$
1159    whitelist.3 = ^process_resident_memory_bytes$
1160    whitelist.4 = ^process_virtual_memory_bytes$
1161    whitelist.5 = ^node_collector_zone_size$
1162    whitelist.6 = ^node_collector_zone_health$
1163    whitelist.7 = ^node_collector_unhealthy_nodes_in_zone$
1164
1165    [input.prometheus::scheduler]
1166
1167    # disable prometheus scheduler metrics
1168    disabled = false
1169
1170    # override type
1171    type = openshift_prometheus
1172
1173    # specify Splunk index
1174    index =
1175
1176    # override host
1177    host = ${KUBERNETES_NODENAME}
1178
1179    # override source
1180    source = scheduler
1181
1182    # how often to collect prometheus metrics
1183    interval = 60s
1184
1185    # request timeout
1186    timeout = 60s
1187
1188    # prometheus endpoint
1189    endpoint.https1 = https://:10259/metrics
1190    endpoint.https2 = https://:8444/metrics
1191
1192    # token for "Authorization: Bearer $(cat tokenPath)"
1193    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1194
1195    # server certificate for certificate validation
1196    certPath =
1197
1198    # client certificate for authentication
1199    clientCertPath =
1200    clientKeyPath =
1201
1202    # Allow invalid SSL server certificate
1203    insecure = true
1204
1205    # include metrics help with the events
1206    includeHelp = false
1207
1208    # set output (splunk or devnull, default is [general]defaultOutput)
1209    output =
1210
1211    # filter only metrics used by dashboards
1212    whitelist.1 = ^(kubernetes|openshift)_build_info$
1213    whitelist.2 = ^scheduler_e2e_scheduling_duration_seconds_sum$
1214    whitelist.3 = ^scheduler_binding_duration_seconds_sum$
1215    whitelist.4 = ^scheduler_scheduling_algorithm_duration_seconds_sum$
1216    whitelist.5 = ^process_cpu_seconds_total$
1217    whitelist.6 = ^process_resident_memory_bytes$
1218    whitelist.7 = ^process_virtual_memory_bytes$
1219
1220
1221    [input.prometheus::etcd]
1222
1223    # disable prometheus etcd metrics
1224    disabled = false
1225
1226    # override type
1227    type = openshift_prometheus
1228
1229    # specify Splunk index
1230    index =
1231
1232    # override host
1233    host = ${KUBERNETES_NODENAME}
1234
1235    # override source
1236    source = etcd
1237
1238    # how often to collect prometheus metrics
1239    interval = 60s
1240
1241    # prometheus endpoint
1242    endpoint.https1 = https://:9979/metrics
1243    endpoint.https2 = https://:9978/metrics
1244
1245    # token for "Authorization: Bearer $(cat tokenPath)"
1246    tokenPath =
1247
1248    # server certificate for certificate validation
1249    certPath = /rootfs/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-serving-*.crt
1250
1251    # client certificate for authentication
1252    clientCertPath = /rootfs/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-peer-*.crt
1253    clientKeyPath = /rootfs/etc/kubernetes/static-pod-resources/etcd-certs/secrets/etcd-all-certs/etcd-peer-*.key
1254
1255    # Allow invalid SSL server certificate
1256    insecure = true
1257
1258    # include metrics help with the events
1259    includeHelp = false
1260
1261    # set output (splunk or devnull, default is [general]defaultOutput)
1262    output =
1263
1264    whitelist.1 = ^etcd_server_leader_changes_seen_total$
1265    whitelist.2 = ^etcd_server_has_leader$
1266    whitelist.3 = ^etcd_server_proposals_committed_total$
1267    whitelist.4 = ^etcd_server_proposals_applied_total$
1268    whitelist.5 = ^etcd_server_proposals_committed_total$
1269    whitelist.6 = ^etcd_server_proposals_pending$
1270    whitelist.7 = ^etcd_server_proposals_failed_total$
1271    whitelist.8 = ^etcd_disk_wal_fsync_duration_seconds_sum$
1272    whitelist.9 = ^etcd_disk_wal_fsync_duration_seconds_count$
1273    whitelist.10 = ^etcd_disk_backend_commit_duration_seconds_sum$
1274    whitelist.11 = ^etcd_disk_backend_commit_duration_seconds_count$
1275    whitelist.12 = ^etcd_network_client_grpc_.*$
1276    whitelist.13 = ^grpc_server_handled_total$
1277    whitelist.14 = ^etcd_network_peer_round_trip_time_seconds_bucket$
1278    whitelist.15 = ^process_cpu_seconds_total$
1279    whitelist.16 = ^process_resident_memory_bytes$
1280    whitelist.17 = ^process_virtual_memory_bytes$
1281    whitelist.18 = ^process_open_fds$
1282    whitelist.19 = ^process_max_fds$
1283    whitelist.20 = ^etcd_disk_backend_commit_duration_seconds_bucket$
1284    whitelist.21 = ^etcd_disk_wal_fsync_duration_seconds_bucket$
1285
1286
1287    # Audit logs
1288    [input.files::audit-logs]
1289
1290    # disable host level logs
1291    disabled = false
1292
1293    # root location for audit logs
1294    path = /rootfs/var/log/kube-apiserver/
1295
1296    # glob matching files
1297    glob = audit*.log
1298
1299    # files are read using polling schema, when reach the EOF how often to check if files got updated
1300    pollingInterval = 250ms
1301
1302    # how often to look for the new files under logs path
1303    walkingInterval = 5s
1304
1305    # include verbose fields in events (file offset)
1306    verboseFields = false
1307
1308    # override type
1309    type = openshift_host_logs
1310
1311    # specify Splunk index
1312    index =
1313
1314    # field extraction
1315    extraction = (?P<message>.*"stageTimestamp":"(?P<timestamp>[^"]+)".*)
1316    extractionMessageField = message
1317
1318    # timestamp field
1319    timestampField = timestamp
1320
1321    # format for timestamp
1322    # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`
1323    timestampFormat = 2006-01-02T15:04:05.999999999Z07:00
1324
1325    # timestamp location (if not defined by format)
1326    timestampLocation =
1327
1328    # set output (splunk or devnull, default is [general]defaultOutput)
1329    output =
1330
1331    # configure default thruput per second for this files group
1332    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
1333    # from the files in this group to 128Kb per second.
1334    thruputPerSecond =
1335
1336    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
1337    # older than 7 days
1338    tooOldEvents =
1339
1340    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
1341    tooNewEvents =
1342
1343    # Blacklisting and whitelisting the logs
1344    # whitelist.0 = ^regexp$
1345    # blacklist.0 = ^regexp$
1346
1347
1348  004-addon.conf: |
1349    [general]
1350    # addons can be run in parallel with agents
1351    addon = true
1352
1353
1354    [input.kubernetes_events]
1355
1356    # disable collecting kubernetes events
1357    disabled = false
1358
1359    # override type
1360    type = openshift_events
1361
1362    # specify Splunk index
1363    index =
1364
1365    # set output (splunk or devnull, default is [general]defaultOutput)
1366    output =
1367
1368    # exclude managed fields from the metadata
1369    excludeManagedFields = true
1370
1371
1372    [input.kubernetes_watch::pods]
1373
1374    # disable events
1375    disabled = false
1376
1377    # Set the timeout for how often watch request should refresh the whole list
1378    refresh = 10m
1379
1380    apiVersion = v1
1381    kind = Pod
1382    namespace =
1383
1384    # override type
1385    type = openshift_objects
1386
1387    # specify Splunk index
1388    index =
1389
1390    # set output (splunk or devnull, default is [general]defaultOutput)
1391    output =
1392
1393    # exclude managed fields from the metadata
1394    excludeManagedFields = true
1395
1396    # you can remove or hash some values in the events (after modifyValues you can define path in the JSON object,
1397    # and the value can be hash:{hashFunction}, or remove to remove the object )
1398    ; modifyValues.object.data.* = hash:sha256
1399    ; modifyValues.object.metadata.annotations.* = remove
1400
1401    # You can exclude events by namespace with blacklist or whitelist only required namespaces
1402    # blacklist.kubernetes_namespace = ^namespace0$
1403    # whitelist.kubernetes_namespace = ^((namespace1)|(namespace2))$
1404
1405    [input.kubernetes_watch::resourcequota]
1406    # disable events
1407    disabled = false
1408
1409    # Set the timeout for how often watch request should refresh the whole list
1410    refresh = 10m
1411
1412    apiVersion = v1
1413    kind = ResourceQuota
1414    namespace =
1415
1416    # override type
1417    type = openshift_objects
1418
1419    # specify Splunk index
1420    index =
1421
1422    # set output (splunk or devnull, default is [general]defaultOutput)
1423    output =
1424
1425    # exclude managed fields from the metadata
1426    excludeManagedFields = true
1427
1428
1429    [input.kubernetes_watch::clusterresourcequota]
1430    # disable events
1431    disabled = false
1432
1433    # Set the timeout for how often watch request should refresh the whole list
1434    refresh = 10m
1435
1436    apiVersion = quota.openshift.io/v1
1437    kind = ClusterResourceQuota
1438    namespace =
1439
1440    # override type
1441    type = openshift_objects
1442
1443    # specify Splunk index
1444    index =
1445
1446    # set output (splunk or devnull, default is [general]defaultOutput)
1447    output =
1448
1449    # exclude managed fields from the metadata
1450    excludeManagedFields = true
1451
1452
1453    [input.kubernetes_watch::nodes]
1454    # disable events
1455    disabled = false
1456
1457    # Set the timeout for how often watch request should refresh the whole list
1458    refresh = 10m
1459
1460    apiVersion = v1
1461    kind = Node
1462    namespace =
1463
1464    # override type
1465    type = openshift_objects
1466
1467    # specify Splunk index
1468    index =
1469
1470    # set output (splunk or devnull, default is [general]defaultOutput)
1471    output =
1472
1473    # exclude managed fields from the metadata
1474    excludeManagedFields = true
1475
1476---
1477apiVersion: apps/v1
1478kind: DaemonSet
1479metadata:
1480  name: collectorforopenshift
1481  namespace: collectorforopenshift
1482  labels:
1483    app: collectorforopenshift
1484spec:
1485  # The default updateStrategy is OnDelete; RollingUpdate suits Collectord
1486  # better, so configuration changes roll out without deleting pods by hand
1487  updateStrategy:
1488    type: RollingUpdate
1489
1490  selector:
1491    matchLabels:
1492      daemon: collectorforopenshift
1493
1494  template:
1495    metadata:
1496      name: collectorforopenshift
1497      labels:
1498        daemon: collectorforopenshift
1499    spec:
1500      priorityClassName: collectorforopenshift-critical
1501      dnsPolicy: ClusterFirstWithHostNet
1502      hostNetwork: true
1503      serviceAccountName: collectorforopenshift
1504      # Run only on non-control-plane nodes; the companion
1504      # collectorforopenshift-master DaemonSet covers control-plane nodes
1505      affinity:
1506        nodeAffinity:
1507          requiredDuringSchedulingIgnoredDuringExecution:
1508            nodeSelectorTerms:
1509            - matchExpressions:
1510              - key: node-role.kubernetes.io/control-plane
1511                operator: DoesNotExist
1512      tolerations:
1513      - operator: "Exists"
1514        effect: "NoSchedule"
1515      - operator: "Exists"
1516        effect: "NoExecute"
1517      containers:
1518      - name: collectorforopenshift
1519        # Pin a specific image version to avoid unexpected upgrades
1520        image: registry.connect.redhat.com/outcoldsolutions/collectorforopenshift:25.10.3
1521        securityContext:
1522          privileged: true
1523          runAsUser: 0
1524        # Define your resources if you need. Defaults should be fine for most.
1525        resources:
1526          limits:
1527            cpu: 2000m
1528            memory: 512Mi
1529          requests:
1530            cpu: 500m
1531            memory: 256Mi
1532        env:
1533        - name: KUBERNETES_NODENAME
1534          valueFrom:
1535            fieldRef:
1536              fieldPath: spec.nodeName
1537        - name: POD_NAME
1538          valueFrom:
1539            fieldRef:
1540              fieldPath: metadata.name
1541        volumeMounts:
1542        # Collectord state (file read positions) lives under /data
1543        - name: collectorforopenshift-state
1544          mountPath: /data
1545        # Configuration files deployed with the ConfigMap
1546        - name: collectorforopenshift-config
1547          mountPath: /config/
1548          readOnly: true
1549        # Host root filesystem (container/host logs, journald, diagnostics paths)
1550        - name: rootfs
1551          mountPath: /rootfs/
1552          readOnly: false
1553          mountPropagation: HostToContainer
1554        # Mount the node timezone so event timestamps match the host
1555        - name: localtime
1556          mountPath: /etc/localtime
1557          readOnly: true
1558      volumes:
1559      # State is stored directly on the host; change this location if
1560      # your persistent volume is somewhere else
1561      - name: collectorforopenshift-state
1562        hostPath:
1563          path: /var/lib/collectorforopenshift/data/
1564          type: DirectoryOrCreate
1565      # Location of rootfs
1566      - name: rootfs
1567        hostPath:
1568          path: /
1569      # correct timezone
1570      - name: localtime
1571        hostPath:
1572          path: /etc/localtime
1573      # configuration from ConfigMap
1574      - name: collectorforopenshift-config
1575        configMap:
1576          name: collectorforopenshift
1577          items:
1578          - key: 001-general.conf
1579            path: 001-general.conf
1580          - key: 002-daemonset.conf
1581            path: 002-daemonset.conf
1582---
1583apiVersion: apps/v1
1584kind: DaemonSet
1585metadata:
1586  name: collectorforopenshift-master
1587  namespace: collectorforopenshift
1588  labels:
1589    app: collectorforopenshift
1590spec:
1591  updateStrategy:
1592    type: RollingUpdate
1593  selector:
1594    matchLabels:
1595      daemon: collectorforopenshift
1596  template:
1597    metadata:
1598      name: collectorforopenshift-master
1599      labels:
1600        daemon: collectorforopenshift
1601    spec:
1602      priorityClassName: collectorforopenshift-critical
1603      dnsPolicy: ClusterFirstWithHostNet
1604      hostNetwork: true
1605      serviceAccountName: collectorforopenshift
1606      # Deploy only on control-plane (master) nodes
1607      affinity:
1608        nodeAffinity:
1609          requiredDuringSchedulingIgnoredDuringExecution:
1610            nodeSelectorTerms:
1611            - matchExpressions:
1612              - key: node-role.kubernetes.io/control-plane
1613                operator: Exists
1614      tolerations:
1615      - operator: "Exists"
1616        effect: "NoSchedule"
1617      - operator: "Exists"
1618        effect: "NoExecute"
1619      containers:
1620      - name: collectorforopenshift
1621        image: registry.connect.redhat.com/outcoldsolutions/collectorforopenshift:25.10.3
1622        securityContext:
1623          privileged: true
1624          runAsUser: 0
1625        resources:
1626          limits:
1627            cpu: 2000m
1628            memory: 1024Mi
1629          requests:
1630            cpu: 500m
1631            memory: 256Mi
1632        env:
1633        - name: KUBERNETES_NODENAME
1634          valueFrom:
1635            fieldRef:
1636              fieldPath: spec.nodeName
1637        - name: POD_NAME
1638          valueFrom:
1639            fieldRef:
1640              fieldPath: metadata.name
1641        volumeMounts:
1642        - name: collectorforopenshift-state
1643          mountPath: /data
1644        - name: collectorforopenshift-config
1645          mountPath: /config/
1646          readOnly: true
1647        # Host root filesystem
1648        - name: rootfs
1649          mountPath: /rootfs/
1650          readOnly: false
1651          mountPropagation: HostToContainer
1652        - name: localtime
1653          mountPath: /etc/localtime
1654          readOnly: true
1655      volumes:
1656      - name: collectorforopenshift-state
1657        hostPath:
1658          path: /var/lib/collectorforopenshift/data/
1659          type: DirectoryOrCreate
1660      - name: rootfs
1661        hostPath:
1662          path: /
1663      - name: localtime
1664        hostPath:
1665          path: /etc/localtime
1666      - name: collectorforopenshift-config
1667        configMap:
1668          name: collectorforopenshift
1669          items:
1670          - key: 001-general.conf
1671            path: 001-general.conf
1672          - key: 002-daemonset.conf
1673            path: 002-daemonset.conf
1674          - key: 003-daemonset-master.conf
1675            path: 003-daemonset-master.conf
1676---
1677apiVersion: apps/v1
1678kind: Deployment
1679metadata:
1680  name: collectorforopenshift-addon
1681  namespace: collectorforopenshift
1682  labels:
1683    app: collectorforopenshift
1684spec:
1685  replicas: 1
1686  selector:
1687    matchLabels:
1688      daemon: collectorforopenshift
1689  template:
1690    metadata:
1691      name: collectorforopenshift-addon
1692      labels:
1693        daemon: collectorforopenshift
1694    spec:
1695      priorityClassName: collectorforopenshift-critical
1696      serviceAccountName: collectorforopenshift
1697      containers:
1698      - name: collectorforopenshift
1699        image: registry.connect.redhat.com/outcoldsolutions/collectorforopenshift:25.10.3
1700        securityContext:
1701          privileged: true
1702          runAsUser: 0
1703        resources:
1704          limits:
1705            cpu: 1000m
1706            memory: 512Mi
1707          requests:
1708            cpu: 200m
1709            memory: 64Mi
1710        env:
1711        - name: KUBERNETES_NODENAME
1712          valueFrom:
1713            fieldRef:
1714              fieldPath: spec.nodeName
1715        - name: POD_NAME
1716          valueFrom:
1717            fieldRef:
1718              fieldPath: metadata.name
1719        volumeMounts:
1720        - name: collectorforopenshift-state
1721          mountPath: /data
1722        - name: collectorforopenshift-config
1723          mountPath: /config/
1724          readOnly: true
1725      volumes:
1726      - name: collectorforopenshift-state
1727        hostPath:
1728          path: /var/lib/collectorforopenshift/data/
              # DirectoryOrCreate (instead of Directory) matches the two
              # DaemonSets above and avoids a mount failure when this addon pod
              # is scheduled on a node where the state directory does not exist yet
1729          type: DirectoryOrCreate
1730      - name: collectorforopenshift-config
1731        configMap:
1732          name: collectorforopenshift
1733          items:
1734          - key: 001-general.conf
1735            path: 001-general.conf
1736          - key: 004-addon.conf
1737            path: 004-addon.conf

About Outcold Solutions

Outcold Solutions provides solutions for monitoring Kubernetes, OpenShift and Docker clusters in Splunk Enterprise and Splunk Cloud. We offer certified Splunk applications, which give you insights across all container environments. We are helping businesses reduce complexity related to logging and monitoring by providing easy-to-use and easy-to-deploy solutions for Linux and Windows containers. We deliver applications, which help developers monitor their applications and help operators keep their clusters healthy. With the power of Splunk Enterprise and Splunk Cloud, we offer one solution to help you keep all the metrics and logs in one place, allowing you to quickly address complex questions on container performance.

Red Hat
Splunk
AWS