# Dedicated namespace for all Collectord for Kubernetes workloads
# (ServiceAccount, ConfigMap and DaemonSets below are created in it).
apiVersion: v1
kind: Namespace
metadata:
  labels:
    app: collectorforkubernetes
  name: collectorforkubernetes
7---
# CRD for collectord.io/v1 Configuration objects (cluster-scoped).
# Collectord watches these objects (see watch.configurations in the
# ConfigMap below) to pick up configuration at runtime.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: configurations.collectord.io
spec:
  group: collectord.io
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              # free-form payload: collectord validates the contents itself
              additionalProperties: true
            force:
              type: boolean
  scope: Cluster
  names:
    listKind: ConfigurationList
    plural: configurations
    singular: configuration
    kind: Configuration
33---
# CRD for collectord.io/v1 SplunkOutput objects (namespaced).
# Each object describes one Splunk HTTP Event Collector destination.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: splunkoutputs.collectord.io
spec:
  group: collectord.io
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                # Splunk HTTP Event Collector endpoint
                url:
                  type: string
                  format: uri
                # allow invalid SSL server certificates when true
                insecure:
                  type: boolean
                token:
                  type: string
                  description: "Plain token"
                tokenFromSecret:
                  type: object
                  description: "Reference to a Kubernetes Secret"
                  properties:
                    secret:
                      type: string
                    key:
                      type: string
              # exactly one of the two token sources must be provided
              oneOf:
                - required: ["token"]
                - required: ["tokenFromSecret"]
  scope: Namespaced
  names:
    listKind: SplunkOutputList
    plural: splunkoutputs
    singular: splunkoutput
    kind: SplunkOutput
76---
# Identity the collector pods run under; bound to the ClusterRole
# of the same name by the ClusterRoleBinding below.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: collectorforkubernetes
  name: collectorforkubernetes
  namespace: collectorforkubernetes
84---
# High priority so the collector is scheduled (and evicted last) ahead of
# regular workloads; 1000000000 is the maximum user-definable value
# (values above it are reserved for system critical classes).
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: collectorforkubernetes-critical
value: 1000000000
90---
# Cluster-wide, read-only permissions for the collector, plus the ability
# to run under the "privileged" PodSecurityPolicy.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app: collectorforkubernetes
  name: collectorforkubernetes
rules:
# Allow the collector pods to use the privileged PodSecurityPolicy.
# PSP is served from the 'policy' API group; the deprecated 'extensions'
# group stopped serving it in Kubernetes 1.16, and this manifest already
# requires 1.16+ (apiextensions.k8s.io/v1 CRDs above), so 'extensions'
# here was a dead rule.
# NOTE(review): PodSecurityPolicy was removed entirely in Kubernetes 1.25 —
# drop this rule when targeting 1.25+.
- apiGroups: ['policy']
  resources: ['podsecuritypolicies']
  verbs: ['use']
  resourceNames:
    - privileged
# Read-only access to the workload and cluster objects collectord traverses
# to attach metadata to forwarded logs, events and metrics.
- apiGroups:
    - ""
    - apps
    - batch
    - extensions
    - collectord.io
    - rbac.authorization.k8s.io
  resources:
    - splunkoutputs
    - alertmanagers
    - cronjobs
    - daemonsets
    - deployments
    - endpoints
    - events
    - jobs
    - namespaces
    - nodes
    - nodes/metrics
    - nodes/proxy
    - pods
    - replicasets
    - replicationcontrollers
    - scheduledjobs
    - secrets
    - services
    - statefulsets
    - persistentvolumeclaims
    - configurations
    - resourcequotas
    - clusterroles
  verbs:
    - get
    - list
    - watch
# Scrape non-resource metrics endpoints (e.g. API server /metrics).
# A nonResourceURLs rule takes no apiGroups/resources, so the empty
# lists the original carried are omitted.
- nonResourceURLs:
    - /metrics
  verbs:
    - get
144---
# Grants the ClusterRole above to the collector's ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app: collectorforkubernetes
  name: collectorforkubernetes
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: collectorforkubernetes
subjects:
  - kind: ServiceAccount
    name: collectorforkubernetes
    namespace: collectorforkubernetes
159---
160apiVersion: v1
161kind: ConfigMap
162metadata:
163 name: collectorforkubernetes
164 namespace: collectorforkubernetes
165 labels:
166 app: collectorforkubernetes
167data:
168 001-general.conf: |
169 # The general configuration is used for all deployments
170 #
171 # Run collectord with the flag -conf and specify location of the configuration files.
172 #
173 # You can override all the values using environment variables with the format like
174 # COLLECTOR__<ANYNAME>=<section>__<key>=<value>
175 # As an example you can set dataPath in [general] section as
176 # COLLECTOR__DATAPATH=general__dataPath=C:\\some\\path\\data.db
177 # This parameter can be configured using -env-override, set it to empty string to disable this feature
178
179 [general]
180
181 # Please review license https://www.outcoldsolutions.com/docs/license-agreement/
182 # and accept license by changing the value to *true*
183 acceptLicense = false
184
185 # Location for the database
186 # Collectord stores positions of the files and internal state
187 dataPath = ./data/
188
189 # log level (accepted values are trace, debug, info, warn, error, fatal)
190 logLevel = info
191
192 # http server gives access to two endpoints
193 # /healthz
194 # /metrics/json
195 # /metrics/prometheus
196 # httpServerBinding = 0.0.0.0:11888
197 httpServerBinding =
198
199 # log requests to the http server
200 httpServerLog = false
201
202 # telemetry report endpoint, set it to empty string to disable telemetry
203 telemetryEndpoint = https://license.outcold.solutions/telemetry/
204
205 # license check endpoint
206 licenseEndpoint = https://license.outcold.solutions/license/
207
208 # license server through proxy
209 # This configuration is used only for the Outcold Solutions License Server
210 # For license server running on-premises, use configuration under [license.client]
211 licenseServerProxyUrl =
212
213 # authentication with basic authorization (user:password)
214 # This configuration is used only for the Outcold Solutions License Server
215 # For license server running on-premises, use configuration under [license.client]
216 licenseServerProxyBasicAuth =
217
218 # license key
219 license =
220
221 # Environment variable $KUBERNETES_NODENAME is used by default to setup hostname
222 # Use value below to override specific name
223 hostname =
224
225 # Default output for events, logs and metrics
226 # valid values: splunk and devnull
227 # Use devnull by default if you don't want to redirect data
228 defaultOutput = splunk
229
230 # Default buffer size for file input
231 fileInputBufferSize = 256b
232
233 # Maximum size of one line the file reader can read
234 fileInputLineMaxSize = 1mb
235
    # Include custom fields to attach to every event, in the example below every event sent to Splunk will have
    # indexed field my_environment=dev. Field names should match ^[a-z][_a-z0-9]*$
238 # Better way to configure that is to specify labels for Kubernetes Nodes.
239 # ; fields.my_environment = dev
240 # Identify the cluster if you are planning to monitor multiple clusters
241 fields.kubernetes_cluster = -
242
243 # Include EC2 Metadata (see list of possible fields https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html)
244 # Should be in format ec2Metadata.{desired_field_name} = {url path to read the value}
245 # ec2Metadata.ec2_instance_id = /latest/meta-data/instance-id
246 # ec2Metadata.ec2_instance_type = /latest/meta-data/instance-type
247
248 # subdomain for the annotations added to the pods, workloads, namespaces or containers, like splunk.collectord.io/..
249 annotationsSubdomain =
250
251 # configure global thruput per second for forwarded logs (metrics are not included)
252 # for example if you set `thruputPerSecond = 512Kb`, that will limit amount of logs forwarded
253 # from the single Collectord instance to 512Kb per second.
254 # You can configure thruput individually for the logs (including specific for container logs) below
255 thruputPerSecond =
256
257 # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
258 # older than 7 days
259 tooOldEvents =
260
261 # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
262 tooNewEvents =
263
264 # For input.files::X and application logs, when glob or match are configured, Collectord can automatically
265 # detect gzipped files and skip them (based on the extensions or magic numbers)
266 autoSkipGzipFiles = true
267
268 [license.client]
269 # point to the license located on the HTTP web server, or a hosted by the Collectord running as license server
270 url =
271 # basic authentication for the HTTP server
272 basicAuth =
273 # if SSL, ignore the certificate verification
274 insecure = false
275 # CA Path for the Server certificate
276 capath =
    # CA Name for the Server certificate
278 caname =
279 # license server through proxy
280 proxyUrl =
281 # authentication with basic authorization (user:password)
282 proxyBasicAuth =
283
284
285 # forward internal collectord metrics
286 [input.collectord_metrics]
287
288 # disable collectord internal metrics
289 disabled = false
290
291 # override type
292 type = kubernetes_prometheus
293
294 # how often to collect internal metrics
295 interval = 1m
296
297 # set output (splunk or devnull, default is [general]defaultOutput)
298 output =
299
300 # specify Splunk index
301 index =
302
303 # whitelist or blacklist the metrics
304 whitelist.1 = ^file_input_open$
305 whitelist.2 = ^file_input_read_bytes$
306 whitelist.3 = ^kubernetes_handlers$
307 whitelist.4 = ^pipe$
308 whitelist.5 = ^pipelines_num$
309 whitelist.6 = ^splunk_post_bytes_sum.*$
310 whitelist.7 = ^splunk_post_events_count_sum.*$
311 whitelist.8 = ^splunk_post_failed_requests$
312 whitelist.9 = ^splunk_post_message_max_lag_seconds_bucket.*$
313 whitelist.10 = ^splunk_post_requests_seconds_sum.*$
314 whitelist.11 = ^splunk_post_retries_required_sum.*$
315
316
317 # connection to kubernetes api
318 [general.kubernetes]
319
320 # Override service URL for Kubernetes (default is ${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT})
321 serviceURL =
322
323 # Environment variable $KUBERNETES_NODENAME is used by default to setup nodeName
324 # Use it only when you need to override it
325 nodeName =
326
327 # Configuration to access the API server,
328 # see https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod
329 # for details
330 tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
331 certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
332
333 # Default timeout for http responses. The streaming/watch requests depend on this timeout.
334 timeout = 30m
335
336 # How long to keep the cache for the recent calls to API server (to limit number of calls when collectord discovers new pods)
337 metadataTTL = 30s
338
339 # regex to find pods
340 podsCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?$
341
342 # regex to find containers in the pods
343 containersCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?/(docker-|crio-|cri-\w+-)?[0-9a-f]{64}(\.scope)?(\/.+)?$
344
345 # path to the kubelet root location (use it to discover application logs for emptyDir)
346 # the expected format is `pods/{pod-id}/volumes/kubernetes.io~empty-dir/{volume-name}/_data/`
347 volumesRootDir = /rootfs/var/lib/kubelet/
348
349 # You can attach annotations as a metadata, using the format
350 # includeAnnotations.{key} = {regexp}
351 # For example if you want to include all annotations that starts with `prometheus.io` or `example.com` you can include
352 # the following format:
353 # includeAnnotations.1 = ^prometheus\.io.*
354 # includeAnnotations.2 = ^example\.com.*
355
356 # You can exclude labels from metadata, using the format
357 # excludeLabels.{key} = {regexp}
358 # For example if you want to exclude all labels that starts with `prometheus.io` or `example.com` you can include
359 # the following format:
360 # excludeLabels.1 = ^prometheus\.io.*
361 # excludeLabels.2 = ^example\.com.*
362
363 # watch for changes (annotations) in the objects
364 watch.namespaces = v1/namespace
365 watch.deployments = apps/v1/deployment
366 watch.configurations = collectord.io/v1/configuration
367
368 # Collectord can review the assigned ClusterRole and traverse metadata for the Pods only for the Owner objects
369 # that are defined in the ClusterRole, ignoring anything else, it does not have access to.
370 # This way Collectord does not generate 403 requests on API Server
371 clusterRole = collectorforkubernetes
372
373 # Alternative of telling Collectord about the ClusterRole is to manually list the objects.
374 # You can define which objects Collectord should traverse when it sees Owners.
375 ; traverseOwnership.namespaces = v1/namespace
376
377 # Implementation of the watch protocol.
378 # 0 - use the default implementation (2)
379 # 1 - use the watch implementation that is optimized for the small number of objects (just issue one watch for all objects)
380 # 2 - use the watch implementation that is optimized for the large number of objects (paginate through the list of objects and issue watch for the last resource version)
381 watchImplementation = 2
382
383 # watch for pods annotations, setup prometheus collection
384 # for these pods
385 # Addon listens on Pod Network
386 # DaemonSets listen on Host Network
387 [input.prometheus_auto]
388
389 # disable prometheus auto discovery for pods
390 disabled = false
391
392 # override type
393 type = kubernetes_prometheus
394
395 # specify Splunk index
396 index =
397
398 # how often to collect prometheus metrics
399 interval = 60s
400
401 # include metrics help with the events
402 includeHelp = true
403
404 # http client timeout
405 timeout = 30s
406
407 # set output (splunk or devnull, default is [general]defaultOutput)
408 output =
409
410 # Include an Authorization header for the prometheus scrapper
411 # When configuring scrapping with collectord using annotations use prometheus.1-AuthorizationKey=key1
412 # authorization.key1 = Bearer FOO
413
414
415 # Splunk output
416 [output.splunk]
417
418 # Splunk HTTP Event Collector url
419 url =
    # You can specify multiple Splunk URLs with
421 #
422 # urls.0 = https://server1:8088/services/collector/event/1.0
423 # urls.1 = https://server1:8088/services/collector/event/1.0
424 # urls.2 = https://server1:8088/services/collector/event/1.0
425 #
426 # Limitations:
427 # * The urls cannot have different path.
428
429 # Specify how URL should be picked up (in case if multiple is used)
430 # urlSelection = random|round-robin|random-with-round-robin
431 # where:
432 # * random - choose random url on first selection and after each failure (connection or HTTP status code >= 500)
433 # * round-robin - choose url starting from first one and bump on each failure (connection or HTTP status code >= 500)
434 # * random-with-round-robin - choose random url on first selection and after that in round-robin on each
435 # failure (connection or HTTP status code >= 500)
436 urlSelection = random-with-round-robin
437
438 # Splunk HTTP Event Collector Token
439 token =
440
441 # Allow invalid SSL server certificate
442 insecure = false
443 # minTLSVersion = TLSv1.2
444 # maxTLSVersion = TLSv1.3
445
    # Path to CA certificate
447 caPath =
448
449 # CA Name to verify
450 caName =
451
452 # path for client certificate (if required)
453 clientCertPath =
454
455 # path for client key (if required)
456 clientKeyPath =
457
458 # Events are batched with the maximum size set by batchSize and staying in pipeline for not longer
459 # than set by frequency
460 frequency = 5s
461 batchSize = 768K
462 # limit by the number of events (0 value has no limit on the number of events)
463 events = 50
464
465 # Splunk through proxy
466 proxyUrl =
467
468 # authentication with basic authorization (user:password)
469 proxyBasicAuth =
470
471 # Splunk acknowledgement url (.../services/collector/ack)
472 ackUrl =
    # You can specify multiple Splunk URLs for ackUrl
474 #
475 # ackUrls.0 = https://server1:8088/services/collector/ack
476 # ackUrls.1 = https://server1:8088/services/collector/ack
477 # ackUrls.2 = https://server1:8088/services/collector/ack
478 #
479 # Make sure that they in the same order as urls for url, to make sure that this Splunk instance will be
480 # able to acknowledge the payload.
481 #
482 # Limitations:
483 # * The urls cannot have different path.
484
485 # Enable index acknowledgment
486 ackEnabled = false
487
488 # Index acknowledgment timeout
489 ackTimeout = 3m
490
491 # Timeout specifies a time limit for requests made by collectord.
492 # The timeout includes connection time, any
493 # redirects, and reading the response body.
494 timeout = 30s
495
    # in case when pipeline can post to multiple indexes, we want to avoid the possibility of blocking
497 # all pipelines, because just some events have incorrect index
498 dedicatedClientPerIndex = true
499
500 # possible values: RedirectToDefault, Drop, Retry
501 incorrectIndexBehavior = RedirectToDefault
502
503 # gzip compression level (nocompression, default, 1...9)
504 compressionLevel = default
505
506 # number of dedicated splunk output threads (to increase throughput above 4k events per second)
507 threads = 2
508 # Default algorithm between threads is roundrobin, but you can change it to weighted
509 ; threadsAlgorithm = weighted
510
511 # if you want to exclude some preindexed fields from events
512 # excludeFields.kubernetes_pod_ip = true
513
514 # By default if there are no indexes defined on the message, Collectord sends the event without the index, and
515 # Splunk HTTP Event Collector going to use the default index for the Token. You can change that, and tell Collectord
516 # to ignore all events that don't have index defined explicitly
517 ; requireExplicitIndex = true
518
519 # You can define if you want to truncate messages that are larger than 1M in length (or define your own size, like 256K)
520 ; maximumMessageLength = 1M
521
522 # For messages generated from logs, include unique `event_id` in the event
523 ; includeEventID = false
524
525 # Dedicated queue size for the output, default is 1024, larger queue sizes will require more memory,
526 # but will allow to handle more events in case of network issues
527 queueSize = 1024
528
529 # How many digits after the decimal point to keep for timestamps (0-9)
530 # Defaults to 3 (milliseconds)
531 # Change to 6 for microseconds
532 # Change to 9 for nanoseconds
533 ; timestampPrecision = 3
534
535 002-daemonset.conf: |
536 # DaemonSet configuration is used for Nodes and Masters.
537
    # connection to CRI-O
539 [general.cri-o]
540
541 # url for CRIO API, only unix socket is supported
542 url = unix:///rootfs/var/run/crio/crio.sock
543
544 # Timeout for http responses to docker client. The streaming requests depend on this timeout.
545 timeout = 1m
546
547
548 [general.containerd]
549 # Runtime can be on /rootfs/run/containerd (depends on the Linux distribution)
550 runtimePath = /rootfs/var/run/containerd
551 namespace = k8s.io
552
553
554 # cgroup input
555 [input.system_stats]
556
557 # disable system level stats
558 disabled.host = false
559 disabled.cgroup = false
560
561 # cgroups fs location
562 pathCgroups = /rootfs/sys/fs/cgroup
563
564 # proc location
565 pathProc = /rootfs/proc
566
567 # how often to collect cgroup stats
568 statsInterval = 30s
569
570 # override type
571 type.host = kubernetes_stats_v2_host
572 type.cgroup = kubernetes_stats_v2_cgroup
573
574 # specify Splunk index
575 index.host =
576 index.cgroup =
577
578 # set output (splunk or devnull, default is [general]defaultOutput)
579 output.host =
580 output.cgroup =
581
582
583 # proc input
584 [input.proc_stats]
585
586 # disable proc level stats
587 disabled = false
588
589 # proc location
590 pathProc = /rootfs/proc
591
592 # how often to collect proc stats
593 statsInterval = 30s
594
595 # override type
596 type = kubernetes_proc_stats_v2
597
598 # specify Splunk index
599 index.host =
600 index.cgroup =
601
602 # proc filesystem includes by default system threads (there can be over 100 of them)
603 # these stats do not help with the observability
604 # excluding them can reduce the size of the index, performance of the searches and usage of the collector
605 includeSystemThreads = false
606
607 # set output (splunk or devnull, default is [general]defaultOutput)
608 output.host =
609 output.cgroup =
610
611 # Hide arguments for the processes, replacing with HIDDEN_ARGS(NUMBER)
612 hideArgs = false
613
614
615 # network stats
616 [input.net_stats]
617
618 # disable net stats
619 disabled = false
620
621 # proc path location
622 pathProc = /rootfs/proc
623
624 # how often to collect net stats
625 statsInterval = 30s
626
627 # override type
628 type = kubernetes_net_stats_v2
629
630 # specify Splunk index
631 index.host =
632 index.cgroup =
633
634 # set output (splunk or devnull, default is [general]defaultOutput)
635 output.host =
636 output.cgroup =
637
638
639 # network socket table
640 [input.net_socket_table]
641
642 # disable net stats
643 disabled = false
644
645 # proc path location
646 pathProc = /rootfs/proc
647
648 # how often to collect net stats
649 statsInterval = 30s
650
651 # override type
652 type = kubernetes_net_socket_table
653
654 # specify Splunk index
655 index.host =
656 index.cgroup =
657
658 # set output (splunk or devnull, default is [general]defaultOutput)
659 output.host =
660 output.cgroup =
661
662 # group connections by tcp_state, localAddr, remoteAddr (if localPort is not the port it is listening on)
    # that can significantly reduce the amount of events
664 group = true
665
666 # Collectord can watch for services, node, and pod IP addresses, and lookup the names
667 # for the IP addresses. Keeping this enabled can add a significant load on the API Server, with large number of pods.
668 disableLookup = false
669
670
671 # mount input (collects mount stats where kubelet runtime is stored)
672 [input.mount_stats]
673
674 # disable system level stats
675 disabled = false
676
677 # how often to collect mount stats
678 statsInterval = 30s
679
680 # override type
681 type = kubernetes_mount_stats
682
683 # specify Splunk index
684 index =
685
686 # set output (splunk or devnull, default is [general]defaultOutput)
687 output =
688
689
690 # diskstats input (collects /proc/diskstats)
691 [input.disk_stats]
692
693 # disable system level stats
694 disabled = false
695
696 # how often to collect mount stats
697 statsInterval = 30s
698
699 # override type
700 type = kubernetes_disk_stats
701
702 # specify Splunk index
703 index =
704
705 # set output (splunk or devnull, default is [general]defaultOutput)
706 output =
707
708
709 # Container Log files
710 [input.files]
711
712 # disable container logs monitoring
713 disabled = false
714
715 # root location of docker log files
716 # logs are expected in standard docker format like {containerID}/{containerID}-json.log
717 # rotated files
718 path = /rootfs/var/lib/docker/containers/
719 # root location of CRI-O files
720 # logs are expected in Kubernetes format, like {podID}/{containerName}/0.log
721 crioPath = /rootfs/var/log/pods/
722
723 # (obsolete) glob matching pattern for log files
724 # glob = */*-json.log*
725
726 # files are read using polling schema, when reach the EOF how often to check if files got updated
727 pollingInterval = 250ms
728
729 # how often to look for the new files under logs path
730 walkingInterval = 5s
731
732 # include verbose fields in events (file offset)
733 verboseFields = false
734
735 # override type
736 type = kubernetes_logs
737
738 # specify Splunk index
739 index =
740
741 # docker splits events when they are larger than 10-100k (depends on the docker version)
742 # we join them together by default and forward to Splunk as one event
743 joinPartialEvents = true
744
745 # In case if your containers report messages with terminal colors or other escape sequences
746 # you can enable strip for all the containers in one place.
747 # Better is to enable it only for required container with the label collectord.io/strip-terminal-escape-sequences=true
748 stripTerminalEscapeSequences = false
    # Regexp used for stripping terminal colors, it does not strip all the escape sequences
750 # Read http://man7.org/linux/man-pages/man4/console_codes.4.html for more information
751 stripTerminalEscapeSequencesRegex = (\x1b\[\d{1,3}(;\d{1,3})*m)|(\x07)|(\x1b]\d+(\s\d)?;[^\x07]+\x07)|(.*\x1b\[K)
752
753 # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
754 samplingPercent = -1
755
756 # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
757 samplingKey =
758
759 # set output (splunk or devnull, default is [general]defaultOutput)
760 output =
761
    # configure default thruput per second for each container log
763 # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
764 # from the single container to 128Kb per second.
765 thruputPerSecond =
766
767 # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
768 # older than 7 days
769 tooOldEvents =
770
771 # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
772 tooNewEvents =
773
774
775 # Application Logs
776 [input.app_logs]
777
778 # disable container application logs monitoring
779 disabled = false
780
781 # root location of mounts (applies to hostPath mounts only), if the hostPath differs inside container from the path on host
782 root = /rootfs/
783
784 # how often to review list of available volumes
785 syncInterval = 5s
786
787 # glob matching pattern for log files
788 glob = *.log*
789
790 # files are read using polling schema, when reach the EOF how often to check if files got updated
791 pollingInterval = 250ms
792
793 # how often to look for the new files under logs path
794 walkingInterval = 5s
795
796 # include verbose fields in events (file offset)
797 verboseFields = false
798
799 # override type
800 type = kubernetes_logs
801
802 # specify Splunk index
803 index =
804
805 # we split files using new line character, with this configuration you can specify what defines the new event
806 # after new line
807 eventPatternRegex = ^[^\s]
808 # Maximum interval of messages in pipeline
809 eventPatternMaxInterval = 100ms
810 # Maximum time to wait for the messages in pipeline
811 eventPatternMaxWait = 1s
812 # Maximum message size
813 eventPatternMaxSize = 1MB
814
815 # set output (splunk or devnull, default is [general]defaultOutput)
816 output =
817
    # configure default thruput per second for each container log
819 # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
820 # from the single container to 128Kb per second.
821 thruputPerSecond =
822
823 # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
824 # older than 7 days
825 tooOldEvents =
826
827 # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
828 tooNewEvents =
829
830 # Configure how long Collectord should keep the file descriptors open for files, that has not been forwarded yet
831 # When using PVC, and if pipeline is lagging behind, Collectord holding open fd for files, can cause long termination
832 # of pods, as kubelet cannot unmount the PVC volume from the system
833 maxHoldAfterClose = 1800s
834
835
836 # Host logs. Input syslog(.\d+)? files
837 [input.files::syslog]
838
839 # disable host level logs
840 disabled = false
841
842 # root location of docker files
843 path = /rootfs/var/log/
844
845 # regex matching pattern
846 match = ^(syslog|messages)(.\d+)?$
847
848 # limit search only on one level
849 recursive = false
850
851 # files are read using polling schema, when reach the EOF how often to check if files got updated
852 pollingInterval = 250ms
853
    # how often to look for the new files under logs path
855 walkingInterval = 5s
856
857 # include verbose fields in events (file offset)
858 verboseFields = false
859
860 # override type
861 type = kubernetes_host_logs
862
863 # specify Splunk index
864 index =
865
866 # field extraction
867 extraction = ^(?P<timestamp>[A-Za-z]+\s+\d+\s\d+:\d+:\d+)\s(?P<syslog_hostname>[^\s]+)\s(?P<syslog_component>[^:\[]+)(\[(?P<syslog_pid>\d+)\])?: (.+)$
868 # extractionMessageField =
869
870 # timestamp field
871 timestampField = timestamp
872
873 # format for timestamp
874 # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`
875 timestampFormat = Jan 2 15:04:05
876
877 # Adjust date, if month/day aren't set in format
878 timestampSetMonth = false
879 timestampSetDay = false
880
881 # timestamp location (if not defined by format)
882 timestampLocation = Local
883
884 # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
885 samplingPercent = -1
886
887 # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
888 samplingKey =
889
890 # set output (splunk or devnull, default is [general]defaultOutput)
891 output =
892
893 # configure default thruput per second for this files group
894 # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
895 # from the files in this group to 128Kb per second.
896 thruputPerSecond =
897
898 # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
899 # older than 7 days
900 tooOldEvents =
901
902 # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
903 tooNewEvents =
904
905 # by default every new event should start from not space symbol
906 eventPattern = ^[^\s]
907
908 # Blacklisting and whitelisting the logs
909 # whitelist.0 = ^regexp$
910 # blacklist.0 = ^regexp$
911
912
913 # Host logs. Input all *.log(.\d+)? files
914 [input.files::logs]
915
916 # disable host level logs
917 disabled = false
918
919 # root location of log files
920 path = /rootfs/var/log/
921
922 # regex matching pattern
923 match = ^(([\w\-.]+\.log(.[\d\-]+)?)|(docker))$
924
925 # files are read using polling schema, when reach the EOF how often to check if files got updated
926 pollingInterval = 250ms
927
    # how often to look for the new files under logs path
929 walkingInterval = 5s
930
931 # include verbose fields in events (file offset)
932 verboseFields = false
933
934 # override type
935 type = kubernetes_host_logs
936
937 # specify Splunk index
938 index =
939
940 # field extraction
941 extraction =
942 extractionMessageField =
943
944 # timestamp field
945 timestampField =
946
947 # format for timestamp
948 # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`
949 timestampFormat =
950
951 # timestamp location (if not defined by format)
952 timestampLocation =
953
954 # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
955 samplingPercent = -1
956
957 # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
958 samplingKey =
959
960 # set output (splunk or devnull, default is [general]defaultOutput)
961 output =
962
963 # configure default thruput per second for this files group
964 # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
965 # from the files in this group to 128Kb per second.
966 thruputPerSecond =
967
968 # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
969 # older than 7 days
970 tooOldEvents =
971
972 # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
973 tooNewEvents =
974
975 # by default every new event should start from not space symbol
976 eventPattern = ^[^\s]
977
978 # Blacklisting and whitelisting the logs
979 # whitelist.0 = ^regexp$
980 # blacklist.0 = ^regexp$
981
982
983 [input.journald]
984
985 # disable host level logs
986 disabled = false
987
988 # root location of log files
989 path.persistent = /rootfs/var/log/journal/
990 path.volatile = /rootfs/run/log/journal/
991
992 # when reach end of journald, how often to pull
993 pollingInterval = 250ms
994
995 # if you don't want to forward journald from the beginning,
996 # set the oldest event in relative value, like -14h or -30m or -30s (h/m/s supported)
997 startFromRel =
998
999 # override type
1000 type = kubernetes_host_logs
1001
1002 # specify Splunk index
1003 index =
1004
1005 # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
1006 samplingPercent = -1
1007
1008 # sampling key (should be regexp with the named match pattern `key`)
1009 samplingKey =
1010
1011 # how often to reopen the journald to free old files
1012 reopenInterval = 1h
1013
1014 # set output (splunk or devnull, default is [general]defaultOutput)
1015 output =
1016
1017 # configure default thruput per second for journald
1018 # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
1019 # from the journald to 128Kb per second.
1020 thruputPerSecond =
1021
1022 # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
1023 # older than 7 days
1024 tooOldEvents =
1025
1026 # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in future
1027 tooNewEvents =
1028
1029 # by default every new event should start from not space symbol
1030 eventPattern = ^[^\s]
1031
1032 # Blacklisting and whitelisting the logs
1033 # whitelist.0 = ^regexp$
1034 # blacklist.0 = ^regexp$
1035
1036 # Move Journald logs reader to a separate process, to prevent process from crashing in case of corrupted log files
1037 spawnExternalProcess = false
1038
1039
1040 # Pipe to join events (container logs only)
1041 [pipe.join]
1042
1043 # disable joining event
1044 disabled = false
1045
1046 # Maximum interval of messages in pipeline
1047 maxInterval = 100ms
1048
1049 # Maximum time to wait for the messages in pipeline
1050 maxWait = 1s
1051
1052 # Maximum message size
1053 maxSize = 1MB
1054
1055 # Default pattern to indicate new message (should start not from space)
1056 patternRegex = ^[^\s]
1057
1058
    # (deprecated, use annotations for setting up join rules)
1060 # Define special event join patterns for matched events
1061 # Section consist of [pipe.join::<name>]
1062 # [pipe.join::my_app]
1063 ## Set match pattern for the fields
1064 #; matchRegex.docker_container_image = my_app
1065 #; matchRegex.stream = stdout
1066 ## All events start from '[<digits>'
1067 #; patternRegex = ^\[\d+
1068
1069
1070 # You can configure global replace rules for the events, which can help to remove sensitive data
1071 # from logs before they are sent to Splunk. Those rules will be applied to all pipelines for container logs, host logs,
1072 # application logs and events.
1073 # In the following example we replace password=TEST with password=********
1074 ; [pipe.replace::name]
1075 ; patternRegex = (password=)([^\s]+)
1076 ; replace = $1********
1077
1078 # You can configure global hash rules for the events, which can help to hide sensitive data
1079 # from logs before they are sent to outputs. Those rules will be applied to all pipelines for container logs, host logs,
1080 # application logs and events.
1081 # In the following example we hash IP addresses with fnv-1a-64
1082 ; [pipe.hash::name]
    ; match = (\d{1,3}\.){3}\d{1,3}
1084 ; function = fnv-1a-64
1085
1086
1087 [input.prometheus::kubelet]
1088
1089 # disable prometheus kubelet metrics
1090 disabled = false
1091
1092 # override type
1093 type = kubernetes_prometheus
1094
1095 # specify Splunk index
1096 index =
1097
1098 # override host (environment variables are supported, by default Kubernetes node name is used)
1099 host = ${KUBERNETES_NODENAME}
1100
1101 # override source
1102 source = kubelet
1103
1104 # how often to collect prometheus metrics
1105 interval = 60s
1106
1107 # request timeout
1108 timeout = 60s
1109
1110 # Prometheus endpoint, multiple values can be specified, collectord tries them in order till finding the first
1111 # working endpoint.
1112 # At first trying to get it through proxy
1113 endpoint.1proxy = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/nodes/${KUBERNETES_NODENAME}/proxy/metrics
1114 # In case if cannot get it through proxy, trying localhost
1115 endpoint.2http = http://127.0.0.1:10255/metrics
1116
1117 # token for "Authorization: Bearer $(cat tokenPath)"
1118 tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1119
1120 # server certificate for certificate validation
1121 certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1122
1123 # client certificate for authentication
1124 clientCertPath =
1125
1126 # Allow invalid SSL server certificate
1127 insecure = true
1128
1129 # include metrics help with the events
1130 includeHelp = false
1131
1132 # set output (splunk or devnull, default is [general]defaultOutput)
1133 output =
1134
1135 whitelist.1 = ^kubernetes_build_info$
1136 whitelist.2 = ^kubelet_runtime_operations_duration_seconds_sum$
1137 whitelist.3 = ^kubelet_docker_operations_duration_seconds_sum$
1138 whitelist.4 = ^kubelet_network_plugin_operations_duration_seconds_sum$
1139 whitelist.5 = ^kubelet_cgroup_manager_duration_seconds_sum$
1140 whitelist.6 = ^storage_operation_duration_seconds_sum$
1141 whitelist.7 = ^kubelet_docker_operations_errors_total$
1142 whitelist.8 = ^kubelet_runtime_operations_errors_total$
1143 whitelist.9 = ^rest_client_requests_total$
1144 whitelist.10 = ^process_cpu_seconds_total$
1145 whitelist.11 = ^process_resident_memory_bytes$
1146 whitelist.12 = ^process_virtual_memory_bytes$
1147 whitelist.13 = ^rest_client_request_duration_seconds_sum$
1148 whitelist.14 = ^kubelet_volume_stats_.+$
    # whitelist.15 = ^rest_client_requests_total$  (removed: duplicate of whitelist.9)
1150
1151
1152 ; # Collectord reports if entropy is low
1153 ; [diagnostics::node-entropy]
1154 ; settings.path = /rootfs/proc/sys/kernel/random/entropy_avail
1155 ; settings.interval = 1h
1156 ; settings.threshold = 800
1157
1158 # Collectord can report if node reboot is required
1159 [diagnostics::node-reboot-required]
1160 settings.path = /rootfs/var/run/reboot-required*
1161 settings.interval = 1h
1162
1163 # See https://www.kernel.org/doc/Documentation/admin-guide/hw-vuln/index.rst
1164 # And https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
1165 [diagnostics::cpu-vulnerabilities]
1166 settings.path = /rootfs/sys/devices/system/cpu/vulnerabilities/*
1167 settings.interval = 1h
1168
1169
1170 003-daemonset-master.conf: |
1171 [input.prometheus::kubernetes-api]
1172
1173 # disable prometheus kubernetes-api metrics
1174 disabled = false
1175
1176 # override type
1177 type = kubernetes_prometheus
1178
1179 # specify Splunk index
1180 index =
1181
1182 # override host (environment variables are supported, by default Kubernetes node name is used)
1183 host = ${KUBERNETES_NODENAME}
1184
1185 # override source
1186 source = kubernetes-api
1187
1188 # how often to collect prometheus metrics
1189 interval = 60s
1190
1191 # request timeout
1192 timeout = 60s
1193
1194 # prometheus endpoint
1195 # at first trying to get it from localhost (avoiding load balancer, if multiple api servers)
1196 endpoint.1localhost = https://127.0.0.1:6443/metrics
1197 # as fallback using proxy
1198 endpoint.2kubeapi = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/metrics
1199
1200 # token for "Authorization: Bearer $(cat tokenPath)"
1201 tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1202
1203 # server certificate for certificate validation
1204 certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1205
1206 # client certificate for authentication
1207 clientCertPath =
1208
1209 # Allow invalid SSL server certificate
1210 insecure = true
1211
1212 # include metrics help with the events
1213 includeHelp = false
1214
1215 # set output (splunk or devnull, default is [general]defaultOutput)
1216 output =
1217
1218 whitelist.1 = ^kubernetes_build_info$
1219 whitelist.2 = ^authenticated_user_requests$
1220 whitelist.3 = ^apiserver_request_total$
1221 whitelist.4 = ^process_cpu_seconds_total$
1222 whitelist.5 = ^process_resident_memory_bytes$
1223 whitelist.6 = ^process_virtual_memory_bytes$
1224 whitelist.7 = ^rest_client_request_duration_seconds_sum$
1225 whitelist.8 = ^rest_client_requests_total$
1226
1227
    # This configuration works if the scheduler is bound to localhost:10259 (https) or localhost:10251 (http)
1229 [input.prometheus::scheduler]
1230
1231 # disable prometheus scheduler metrics
1232 disabled = false
1233
1234 # override type
1235 type = kubernetes_prometheus
1236
1237 # specify Splunk index
1238 index =
1239
1240 # override host
1241 host = ${KUBERNETES_NODENAME}
1242
1243 # override source
1244 source = scheduler
1245
1246 # how often to collect prometheus metrics
1247 interval = 60s
1248
1249 # request timeout
1250 timeout = 60s
1251
1252 # prometheus endpoint
1253 endpoint.https = https://:10259/metrics
1254 endpoint.http = http://127.0.0.1:10251/metrics
1255
1256 # token for "Authorization: Bearer $(cat tokenPath)"
1257 tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1258
1259 # server certificate for certificate validation
1260 certPath =
1261
1262 # client certificate for authentication
1263 clientCertPath =
1264
1265 # Allow invalid SSL server certificate
1266 insecure = true
1267
1268 # include metrics help with the events
1269 includeHelp = false
1270
1271 # set output (splunk or devnull, default is [general]defaultOutput)
1272 output =
1273
1274 whitelist.1 = ^kubernetes_build_info$
1275 whitelist.2 = ^scheduler_e2e_scheduling_duration_seconds_sum$
1276 whitelist.3 = ^scheduler_binding_duration_seconds_sum$
1277 whitelist.4 = ^scheduler_scheduling_algorithm_duration_seconds_sum$
1278 whitelist.5 = ^rest_client_request_duration_seconds_sum$
1279 whitelist.6 = ^rest_client_requests_total$
1280 whitelist.7 = ^process_cpu_seconds_total$
1281 whitelist.8 = ^process_resident_memory_bytes$
1282 whitelist.9 = ^process_virtual_memory_bytes$
1283
1284
    # This configuration works if the controller-manager is bound to localhost:10257 (https) or localhost:10252 (http)
1286 [input.prometheus::controller-manager]
1287
1288 # disable prometheus controller-manager metrics
1289 disabled = false
1290
1291 # override type
1292 type = kubernetes_prometheus
1293
1294 # specify Splunk index
1295 index =
1296
1297 # override host
1298 host = ${KUBERNETES_NODENAME}
1299
1300 # override source
1301 source = controller-manager
1302
1303 # how often to collect prometheus metrics
1304 interval = 60s
1305
1306 # request timeout
1307 timeout = 60s
1308
1309 # prometheus endpoint
1310 endpoint.https = https://:10257/metrics
1311 endpoint.http = http://127.0.0.1:10252/metrics
1312
1313 # token for "Authorization: Bearer $(cat tokenPath)"
1314 tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
1315
1316 # server certificate for certificate validation
1317 certPath =
1318
1319 # client certificate for authentication
1320 clientCertPath =
1321
1322 # Allow invalid SSL server certificate
1323 insecure = true
1324
1325 # include metrics help with the events
1326 includeHelp = false
1327
1328 # set output (splunk or devnull, default is [general]defaultOutput)
1329 output =
1330
1331 whitelist.1 = ^kubernetes_build_info$
1332 whitelist.2 = ^node_collector_zone_size$
1333 whitelist.3 = ^node_collector_zone_health$
1334 whitelist.4 = ^node_collector_unhealthy_nodes_in_zone$
1335 whitelist.5 = ^rest_client_request_duration_seconds_sum$
1336 whitelist.6 = ^rest_client_requests_total$
1337 whitelist.7 = ^process_cpu_seconds_total$
1338 whitelist.8 = ^process_resident_memory_bytes$
1339 whitelist.9 = ^process_virtual_memory_bytes$
1340
1341
1342 [input.prometheus::etcd]
1343
1344 # disable prometheus etcd metrics
1345 disabled = false
1346
1347 # override type
1348 type = kubernetes_prometheus
1349
1350 # specify Splunk index
1351 index =
1352
1353 # override host
1354 host = ${KUBERNETES_NODENAME}
1355
1356 # override source
1357 source = etcd
1358
    # how often to collect prometheus metrics
1360 interval = 60s
1361
1362 # request timeout
1363 timeout = 60s
1364
1365 # prometheus endpoint
1366 endpoint.http = http://:2379/metrics
1367 endpoint.https = https://:2379/metrics
1368
1369 # token for "Authorization: Bearer $(cat tokenPath)"
1370 tokenPath =
1371
1372 # server certificate for certificate validation
1373 certPath = /rootfs/etc/kubernetes/pki/etcd/ca.crt
1374
1375 # client certificate for authentication
1376 clientCertPath = /rootfs/etc/kubernetes/pki/apiserver-etcd-client.crt
1377 clientKeyPath = /rootfs/etc/kubernetes/pki/apiserver-etcd-client.key
1378
1379 # Allow invalid SSL server certificate
1380 insecure = true
1381
1382 # include metrics help with the events
1383 includeHelp = false
1384
1385 # set output (splunk or devnull, default is [general]defaultOutput)
1386 output =
1387
1388 whitelist.1 = ^etcd_server_leader_changes_seen_total$
1389 whitelist.2 = ^etcd_server_has_leader$
1390 whitelist.3 = ^etcd_server_proposals_committed_total$
1391 whitelist.4 = ^etcd_server_proposals_applied_total$
    # whitelist.5 = ^etcd_server_proposals_committed_total$  (removed: duplicate of whitelist.3)
1393 whitelist.6 = ^etcd_server_proposals_pending$
1394 whitelist.7 = ^etcd_server_proposals_failed_total$
1395 whitelist.8 = ^etcd_disk_wal_fsync_duration_seconds_sum$
1396 whitelist.9 = ^etcd_disk_wal_fsync_duration_seconds_count$
1397 whitelist.10 = ^etcd_disk_backend_commit_duration_seconds_sum$
1398 whitelist.11 = ^etcd_disk_backend_commit_duration_seconds_count$
1399 whitelist.12 = ^etcd_network_client_grpc_.*$
1400 whitelist.13 = ^grpc_server_handled_total$
1401 whitelist.14 = ^etcd_network_peer_round_trip_time_seconds_bucket$
1402 whitelist.15 = ^process_cpu_seconds_total$
1403 whitelist.16 = ^process_resident_memory_bytes$
1404 whitelist.17 = ^process_virtual_memory_bytes$
1405 whitelist.18 = ^process_open_fds$
1406 whitelist.19 = ^process_max_fds$
1407 whitelist.20 = ^etcd_disk_backend_commit_duration_seconds_bucket$
1408 whitelist.21 = ^etcd_disk_wal_fsync_duration_seconds_bucket$
1409
1410 004-addon.conf: |
1411 [general]
1412
1413 # addons can be run in parallel with agents
1414 addon = true
1415
1416 [input.kubernetes_events]
1417
1418 # disable events
1419 disabled = false
1420
1421 # override type
1422 type = kubernetes_events
1423
1424 # specify Splunk index
1425 index =
1426
1427 # set output (splunk or devnull, default is [general]defaultOutput)
1428 output =
1429
1430 # exclude managed fields from the metadata
1431 excludeManagedFields = true
1432
1433
1434 [input.kubernetes_watch::pods]
1435
1436 # disable events
1437 disabled = false
1438
1439 # Set the timeout for how often watch request should refresh the whole list
1440 refresh = 10m
1441
1442 apiVersion = v1
1443 kind = Pod
1444 namespace =
1445
1446 # override type
1447 type = kubernetes_objects
1448
1449 # specify Splunk index
1450 index =
1451
1452 # set output (splunk or devnull, default is [general]defaultOutput)
1453 output =
1454
1455 # exclude managed fields from the metadata
1456 excludeManagedFields = true
1457
1458 # you can remove or hash some values in the events (after modifyValues you can define path in the JSON object,
1459 # and the value can be hash:{hashFunction}, or remove to remove the object )
1460 ; modifyValues.object.data.* = hash:sha256
1461 ; modifyValues.object.metadata.annotations.* = remove
1462
1463 # You can exclude events by namespace with blacklist or whitelist only required namespaces
1464 # blacklist.kubernetes_namespace = ^namespace0$
1465 # whitelist.kubernetes_namespace = ^((namespace1)|(namespace2))$
1466
1467 [input.kubernetes_watch::resourcequota]
1468 # disable events
1469 disabled = false
1470
1471 # Set the timeout for how often watch request should refresh the whole list
1472 refresh = 10m
1473
1474 apiVersion = v1
1475 kind = ResourceQuota
1476 namespace =
1477
1478 # override type
1479 type = kubernetes_objects
1480
1481 # specify Splunk index
1482 index =
1483
1484 # set output (splunk or devnull, default is [general]defaultOutput)
1485 output =
1486
1487 # exclude managed fields from the metadata
1488 excludeManagedFields = true
1489
1490 [input.kubernetes_watch::nodes]
1491 # disable events
1492 disabled = false
1493
1494 # Set the timeout for how often watch request should refresh the whole list
1495 refresh = 10m
1496
1497 apiVersion = v1
1498 kind = Node
1499 namespace =
1500
1501 # override type
1502 type = kubernetes_objects
1503
1504 # specify Splunk index
1505 index =
1506
1507 # set output (splunk or devnull, default is [general]defaultOutput)
1508 output =
1509
1510 # exclude managed fields from the metadata
1511 excludeManagedFields = true
1512
1513---
1514apiVersion: apps/v1
1515kind: DaemonSet
1516metadata:
1517 name: collectorforkubernetes
1518 namespace: collectorforkubernetes
1519 labels:
1520 app: collectorforkubernetes
1521spec:
  # Default updateStrategy is OnDelete. For collectord, RollingUpdate is more suitable,
  # so that Pods are replaced automatically when you update the configuration.
1524 updateStrategy:
1525 type: RollingUpdate
1526 selector:
1527 matchLabels:
1528 daemon: collectorforkubernetes
1529 template:
1530 metadata:
1531 name: collectorforkubernetes
1532 labels:
1533 daemon: collectorforkubernetes
1534 spec:
1535 priorityClassName: collectorforkubernetes-critical
1536 dnsPolicy: ClusterFirstWithHostNet
1537 hostNetwork: true
1538 serviceAccountName: collectorforkubernetes
1539 # We run this DaemonSet only for Non-Masters
1540 affinity:
1541 nodeAffinity:
1542 requiredDuringSchedulingIgnoredDuringExecution:
1543 nodeSelectorTerms:
1544 - matchExpressions:
1545 - key: node-role.kubernetes.io/control-plane
1546 operator: DoesNotExist
1547 tolerations:
1548 - operator: "Exists"
1549 effect: "NoSchedule"
1550 - operator: "Exists"
1551 effect: "NoExecute"
1552 containers:
1553 - name: collectorforkubernetes
1554 # Collectord version
1555 image: docker.io/outcoldsolutions/collectorforkubernetes:25.10.3
1556 imagePullPolicy: Always
1557 securityContext:
1558 runAsUser: 0
1559 privileged: true
1560 # Define your resources if you need. Defaults should be fine for most.
1561 # You can lower or increase based on your hosts.
1562 resources:
1563 limits:
1564 cpu: 2000m
1565 memory: 1024Mi
1566 requests:
1567 cpu: 500m
1568 memory: 256Mi
1569 env:
1570 - name: KUBERNETES_NODENAME
1571 valueFrom:
1572 fieldRef:
1573 fieldPath: spec.nodeName
1574 - name: POD_NAME
1575 valueFrom:
1576 fieldRef:
1577 fieldPath: metadata.name
1578 volumeMounts:
1579 # We store state in /data folder (file positions)
1580 - name: collectorforkubernetes-state
1581 mountPath: /data
1582 # Configuration file deployed with ConfigMap
1583 - name: collectorforkubernetes-config
1584 mountPath: /config/
1585 readOnly: true
1586 # Root filesystem to have access to logs and metrics
1587 - name: rootfs
1588 mountPath: /rootfs/
1589 readOnly: false
1590 mountPropagation: HostToContainer
1591 # correct timezone
1592 - name: localtime
1593 mountPath: /etc/localtime
1594 readOnly: true
1595 volumes:
1596 # We store state directly on host, change this location, if
1597 # your persistent volume is somewhere else
1598 - name: collectorforkubernetes-state
1599 hostPath:
1600 path: /var/lib/collectorforkubernetes/data/
1601 type: DirectoryOrCreate
1602 # Location of docker root (for container logs and metadata)
1603 - name: rootfs
1604 hostPath:
1605 path: /
1606 # correct timezone
1607 - name: localtime
1608 hostPath:
1609 path: /etc/localtime
1610 # configuration from ConfigMap
1611 - name: collectorforkubernetes-config
1612 configMap:
1613 name: collectorforkubernetes
1614 items:
1615 - key: 001-general.conf
1616 path: 001-general.conf
1617 - key: 002-daemonset.conf
1618 path: 002-daemonset.conf
1619---
1620apiVersion: apps/v1
1621kind: DaemonSet
1622metadata:
1623 name: collectorforkubernetes-master
1624 namespace: collectorforkubernetes
1625 labels:
1626 app: collectorforkubernetes
1627spec:
1628 updateStrategy:
1629 type: RollingUpdate
  selector:
    matchLabels:
      # unique label value: overlapping selectors across workloads
      # (this DaemonSet, the non-master DaemonSet and the addon Deployment
      # all matched `daemon: collectorforkubernetes`) are discouraged by Kubernetes
      daemon: collectorforkubernetes-master
  template:
    metadata:
      name: collectorforkubernetes-master
      labels:
        daemon: collectorforkubernetes-master
1638 spec:
1639 priorityClassName: collectorforkubernetes-critical
1640 dnsPolicy: ClusterFirstWithHostNet
1641 hostNetwork: true
1642 serviceAccountName: collectorforkubernetes
1643 affinity:
1644 nodeAffinity:
1645 requiredDuringSchedulingIgnoredDuringExecution:
1646 nodeSelectorTerms:
1647 - matchExpressions:
1648 - key: node-role.kubernetes.io/control-plane
1649 operator: Exists
1650 tolerations:
1651 - operator: "Exists"
1652 effect: "NoSchedule"
1653 - operator: "Exists"
1654 effect: "NoExecute"
1655 containers:
1656 - name: collectorforkubernetes
1657 image: docker.io/outcoldsolutions/collectorforkubernetes:25.10.3
1658 imagePullPolicy: Always
1659 securityContext:
1660 runAsUser: 0
1661 privileged: true
1662 resources:
1663 limits:
1664 cpu: 2000m
1665 memory: 1024Mi
1666 requests:
1667 cpu: 500m
1668 memory: 256Mi
1669 env:
1670 - name: KUBERNETES_NODENAME
1671 valueFrom:
1672 fieldRef:
1673 fieldPath: spec.nodeName
1674 - name: POD_NAME
1675 valueFrom:
1676 fieldRef:
1677 fieldPath: metadata.name
1678 volumeMounts:
1679 - name: collectorforkubernetes-state
1680 mountPath: /data
1681 - name: collectorforkubernetes-config
1682 mountPath: /config/
1683 readOnly: true
1684 - name: rootfs
1685 mountPath: /rootfs/
1686 readOnly: false
1687 mountPropagation: HostToContainer
1688 - name: localtime
1689 mountPath: /etc/localtime
1690 readOnly: true
1691 volumes:
1692 - name: collectorforkubernetes-state
1693 hostPath:
1694 path: /var/lib/collectorforkubernetes/data/
1695 type: DirectoryOrCreate
1696 - name: rootfs
1697 hostPath:
1698 path: /
1699 - name: localtime
1700 hostPath:
1701 path: /etc/localtime
1702 - name: collectorforkubernetes-config
1703 configMap:
1704 name: collectorforkubernetes
1705 items:
1706 - key: 001-general.conf
1707 path: 001-general.conf
1708 - key: 002-daemonset.conf
1709 path: 002-daemonset.conf
1710 - key: 003-daemonset-master.conf
1711 path: 003-daemonset-master.conf
1712---
1713apiVersion: apps/v1
1714kind: Deployment
1715metadata:
1716 name: collectorforkubernetes-addon
1717 namespace: collectorforkubernetes
1718 labels:
1719 app: collectorforkubernetes
1720spec:
1721 replicas: 1
  selector:
    matchLabels:
      # unique label value: this Deployment previously shared
      # `daemon: collectorforkubernetes` with both DaemonSets, which is
      # an overlapping-selector anti-pattern in Kubernetes
      daemon: collectorforkubernetes-addon
  template:
    metadata:
      name: collectorforkubernetes-addon
      labels:
        daemon: collectorforkubernetes-addon
1730 spec:
1731 priorityClassName: collectorforkubernetes-critical
1732 serviceAccountName: collectorforkubernetes
1733 containers:
1734 - name: collectorforkubernetes
1735 image: docker.io/outcoldsolutions/collectorforkubernetes:25.10.3
1736 imagePullPolicy: Always
1737 securityContext:
1738 runAsUser: 0
1739 privileged: true
1740 resources:
1741 limits:
1742 cpu: 1000m
1743 memory: 512Mi
1744 requests:
1745 cpu: 200m
1746 memory: 64Mi
1747 env:
1748 - name: KUBERNETES_NODENAME
1749 valueFrom:
1750 fieldRef:
1751 fieldPath: spec.nodeName
1752 - name: POD_NAME
1753 valueFrom:
1754 fieldRef:
1755 fieldPath: metadata.name
1756 volumeMounts:
1757 - name: collectorforkubernetes-state
1758 mountPath: /data
1759 - name: collectorforkubernetes-config
1760 mountPath: /config/
1761 readOnly: true
1762 volumes:
1763 - name: collectorforkubernetes-state
1764 hostPath:
          path: /var/lib/collectorforkubernetes/data/
          # DirectoryOrCreate (consistent with the DaemonSets) so the addon can
          # start on a node where the state directory does not exist yet
          type: DirectoryOrCreate
1767 - name: collectorforkubernetes-config
1768 configMap:
1769 name: collectorforkubernetes
1770 items:
1771 - key: 001-general.conf
1772 path: 001-general.conf
1773 - key: 004-addon.conf
1774 path: 004-addon.conf