apiVersion: v1
kind: Project
metadata:
  labels:
    app: collectorforopenshift
  name: collectorforopenshift
  annotations:
    openshift.io/node-selector: ''
    openshift.io/description: 'Monitoring OpenShift in Splunk, built by Outcold Solutions'
    openshift.io/display-name: 'Collector for OpenShift'
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: configurations.collectord.io
spec:
  group: collectord.io
  versions:
    - name: v1
      served: true
      storage: true
  scope: Cluster
  names:
    plural: configurations
    singular: configuration
    kind: Configuration
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: collectorforopenshift
  name: collectorforopenshift
  namespace: collectorforopenshift
---
apiVersion: v1
kind: ClusterRole
metadata:
  labels:
    app: collectorforopenshift
  name: collectorforopenshift
rules:
  - apiGroups:
      - ""
      - apps
      - batch
      - extensions
      - collectord.io
      - apps.openshift.io
      - build.openshift.io
      - authorization.openshift.io
      - template.openshift.io
      - quota.openshift.io
    resources:
      - alertmanagers
      - cronjobs
      - daemonsets
      - deployments
      - endpoints
      - events
      - jobs
      - namespaces
      - nodes
      - nodes/metrics
      - nodes/proxy
      - pods
      - replicasets
      - replicationcontrollers
      - scheduledjobs
      - services
      - statefulsets
      - persistentvolumeclaims
      - configurations
      - resourcequotas
      - deploymentconfigs
      - clusterroles
      - clusterresourcequotas
    verbs:
      - get
      - list
      - watch
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
    apiGroups: []
    resources: []
---
apiVersion: v1
kind: ClusterRoleBinding
metadata:
  labels:
    app: collectorforopenshift
  name: collectorforopenshift
roleRef:
  kind: ClusterRole
  name: collectorforopenshift
  apiGroup: rbac.authorization.k8s.io
subjects:
  - kind: ServiceAccount
    name: collectorforopenshift
    namespace: collectorforopenshift
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: collectorforopenshift
  namespace: collectorforopenshift
  labels:
    app: collectorforopenshift
data:
  001-general.conf: |
    # The general configuration is used for all deployments
    #
    # Run collector with the flag `-conf` and specify location of the configuration files.
    #
    # You can override all the values using environment variables with the format like
    # COLLECTOR__<ANYNAME>=<section>__<key>=<value>
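    # A minimal sketch of such an override (illustration only, not part of the shipped defaults): setting
    # COLLECTOR__ACCEPTLICENSE=general__acceptLicense=true
    # on the collectord container flips `acceptLicense` in the `[general]` section below without editing this ConfigMap.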
    # As an example you can set `dataPath` in the `[general]` section as
    # COLLECTOR__DATAPATH=general__dataPath=C:\\some\\path\\data.db
    # This parameter can be configured using -env-override, set it to empty string to disable this feature

    [general]

    # Please review license https://www.outcoldsolutions.com/docs/license-agreement/
    # and accept license by changing the value to *true*
    acceptLicense = false

    # Location for the database
    # Collector stores positions of the files and internal state
    dataPath = ./data/

    # log level (accepted values are trace, debug, info, warn, error, fatal)
    logLevel = info

    # http server gives access to the following endpoints
    # /healthz
    # /metrics/json
    # /metrics/prometheus
    # httpServerBinding = 0.0.0.0:11888
    httpServerBinding =

    # telemetry report endpoint, set it to empty string to disable telemetry
    telemetryEndpoint = https://license.outcold.solutions/telemetry/

    # license check endpoint
    licenseEndpoint = https://license.outcold.solutions/license/

    # license server through proxy
    licenseServerProxyUrl =

    # authentication with basic authorization (user:password)
    licenseServerProxyBasicAuth =

    # license key
    license =

    # Environment variable $KUBERNETES_NODENAME is used by default to setup hostname
    # Use value below to override specific name
    hostname =

    # Default output for events, logs and metrics
    # valid values: splunk and devnull
    # Use devnull by default if you don't want to redirect data
    defaultOutput = splunk

    # Default buffer size for file input
    fileInputBufferSize = 256b

    # Maximum size of one line the file reader can read
    fileInputLineMaxSize = 1mb

    # Include custom fields to attach to every event, in the example below every event sent to Splunk will have
    # the indexed field my_environment=dev. Field names should match ^[a-z][_a-z0-9]*$
    # A better way to configure that is to specify labels for OpenShift Nodes.
    # ; fields.my_environment = dev

    # Identify the cluster if you are planning to monitor multiple clusters
    fields.openshift_cluster = -

    # Include EC2 Metadata (see list of possible fields https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html)
    # Should be in format ec2Metadata.{desired_field_name} = {url path to read the value}
    # ec2Metadata.ec2_instance_id = /latest/meta-data/instance-id
    # ec2Metadata.ec2_instance_type = /latest/meta-data/instance-type

    # subdomain for the annotations added to the pods, workloads, namespaces or containers, like splunk.collectord.io/..
    annotationsSubdomain =

    # configure global thruput per second for forwarded logs (metrics are not included)
    # for example if you set `thruputPerSecond = 512Kb`, that will limit amount of logs forwarded
    # from the single Collectord instance to 512Kb per second.
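    # For instance, the 512Kb cap from the example above would be written against the setting below as
    # ; thruputPerSecond = 512Kb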
    # You can configure thruput individually for the logs (including specific limits for container logs) below
    thruputPerSecond =

    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
    # older than 7 days
    tooOldEvents =

    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in the future
    tooNewEvents =

    [license.client]

    # point to the license located on the HTTP web server, or hosted by the Collectord running as a license server
    url =

    # basic authentication for the HTTP server
    basicAuth =

    # if SSL, ignore the certificate verification
    insecure = false

    # CA Path for the Server certificate
    capath =

    # CA Name for the Server certificate
    caname =

    # forward internal collectord metrics
    [input.collectord_metrics]

    # disable collectord internal metrics
    disabled = false

    # override type
    type = openshift_prometheus

    # how often to collect internal metrics
    interval = 1m

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # specify Splunk index
    index =

    # whitelist or blacklist the metrics
    whitelist.1 = ^file_input_open$
    whitelist.2 = ^file_input_read_bytes$
    whitelist.3 = ^openshift_handlers$
    whitelist.4 = ^pipe$
    whitelist.5 = ^pipelines_num$
    whitelist.6 = ^splunk_post_bytes_sum.*$
    whitelist.7 = ^splunk_post_events_count_sum.*$
    whitelist.8 = ^splunk_post_failed_requests$
    whitelist.9 = ^splunk_post_message_max_lag_seconds_bucket.*$
    whitelist.10 = ^splunk_post_requests_seconds_sum.*$
    whitelist.11 = ^splunk_post_retries_required_sum.*$

    # connection to kubernetes api
    [general.kubernetes]

    # Override service URL for Kubernetes (default is ${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT})
    serviceURL =

    # Environment variable $KUBERNETES_NODENAME is used by default to setup nodeName
    # Use it only when you need to override it
    nodeName =

    # Configuration to access the API server,
    # see https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#accessing-the-api-from-a-pod
    # for details
    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token
    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

    # Default timeout for http responses. The streaming/watch requests depend on this timeout.
    timeout = 30m

    # How long to keep the cache for the recent calls to API server (to limit number of calls when collector discovers new pods)
    metadataTTL = 30s

    # regex to find pods
    podsCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?$

    # regex to find containers in the pods
    containersCgroupFilter = ^/([^/\s]+/)*kubepods(\.slice)?/((kubepods-)?(burstable|besteffort)(\.slice)?/)?([^/]*)pod([0-9a-f]{32}|[0-9a-f\-_]{36})(\.slice)?/(docker-|crio-|cri-\w+-)?[0-9a-f]{64}(\.scope)?(\/.+)?$

    # path to the kubelet root location (use it to discover application logs for emptyDir)
    # the expected format is `pods/{pod-id}/volumes/kubernetes.io~empty-dir/{volume-name}/_data/`
    volumesRootDir = /rootfs/var/lib/origin/openshift.local.volumes/

    # You can attach annotations as metadata, using the format
    # includeAnnotations.{key} = {regexp}
    # For example if you want to include all annotations that start with `prometheus.io` or `example.com` you can use
    # the following format:
    # includeAnnotations.1 = ^prometheus\.io.*
    # includeAnnotations.2 = ^example\.com.*

    # watch for changes (annotations) in the objects
    watch.namespaces = v1/namespace
    watch.deploymentconfigs = apps.openshift.io/v1/deploymentconfig
    watch.configurations = collectord.io/v1/configuration

    # Collectord can review the assigned ClusterRole and traverse metadata for the Pods only for the Owner objects
    # that are defined in the ClusterRole, ignoring anything else it does not have access to.
    # This way Collectord does not generate 403 requests on the API Server
    clusterRole = collectorforopenshift

    # An alternative to telling Collectord about the ClusterRole is to manually list the objects.
    # You can define which objects Collectord should traverse when it sees Owners.
    ; traverseOwnership.namespaces = v1/namespace

    # watch for pods annotations, setup prometheus collection
    # for these pods
    # Addon listens on Pod Network
    # DaemonSets listen on Host Network
    [input.prometheus_auto]

    # disable prometheus auto discovery for pods
    disabled = false

    # override type
    type = openshift_prometheus

    # specify Splunk index
    index =

    # how often to collect prometheus metrics
    interval = 60s

    # request timeout
    timeout = 60s

    # include metrics help with the events
    includeHelp = true

    # http client timeout
    timeout = 30s

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # Include an Authorization header for the prometheus scraper
    # When configuring scraping with collectord using annotations use prometheus.1-AuthorizationKey=key1
    # authorization.key1 = Bearer FOO

    # Splunk output
    [output.splunk]

    # Splunk HTTP Event Collector url
    url =

    # You can specify multiple Splunk URLs with
    #
    # urls.0 = https://server1:8088/services/collector/event/1.0
    # urls.1 = https://server2:8088/services/collector/event/1.0
    # urls.2 = https://server3:8088/services/collector/event/1.0
    #
    # Limitations:
    # * The urls cannot have different path.
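    # For example (hostnames are illustrative), two HEC endpoints are fine as long as they share the same path:
    #
    # urls.0 = https://splunk-hec-1.example.com:8088/services/collector/event/1.0
    # urls.1 = https://splunk-hec-2.example.com:8088/services/collector/event/1.0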
    # Specify how URL should be picked up (in case multiple are used)
    # urlSelection = random|round-robin|random-with-round-robin
    # where:
    # * random - choose random url on first selection and after each failure (connection or HTTP status code >= 500)
    # * round-robin - choose url starting from first one and bump on each failure (connection or HTTP status code >= 500)
    # * random-with-round-robin - choose random url on first selection and after that in round-robin on each
    #   failure (connection or HTTP status code >= 500)
    urlSelection = random-with-round-robin

    # Splunk HTTP Event Collector Token
    token =

    # Allow invalid SSL server certificate
    insecure = false

    # Path to CA certificate
    caPath =

    # CA Name to verify
    caName =

    # path for client certificate (if required)
    clientCertPath =

    # path for client key (if required)
    clientKeyPath =

    # Events are batched with the maximum size set by batchSize and staying in pipeline for not longer
    # than set by frequency
    frequency = 5s
    batchSize = 768K
    # limit by the number of events (0 value has no limit on the number of events)
    events = 50

    # Splunk through proxy
    proxyUrl =

    # authentication with basic authorization (user:password)
    proxyBasicAuth =

    # Splunk acknowledgement url (.../services/collector/ack)
    ackUrl =

    # You can specify multiple Splunk URLs for ackUrl
    #
    # ackUrls.0 = https://server1:8088/services/collector/ack
    # ackUrls.1 = https://server2:8088/services/collector/ack
    # ackUrls.2 = https://server3:8088/services/collector/ack
    #
    # Make sure that they are in the same order as the urls for url, so that this Splunk instance will be
    # able to acknowledge the payload.
    #
    # Limitations:
    # * The urls cannot have different path.

    # Enable index acknowledgment
    ackEnabled = false

    # Index acknowledgment timeout
    ackTimeout = 3m

    # Timeout specifies a time limit for requests made by collector.
    # The timeout includes connection time, any
    # redirects, and reading the response body.
    timeout = 30s

    # in case when pipeline can post to multiple indexes, we want to avoid the possibility of blocking
    # all pipelines, because just some events have an incorrect index
    dedicatedClientPerIndex = true

    # possible values: RedirectToDefault, Drop, Retry
    incorrectIndexBehavior = RedirectToDefault

    # gzip compression level (nocompression, default, 1...9)
    compressionLevel = default

    # number of dedicated splunk output threads (to increase throughput above 4k events per second)
    threads = 2

    # Default algorithm between threads is roundrobin, but you can change it to weighted
    ; threadsAlgorithm = weighted

    # if you want to exclude some preindexed fields from events
    # excludeFields.openshift_pod_ip = true

    # By default if there are no indexes defined on the message, Collectord sends the event without the index, and
    # Splunk HTTP Event Collector is going to use the default index for the Token.
    # You can change that, and tell Collectord
    # to ignore all events that don't have the index defined explicitly
    ; requireExplicitIndex = true

    # You can define if you want to truncate messages that are larger than 1M in length (or define your own size, like 256K)
    ; maximumMessageLength = 1M

    # For messages generated from logs, include unique `event_id` in the event
    ; includeEventID = false

    # Dedicated queue size for the output, default is 1024, larger queue sizes will require more memory,
    # but will allow handling more events in case of network issues
    queueSize = 1024

    # How many digits after the decimal point to keep for timestamps (0-9)
    # Defaults to 3 (milliseconds)
    # Change to 6 for microseconds
    # Change to 9 for nanoseconds
    ; timestampPrecision = 3

  002-daemonset.conf: |
    # DaemonSet configuration is used for Nodes and Masters.

    # Connection to the docker host
    [general.docker]

    # url for docker API, only unix socket is supported
    url = unix:///rootfs/var/run/docker.sock

    # path to docker root folder (can fallback to use folder structure to read docker metadata)
    dockerRootFolder = /rootfs/var/lib/docker/

    # (obsolete) In case pod metadata was not retrieved, how often collector should retry to reload the pod metadata
    # metadataFetchRetry = 5s

    # (obsolete) In case the event is recent, how long pipeline should wait for the metadata to be available in Kubernetes API
    # metadataFetchWait = 30s

    # (obsolete) In case collector does not see new events for a specific container, and with the last metadata refresh
    # we have not found this container - for how long we should keep this metadata in cache.
    # metadataTTL = 5s

    # Timeout for http responses to docker client. The streaming requests depend on this timeout.
    timeout = 1m

    # in case of Kubernetes/OpenShift, if you schedule some containers with Docker, but not with Kubernetes,
    # that allows us to find them (by default finding all containers with name not starting with k8s_)
    containersNameFilter = ^(([^k])|(k[^8])|(k8[^s])|(k8s[^_])).*$

    # regex to find docker container cgroups (helps excluding other cgroups with matched ID)
    containersCgroupFilter = ^(/([^/\s]+/)*(docker-|docker/)[0-9a-f]{64}(\.scope)?)$

    # cgroup input
    [input.system_stats]

    # disable system level stats
    disabled.host = false
    disabled.cgroup = false

    # cgroups fs location
    pathCgroups = /rootfs/sys/fs/cgroup

    # proc location
    pathProc = /rootfs/proc

    # how often to collect cgroup stats
    statsInterval = 30s

    # override type
    type.host = openshift_stats_v2_host
    type.cgroup = openshift_stats_v2_cgroup

    # specify Splunk index
    index.host =
    index.cgroup =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output.host =
    output.cgroup =

    # proc input
    [input.proc_stats]

    # disable proc level stats
    disabled = false

    # proc location
    pathProc = /rootfs/proc

    # how often to collect proc stats
    statsInterval = 60s

    # override type
    type = openshift_proc_stats_v2

    # specify Splunk index
    index.host =
    index.cgroup =

    # proc filesystem includes by default system threads (there can be over 100 of them)
    # these stats do not help with the observability
    # excluding them can reduce the size of the index, performance of the searches and usage of the collector
    includeSystemThreads = false

    # set output (splunk or devnull, default is [general]defaultOutput)
    output.host =
    output.cgroup =

    # network stats
    [input.net_stats]

    # disable net stats
    disabled = false

    # proc path location
    pathProc = /rootfs/proc

    # how often to collect net stats
    statsInterval = 30s

    # override type
    type = openshift_net_stats_v2

    # specify Splunk index
    index.host =
    index.cgroup =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output.host =
    output.cgroup =

    # network socket table
    [input.net_socket_table]

    # disable net stats
    disabled = false

    # proc path location
    pathProc = /rootfs/proc

    # how often to collect net stats
    statsInterval = 30s

    # override type
    type = openshift_net_socket_table

    # specify Splunk index
    index.host =
    index.cgroup =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output.host =
    output.cgroup =

    # group connections by tcp_state, localAddr, remoteAddr (if localPort is not the port it is listening on)
    # that can significantly reduce the amount of events
    group = true

    # mount input (collects mount stats where kubelet runtime is stored)
    [input.mount_stats]

    # disable system level stats
    disabled = false

    # how often to collect mount stats
    statsInterval = 30s

    # override type
    type = openshift_mount_stats

    # specify Splunk index
    index =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # Container Log files
    [input.files]

    # disable container logs monitoring
    disabled = false

    # root location of docker log files
    # logs are expected in standard docker format like {containerID}/{containerID}-json.log
    # rotated files
    path = /rootfs/var/lib/docker/containers/

    # root location of CRI-O files
    # logs are expected in Kubernetes format, like {podID}/{containerName}/0.log
    crioPath = /rootfs/var/log/pods/

    # (obsolete) glob matching pattern for log files
    # glob = */*-json.log*

    # files are read using a polling schema; when the EOF is reached, how often to check if files got updated
    pollingInterval = 250ms

    # how often to look for the new files under logs path
    walkingInterval = 5s

    # include verbose fields in events (file offset)
    verboseFields = false

    # override type
    type = openshift_logs

    # specify Splunk index
    index =

    # docker splits events when they are larger than 10-100k (depends on the docker version)
    # we join them together by default and forward to Splunk as one event
    joinPartialEvents = true

    # In case your containers report messages with terminal colors or other escape sequences
    # you can enable strip for all the containers in one place.
    # It is better to enable it only for the required container with the label collectord.io/strip-terminal-escape-sequences=true
    stripTerminalEscapeSequences = false

    # Regexp used for stripping terminal colors, it does not strip all the escape sequences
    # Read http://man7.org/linux/man-pages/man4/console_codes.4.html for more information
    stripTerminalEscapeSequencesRegex = (\x1b\[\d{1,3}(;\d{1,3})*m)|(\x07)|(\x1b]\d+(\s\d)?;[^\x07]+\x07)|(.*\x1b\[K)

    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
    samplingPercent = -1

    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
    samplingKey =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =
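    # As an illustration of the sampling settings above (the regexp is an assumption, adjust it to your logs):
    # forwarding roughly 20% of container logs, hashed by the first token of each line, would look like
    # ; samplingPercent = 20
    # ; samplingKey = ^(?P<key>\S+)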
    # configure default thruput per second for each container log
    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
    # from the single container to 128Kb per second.
    thruputPerSecond =

    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
    # older than 7 days
    tooOldEvents =

    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in the future
    tooNewEvents =

    # by default every new event should start from a non-space symbol
    eventPattern = ^[^\s]

    # Application Logs
    [input.app_logs]

    # disable container application logs monitoring
    disabled = false

    # root location of mounts (applies to hostPath mounts only), if the hostPath differs inside the container from the path on the host
    root = /rootfs/

    # how often to review list of available volumes
    syncInterval = 5s

    # glob matching pattern for log files
    glob = *.log*

    # files are read using a polling schema; when the EOF is reached, how often to check if files got updated
    pollingInterval = 250ms

    # how often to look for the new files under logs path
    walkingInterval = 5s

    # include verbose fields in events (file offset)
    verboseFields = false

    # override type
    type = openshift_logs

    # specify Splunk index
    index =

    # we split files using the new line character, with this configuration you can specify what defines the new event
    # after a new line
    eventPatternRegex = ^[^\s]

    # Maximum interval of messages in pipeline
    eventPatternMaxInterval = 100ms

    # Maximum time to wait for the messages in pipeline
    eventPatternMaxWait = 1s

    # Maximum message size
    eventPatternMaxSize = 100kb

    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
    samplingPercent = -1

    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
    samplingKey =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # configure default thruput per second for each container log
    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
    # from the single container to 128Kb per second.
    thruputPerSecond =

    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
    # older than 7 days
    tooOldEvents =

    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in the future
    tooNewEvents =

    # Configure how long Collectord should keep the file descriptors open for files that have not been forwarded yet
    # When using PVC, and if the pipeline is lagging behind, Collectord holding open fd for files can cause long termination
    # of pods, as kubelet cannot unmount the PVC volume from the system
    maxHoldAfterClose = 1800s
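    # A sketch for the eventPatternRegex setting above (assumption: application log events start with an ISO date,
    # so continuation lines such as stack traces are joined to the preceding event):
    # ; eventPatternRegex = ^\d{4}-\d{2}-\d{2}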
    # Host logs. Input syslog(.\d+)? files
    [input.files::syslog]

    # disable host level logs
    disabled = false

    # root location of log files
    path = /rootfs/var/log/

    # regex matching pattern
    match = ^(syslog|messages)(.\d+)?$

    # limit search only on one level
    recursive = false

    # files are read using a polling schema; when the EOF is reached, how often to check if files got updated
    pollingInterval = 250ms

    # how often to look for the new files under logs path
    walkingInterval = 5s

    # include verbose fields in events (file offset)
    verboseFields = false

    # override type
    type = openshift_host_logs

    # specify Splunk index
    index =

    # field extraction
    extraction = ^(?P<timestamp>[A-Za-z]+\s+\d+\s\d+:\d+:\d+)\s(?P<syslog_hostname>[^\s]+)\s(?P<syslog_component>[^:\[]+)(\[(?P<syslog_pid>\d+)\])?: (.+)$

    # extractionMessageField =

    # timestamp field
    timestampField = timestamp

    # format for timestamp
    # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`, would be formatted
    timestampFormat = Jan 2 15:04:05

    # Adjust date, if month/day aren't set in format
    timestampSetMonth = false
    timestampSetDay = false

    # timestamp location (if not defined by format)
    timestampLocation = Local

    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
    samplingPercent = -1

    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
    samplingKey =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # configure default thruput per second for this files group
    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
    # from the files in this group to 128Kb per second.
    thruputPerSecond =

    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
    # older than 7 days
    tooOldEvents =

    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in the future
    tooNewEvents =

    # by default every new event should start from a non-space symbol
    eventPattern = ^[^\s]

    # Blacklisting and whitelisting the logs
    # whitelist.0 = ^regexp$
    # blacklist.0 = ^regexp$

    # Host logs. Input all *.log(.\d+)? files
    [input.files::logs]

    # disable host level logs
    disabled = false

    # root location of log files
    path = /rootfs/var/log/

    # regex matching pattern
    match = ^(([\w\-.]+\.log(.[\d\-]+)?)|(docker))$

    # files are read using a polling schema; when the EOF is reached, how often to check if files got updated
    pollingInterval = 250ms

    # how often to look for the new files under logs path
    walkingInterval = 5s

    # include verbose fields in events (file offset)
    verboseFields = false

    # override type
    type = openshift_host_logs

    # specify Splunk index
    index =

    # field extraction
    extraction =

    # extractionMessageField =

    # timestamp field
    timestampField =

    # format for timestamp
    # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`, would be formatted
    timestampFormat =

    # timestamp location (if not defined by format)
    timestampLocation =

    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
    samplingPercent = -1

    # sampling key for hash based sampling (should be regexp with the named match pattern `key`)
    samplingKey =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # configure default thruput per second for this files group
    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
    # from the files in this group to 128Kb per second.
    thruputPerSecond =

    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
    # older than 7 days
    tooOldEvents =

    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in the future
    tooNewEvents =

    # by default every new event should start from a non-space symbol
    eventPattern = ^[^\s]

    # Blacklisting and whitelisting the logs
    # whitelist.0 = ^regexp$
    # blacklist.0 = ^regexp$

    [input.journald]

    # disable host level logs
    disabled = false

    # root location of log files
    path.persistent = /rootfs/var/log/journal/
    # only if required
    # path.volatile = /rootfs/run/log/journal/

    # when the end of journald is reached, how often to poll
    pollingInterval = 250ms

    # if you don't want to forward journald from the beginning,
    # set the oldest event in relative value, like -14h or -30m or -30s (h/m/s supported)
    startFromRel =

    # override type
    type = openshift_host_logs

    # specify Splunk index
    index =

    # sample output (-1 does not sample, 20 - only 20% of the logs should be forwarded)
    samplingPercent = -1

    # sampling key (should be regexp with the named match pattern `key`)
    samplingKey =

    # how often to reopen the journald to free old files
    reopenInterval = 1h

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # configure default thruput per second for this files group
    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
    # from the files in this group to 128Kb per second.
    thruputPerSecond =

    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
    # older than 7 days
    tooOldEvents =

    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in the future
    tooNewEvents =

    # by default every new event should start from a non-space symbol
    eventPattern = ^[^\s]

    # By default ignoring verbose hyperkube logs (all INFO messages)
    blacklist.0 = ^I\d+.*$
    # whitelist.0 = ^regexp$
    # blacklist.1 = ^regexp$

    # Pipe to join events (container logs only)
    [pipe.join]

    # disable joining events
    disabled = false

    # Maximum interval of messages in pipeline
    maxInterval = 100ms

    # Maximum time to wait for the messages in pipeline
    maxWait = 1s

    # Maximum message size
    maxSize = 100K

    # Default pattern to indicate new message (should not start from a space)
    patternRegex = ^[^\s]

    # (deprecated, use annotations for setting up join rules)
    # Define special event join patterns for matched events
    # Section consist of [pipe.join::<name>]
    # [pipe.join::my_app]
    ## Set match pattern for the fields
    #; matchRegex.docker_container_image = my_app
    #; matchRegex.stream = stdout
    ## All events start from '['
    #; patternRegex = ^\[\d+

    # You can configure global replace rules for the events, which can help to remove sensitive data
    # from logs before they are sent to Splunk. Those rules will be applied to all pipelines for container logs, host logs,
    # application logs and events.
    # In the following example we replace password=TEST with password=********
    ; [pipe.replace::name]
    ; patternRegex = (password=)([^\s]+)
    ; replace = $1********

    [input.prometheus::kubelet]

    # disable prometheus kubelet metrics
    disabled = false

    # override type
    type = openshift_prometheus

    # specify Splunk index
    index =

    # Override host (environment variables are supported)
    host = ${KUBERNETES_NODENAME}

    # Override source
    source = kubelet

    # how often to collect prometheus metrics
    interval = 60s

    # request timeout
    timeout = 60s

    # prometheus endpoint
    endpoint = https://127.0.0.1:10250/metrics

    # token for "Authorization: Bearer $(cat tokenPath)"
    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token

    # server certificate for certificate validation
    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

    # client certificate for authentication
    clientCertPath =

    # Allow invalid SSL server certificate
    insecure = true

    # include metrics help with the events
    # can be useful to explore prometheus metrics
    includeHelp = false

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # filter only metrics used by dashboards
    whitelist.1 = ^(kubernetes|openshift)_build_info$
    whitelist.2 = ^kubelet_runtime_operations_latency_microseconds$
    whitelist.3 = ^kubelet_docker_operations_latency_microseconds_sum$
    whitelist.4 = ^kubelet_network_plugin_operations_latency_microseconds_sum$
    whitelist.5 = ^kubelet_cgroup_manager_latency_microseconds_sum$
    whitelist.6 = ^storage_operation_duration_seconds_sum$
    whitelist.7 = ^kubelet_docker_operations_errors$
    whitelist.8 = ^kubelet_runtime_operations_errors$
    whitelist.9 = ^rest_client_requests_total$
    whitelist.10 = ^process_cpu_seconds_total$
    whitelist.11 = ^process_resident_memory_bytes$
    whitelist.12 = ^process_virtual_memory_bytes$
    whitelist.13 = ^kubelet_volume_stats_.+$

    # Collectord reports if entropy is low (uncomment to use it)
    ; [diagnostics::node-entropy]
    ; settings.path = /rootfs/proc/sys/kernel/random/entropy_avail
    ; settings.interval = 1h
    ; settings.threshold = 800

    # Collectord can report if node reboot is required (uncomment to use it)
    ; [diagnostics::node-reboot-required]
    ; settings.path = /rootfs/var/run/reboot-required*
    ; settings.interval = 1h

  003-daemonset-master.conf: |
    [input.prometheus::kubernetes-api]

    # disable prometheus kubernetes-api input
    disabled = false

    # override type
    type = openshift_prometheus

    # specify Splunk index
    index =

    # override host
    host = ${KUBERNETES_NODENAME}

    # override source
    source = kubernetes-api

    # how often to collect prometheus metrics
    interval = 60s

    # request timeout
    timeout = 60s

    # prometheus endpoint
    # at first trying to get it from localhost (that way avoiding load balancer, if multiple)
    # as fallback using proxy
    endpoint.1localhost = https://127.0.0.1:8443/metrics
    endpoint.2kubeapi = https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/metrics

    # token for "Authorization: Bearer $(cat tokenPath)"
    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token

    # server certificate for certificate validation
    certPath = /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

    # client certificate for authentication
    clientCertPath =

    # Allow invalid SSL server certificate
    insecure = true

    # include metrics help with the events
    includeHelp = false

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # filter only metrics used by dashboards
    whitelist.1 = ^(kubernetes|openshift)_build_info$
    whitelist.2 = ^authenticated_user_requests$
    whitelist.3 = ^apiserver_request_count$
    whitelist.4 = ^process_cpu_seconds_total$
    whitelist.5 = ^process_resident_memory_bytes$
    whitelist.6 = ^process_virtual_memory_bytes$

    [input.prometheus::controller]

    # disable prometheus controller metrics
    disabled = false

    # override type
    type = openshift_prometheus

    # specify Splunk index
    index =

    # override host
    host = ${KUBERNETES_NODENAME}

    # override source
    source = controller

    # how often to collect prometheus metrics
    interval = 60s

    # request timeout
    timeout = 60s

    # prometheus endpoint
    endpoint.https1 = https://127.0.0.1:10257/metrics
    endpoint.https2 = https://127.0.0.1:8444/metrics

    # token for "Authorization: Bearer $(cat tokenPath)"
    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token

    # server certificate for certificate validation
    certPath =

    # client certificate for authentication
    clientCertPath =
    clientKeyPath =

    # Allow invalid SSL server certificate
    insecure = true

    # include metrics help with the events
    includeHelp = false

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # filter only metrics used by dashboards
    whitelist.1 = ^(kubernetes|openshift)_build_info$
    whitelist.2 = ^process_cpu_seconds_total$
    whitelist.3 = ^process_resident_memory_bytes$
    whitelist.4 = ^process_virtual_memory_bytes$
    whitelist.5 = ^node_collector_zone_size$
    whitelist.6 = ^node_collector_zone_health$
    whitelist.7 = ^node_collector_unhealthy_nodes_in_zone$

    [input.prometheus::scheduler]

    # disable prometheus scheduler metrics
    disabled = false

    # override type
    type = openshift_prometheus

    # specify Splunk index
    index =

    # override host
    host = ${KUBERNETES_NODENAME}

    # override source
    source = scheduler

    # how often to collect prometheus metrics
    interval = 60s

    # request timeout
    timeout = 60s

    # prometheus endpoint
    endpoint.https1 = https://127.0.0.1:10259/metrics
    endpoint.https2 = https://127.0.0.1:8444/metrics

    # token for "Authorization: Bearer $(cat tokenPath)"
    tokenPath = /var/run/secrets/kubernetes.io/serviceaccount/token

    # server certificate for certificate validation
    certPath =

    # client certificate for authentication
    clientCertPath =
    clientKeyPath =

    # Allow invalid SSL server certificate
    insecure = true

    # include metrics help with the events
    includeHelp = false

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # filter only metrics used by dashboards
    whitelist.1 = ^(kubernetes|openshift)_build_info$
    whitelist.2 = ^scheduler_e2e_scheduling_duration_seconds_sum$
    whitelist.3 = ^scheduler_binding_duration_seconds_sum$
    whitelist.4 = ^scheduler_scheduling_algorithm_duration_seconds_sum$
    whitelist.5 = ^process_cpu_seconds_total$
    whitelist.6 = ^process_resident_memory_bytes$
    whitelist.7 = ^process_virtual_memory_bytes$

    [input.prometheus::etcd]

    # disable prometheus etcd metrics
    disabled = false

    # override type
    type = openshift_prometheus

    # specify Splunk index
    index =

    # override host
    host = ${KUBERNETES_NODENAME}

    # override source
    source = etcd

    # how often to collect prometheus metrics
    interval = 60s

    # prometheus endpoint
    endpoint.https = https://127.0.0.1:2379/metrics

    # token for "Authorization: Bearer $(cat tokenPath)"
    tokenPath =

    # server certificate for certificate validation
    certPath = /rootfs/etc/origin/master/master.etcd-ca.crt

    # client certificate for authentication
    clientCertPath = /rootfs/etc/origin/master/master.etcd-client.crt
    clientKeyPath = /rootfs/etc/origin/master/master.etcd-client.key

    # Allow invalid SSL server certificate
    insecure = true

    # include metrics help with the events
    includeHelp = false

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    whitelist.1 = ^etcd_server_leader_changes_seen_total$
    whitelist.2 = ^etcd_server_has_leader$
    whitelist.3 = ^etcd_server_proposals_committed_total$
    whitelist.4 = ^etcd_server_proposals_applied_total$
    whitelist.5 = ^etcd_server_proposals_committed_total$
    whitelist.6 = ^etcd_server_proposals_pending$
    whitelist.7 = ^etcd_server_proposals_failed_total$
    whitelist.8 = ^etcd_disk_wal_fsync_duration_seconds_sum$
    whitelist.9 = ^etcd_disk_wal_fsync_duration_seconds_count$
    whitelist.10 = ^etcd_disk_backend_commit_duration_seconds_sum$
    whitelist.11 = ^etcd_disk_backend_commit_duration_seconds_count$
    whitelist.12 = ^etcd_network_client_grpc_.*$
    whitelist.13 = ^grpc_server_handled_total$
    whitelist.14 = ^etcd_network_peer_round_trip_time_seconds_bucket$
    whitelist.15 = ^process_cpu_seconds_total$
    whitelist.16 = ^process_resident_memory_bytes$
    whitelist.17 = ^process_virtual_memory_bytes$
    whitelist.18 = ^process_open_fds$
    whitelist.19 = ^process_max_fds$
    whitelist.20 = ^etcd_disk_backend_commit_duration_seconds_bucket$
    whitelist.21 = ^etcd_disk_wal_fsync_duration_seconds_bucket$

    # Audit logs
    [input.files::audit-logs]

    # disable host level logs
    disabled = false

    # root location for audit logs
    path = /rootfs/var/lib/origin/openpaas-oscp-audit/

    # regex matching pattern
    match = ^[\w\-\.]+\.log(.\d+)?$

    # files are read using a polling schema; when the EOF is reached, how often to check if files got updated
    pollingInterval = 250ms

    # how often to look for the new files under logs path
    walkingInterval = 5s

    # include verbose fields in events (file offset)
    verboseFields = false

    # override type
    type = openshift_host_logs

    # specify Splunk index
    index =

    # field extraction
    extraction =

    # extractionMessageField =

    # timestamp field
    timestampField =

    # format for timestamp
    # the layout defines the format by showing how the reference time, defined to be `Mon Jan 2 15:04:05 -0700 MST 2006`, would be formatted
    timestampFormat =

    # timestamp location (if not defined by format)
    timestampLocation =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # configure default thruput per second for this files group
    # for example if you set `thruputPerSecond = 128Kb`, that will limit amount of logs forwarded
    # from the files in this group to 128Kb per second.
    thruputPerSecond =

    # Configure events that are too old to be forwarded, for example 168h (7 days) - that will drop all events
    # older than 7 days
    tooOldEvents =

    # Configure events that are too new to be forwarded, for example 1h - that will drop all events that are 1h in the future
    tooNewEvents =

    # Blacklisting and whitelisting the logs
    # whitelist.0 = ^regexp$
    # blacklist.0 = ^regexp$

  004-addon.conf: |
    [general]

    # addons can be run in parallel with agents
    addon = true

    [input.kubernetes_events]

    # disable collecting kubernetes events
    disabled = false

    # override type
    type = openshift_events

    # specify Splunk index
    index =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # exclude managed fields from the metadata
    excludeManagedFields = true

    [input.kubernetes_watch::pods]

    # disable events
    disabled = false

    # Set the timeout for how often watch request should refresh the whole list
    refresh = 10m

    apiVersion = v1
    kind = pod
    namespace =

    # override type
    type = openshift_objects

    # specify Splunk index
    index =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # exclude managed fields from the metadata
    excludeManagedFields = true

    # you can remove or hash some values in the events (after modifyValues you can define the path in the JSON object,
    # and the value can be hash:{hashFunction}, or remove to remove the object)
    ; modifyValues.object.data.* = hash:sha256
    ; modifyValues.object.metadata.annotations.* = remove

    # You can exclude events by namespace with a blacklist, or whitelist only the required namespaces
    # blacklist.kubernetes_namespace = ^namespace0$
    # whitelist.kubernetes_namespace = ^((namespace1)|(namespace2))$

    [input.kubernetes_watch::resourcequota]

    # disable events
    disabled = false

    # Set the timeout for how often watch request should refresh the whole list
    refresh = 10m

    apiVersion = v1
    kind = ResourceQuota
    namespace =

    # override type
    type = openshift_objects

    # specify Splunk index
    index =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # exclude managed fields from the metadata
    excludeManagedFields = true

    [input.kubernetes_watch::clusterresourcequota]

    # disable events
    disabled = false

    # Set the timeout for how often watch request should refresh the whole list
    refresh = 10m

    apiVersion = quota.openshift.io/v1
    kind = ClusterResourceQuota
    namespace =

    # override type
    type = openshift_objects

    # specify Splunk index
    index =

    # set output (splunk or devnull, default is [general]defaultOutput)
    output =

    # exclude managed fields from the metadata
    excludeManagedFields = true
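# Collectord behaviour can also be tuned per workload with collectord.io/ annotations (see the
# annotationsSubdomain comment in 001-general.conf). A minimal sketch, reusing the annotation mentioned
# in 002-daemonset.conf; the Pod name and image are hypothetical:
#
#   apiVersion: v1
#   kind: Pod
#   metadata:
#     name: example-app
#     annotations:
#       collectord.io/strip-terminal-escape-sequences: "true"
#   spec:
#     containers:
#       - name: app
#         image: example/app:latest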
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: collectorforopenshift
  namespace: collectorforopenshift
  labels:
    app: collectorforopenshift
spec:
  # Default updateStrategy is OnDelete. For the collector, RollingUpdate is suitable
  # when you update the configuration
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      daemon: collectorforopenshift
  template:
    metadata:
      name: collectorforopenshift
      labels:
        daemon: collectorforopenshift
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      dnsPolicy: ClusterFirstWithHostNet
      hostNetwork: true
      serviceAccountName: collectorforopenshift
      # We run this DaemonSet only for Non-Masters
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/master
                    operator: DoesNotExist
      tolerations:
        - operator: "Exists"
          effect: "NoSchedule"
        - operator: "Exists"
          effect: "NoExecute"
      containers:
        - name: collectorforopenshift
          # Stick to specific version
          image: registry.connect.redhat.com/outcoldsolutions/collectorforopenshift:5.21.412
          securityContext:
            privileged: true
            runAsUser: 0
          # Define your resources if you need. Defaults should be fine for most.
          resources:
            limits:
              cpu: 2
              memory: 512Mi
            requests:
              cpu: 200m
              memory: 192Mi
          env:
            - name: KUBERNETES_NODENAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          volumeMounts:
            # We store state in /data folder (file positions)
            - name: collectorforopenshift-state
              mountPath: /data
            # Configuration file deployed with ConfigMap
            - name: collectorforopenshift-config
              mountPath: /config/
              readOnly: true
            # Rootfs
            - name: rootfs
              mountPath: /rootfs/
              readOnly: false
              mountPropagation: HostToContainer
            # correct timezone
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
      volumes:
        # We store state directly on the host; change this location if
        # your persistent volume is somewhere else
        - name: collectorforopenshift-state
          hostPath:
            path: /var/lib/collectorforopenshift/data/
            type: DirectoryOrCreate
        # Location of rootfs
        - name: rootfs
          hostPath:
            path: /
        # correct timezone
        - name: localtime
          hostPath:
            path: /etc/localtime
        # configuration from ConfigMap
        - name: collectorforopenshift-config
          configMap:
            name: collectorforopenshift
            items:
              - key: 001-general.conf
                path: 001-general.conf
              - key: 002-daemonset.conf
                path: 002-daemonset.conf
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: collectorforopenshift-master
  namespace: collectorforopenshift
  labels:
    app: collectorforopenshift
spec:
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      daemon: collectorforopenshift
  template:
    metadata:
      name: collectorforopenshift-master
      labels:
        daemon: collectorforopenshift
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      dnsPolicy: ClusterFirstWithHostNet
      hostNetwork: true
      serviceAccountName: collectorforopenshift
      # Deploy only on master
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/master
                    operator: Exists
      tolerations:
        - operator: "Exists"
          effect: "NoSchedule"
        - operator: "Exists"
          effect: "NoExecute"
      containers:
        - name: collectorforopenshift
          image: registry.connect.redhat.com/outcoldsolutions/collectorforopenshift:5.21.412
          securityContext:
            privileged: true
            runAsUser: 0
          resources:
            limits:
              cpu: 2
              memory: 512Mi
            requests:
              cpu: 200m
              memory: 192Mi
          env:
            - name: KUBERNETES_NODENAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          volumeMounts:
            - name: collectorforopenshift-state
              mountPath: /data
            - name: collectorforopenshift-config
              mountPath: /config/
              readOnly: true
            # Rootfs
            - name: rootfs
              mountPath: /rootfs/
              readOnly: false
              mountPropagation: HostToContainer
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
      volumes:
        - name: collectorforopenshift-state
          hostPath:
            path: /var/lib/collectorforopenshift/data/
            type: DirectoryOrCreate
        - name: rootfs
          hostPath:
            path: /
        - name: localtime
          hostPath:
            path: /etc/localtime
        - name: collectorforopenshift-config
          configMap:
            name: collectorforopenshift
            items:
              - key: 001-general.conf
                path: 001-general.conf
              - key: 002-daemonset.conf
                path: 002-daemonset.conf
              - key: 003-daemonset-master.conf
                path: 003-daemonset-master.conf
---
apiVersion: apps/v1beta1
kind: Deployment
metadata:
  name: collectorforopenshift-addon
  namespace: collectorforopenshift
  labels:
    app: collectorforopenshift
spec:
  replicas: 1
  selector:
    matchLabels:
      daemon: collectorforopenshift
  template:
    metadata:
      name: collectorforopenshift-addon
      labels:
        daemon: collectorforopenshift
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      serviceAccountName: collectorforopenshift
      containers:
        - name: collectorforopenshift
          image: registry.connect.redhat.com/outcoldsolutions/collectorforopenshift:5.21.412
          securityContext:
            privileged: true
            runAsUser: 0
          resources:
            limits:
              cpu: 500m
              memory: 256Mi
            requests:
              cpu: 50m
              memory: 64Mi
          env:
            - name: KUBERNETES_NODENAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          volumeMounts:
            - name: collectorforopenshift-state
              mountPath: /data
            - name: collectorforopenshift-config
              mountPath: /config/
              readOnly: true
      volumes:
        - name: collectorforopenshift-state
          hostPath:
            path: /var/lib/collectorforopenshift/data/
            type: Directory
        - name: collectorforopenshift-config
          configMap:
            name: collectorforopenshift
            items:
              - key: 001-general.conf
                path: 001-general.conf
              - key: 004-addon.conf
                path: 004-addon.conf
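# A typical way to apply this manifest (assuming it is saved as collectorforopenshift.yaml):
#
#   oc apply -f collectorforopenshift.yaml
#   oc get pods -n collectorforopenshift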