Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
4963 changed files with 677545 additions and 0 deletions

@@ -0,0 +1,467 @@
# Prometheus Input Plugin
The prometheus input plugin gathers metrics from HTTP servers exposing metrics
in Prometheus format.
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
In addition to the plugin-specific configuration settings, plugins support
additional global and plugin configuration settings. These settings are used to
modify metrics, tags, and fields or create aliases and configure ordering, etc.
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
## Secret-store support
This plugin supports secrets from secret-stores for the `username`, `password`
and `bearer_token_string` options. See the
[secret-store documentation][SECRETSTORE] for more details on how to use them.
[SECRETSTORE]: ../../../docs/CONFIGURATION.md#secret-store-secrets
## Configuration
```toml @sample.conf
# Read metrics from one or many prometheus clients
[[inputs.prometheus]]
## An array of urls to scrape metrics from.
urls = ["http://localhost:9100/metrics"]
## Metric version controls the mapping from Prometheus metrics into Telegraf metrics.
## See "Metric Format Configuration" in plugins/inputs/prometheus/README.md for details.
## Valid options: 1, 2
# metric_version = 1
## URL tag name (tag containing scraped URL; optional, default is "url")
# url_tag = "url"
## Whether the timestamp of the scraped metrics will be ignored.
## If set to true, the gather time will be used.
# ignore_timestamp = false
## Override content-type of the returned message
## Available options are for prometheus:
## text, protobuf-delimiter, protobuf-compact, protobuf-text,
## and for openmetrics:
## openmetrics-text, openmetrics-protobuf
## By default the content-type of the response is used.
# content_type_override = ""
## An array of Kubernetes services to scrape metrics from.
# kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"]
## Kubernetes config file to create client from.
# kube_config = "/path/to/kubernetes.config"
## Scrape Pods
## Enable scraping of k8s pods. Further settings as to which pods to scrape
## are determined by the 'method' option below. When enabled, the default is
## to use annotations to determine whether to scrape or not.
# monitor_kubernetes_pods = false
## Scrape Pods Method
## annotations: default, looks for specific pod annotations documented below
## settings: only look for pods matching the settings provided, not
## annotations
## settings+annotations: looks at pods that match annotations using the user
## defined settings
# monitor_kubernetes_pods_method = "annotations"
## Scrape Pods 'annotations' method options
## If the method is set to 'annotations' or 'settings+annotations', these
## annotation flags are looked for:
## - prometheus.io/scrape: Required to enable scraping for this pod. Can also
## use 'prometheus.io/scrape=false' annotation to opt-out entirely.
## - prometheus.io/scheme: If the metrics endpoint is secured then you will
## need to set this to 'https' & most likely set the tls config
## - prometheus.io/path: If the metrics path is not /metrics, define it with
## this annotation
## - prometheus.io/port: If port is not 9102 use this annotation
## Scrape Pods 'settings' method options
## When using 'settings' or 'settings+annotations', the default values for
## annotations can be modified using the following options:
# monitor_kubernetes_pods_scheme = "http"
# monitor_kubernetes_pods_port = "9102"
# monitor_kubernetes_pods_path = "/metrics"
## Get the list of pods to scrape with either the scope of
## - cluster: the kubernetes watch api (default, no need to specify)
## - node: the local cadvisor api; for scalability. Note that the config node_ip or the environment variable NODE_IP must be set to the host IP.
# pod_scrape_scope = "cluster"
## Only for node scrape scope: node IP of the node that telegraf is running on.
## Either this config or the environment variable NODE_IP must be set.
# node_ip = "10.180.1.1"
## Only for node scrape scope: interval in seconds for how often to get updated pod list for scraping.
## Default is 60 seconds.
# pod_scrape_interval = 60
## Content length limit
## When set, telegraf will drop responses with length larger than the configured value.
## Default is "0KB" which means unlimited.
# content_length_limit = "0KB"
## Restricts Kubernetes monitoring to a single namespace
## ex: monitor_kubernetes_pods_namespace = "default"
# monitor_kubernetes_pods_namespace = ""
## The name of the label for the namespace of the pod that is being scraped.
## Default is 'namespace' but this can conflict with metrics that have the label 'namespace'
# pod_namespace_label_name = "namespace"
## Label selector to target pods which have the label
# kubernetes_label_selector = "env=dev,app=nginx"
## Field selector to target pods
## eg. To scrape pods on a specific node
# kubernetes_field_selector = "spec.nodeName=$HOSTNAME"
## Filter which pod annotations and labels will be added to metric tags
#
# pod_annotation_include = ["annotation-key-1"]
# pod_annotation_exclude = ["exclude-me"]
# pod_label_include = ["label-key-1"]
# pod_label_exclude = ["exclude-me"]
## Cache refresh interval to set the interval for re-sync of the pods list.
## Default is 60 minutes.
# cache_refresh_interval = 60
## Use bearer token for authorization. ('bearer_token' takes priority)
# bearer_token = "/path/to/bearer/token"
## OR
# bearer_token_string = "abc_123"
## HTTP Basic Authentication username and password. ('bearer_token' and
## 'bearer_token_string' take priority)
# username = ""
# password = ""
## Optional custom HTTP headers
# http_headers = {"X-Special-Header" = "Special-Value"}
## Specify timeout duration for slower prometheus clients (default is 5s)
# timeout = "5s"
## This option is now used by the HTTP client to set the header response
## timeout, not the overall HTTP timeout.
# response_timeout = "5s"
## HTTP Proxy support
# use_system_proxy = false
# http_proxy_url = ""
## Optional TLS Config
# tls_ca = /path/to/cafile
# tls_cert = /path/to/certfile
# tls_key = /path/to/keyfile
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
## Use the given name as the SNI server name on each URL
# tls_server_name = "myhost.example.org"
## TLS renegotiation method, choose from "never", "once", "freely"
# tls_renegotiation_method = "never"
## Enable/disable TLS
## Set to true/false to enforce TLS being enabled/disabled. If not set,
## enable TLS only if any of the other options are specified.
# tls_enable = true
## This option allows you to report the status of prometheus requests.
# enable_request_metrics = false
## Scrape Services available in Consul Catalog
# [inputs.prometheus.consul]
# enabled = true
# agent = "http://localhost:8500"
# query_interval = "5m"
# [[inputs.prometheus.consul.query]]
# name = "a service name"
# tag = "a service tag"
# url = 'http://{{if ne .ServiceAddress ""}}{{.ServiceAddress}}{{else}}{{.Address}}{{end}}:{{.ServicePort}}/{{with .ServiceMeta.metrics_path}}{{.}}{{else}}metrics{{end}}'
# [inputs.prometheus.consul.query.tags]
# host = "{{.Node}}"
## Control pod scraping based on pod namespace annotations
## Pass and drop here act like tagpass and tagdrop, but instead
## of filtering metrics they filter pod candidates for scraping
#[inputs.prometheus.namespace_annotation_pass]
# annotation_key = ["value1", "value2"]
#[inputs.prometheus.namespace_annotation_drop]
# some_annotation_key = ["dont-scrape"]
```
`urls` can contain a unix socket as well. If a different path is required
(default is `/metrics` for both http[s] and unix) for a unix socket, add `path`
as a query parameter as follows:
`unix:///var/run/prometheus.sock?path=/custom/metrics`
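
For example, a configuration scraping both an HTTP endpoint and a unix socket
with a non-default path could look like this (paths are illustrative):

```toml
[[inputs.prometheus]]
  urls = [
    "http://localhost:9100/metrics",
    "unix:///var/run/prometheus.sock?path=/custom/metrics"
  ]
```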
### Metric Format Configuration
The `metric_version` setting controls how telegraf translates prometheus format
metrics to telegraf metrics. There are two options.
With `metric_version = 1`, the prometheus metric name becomes the telegraf
metric name. Prometheus labels become telegraf tags. Prometheus values become
telegraf field values. The fields have generic keys based on the type of the
prometheus metric. This option produces metrics that are dense (not
sparse). Denseness is a useful property for some outputs, including those that
are more efficient with row-oriented data.
`metric_version = 2` differs in a few ways. The prometheus metric name becomes a
telegraf field key. Metrics hold more than one value and the field keys aren't
generic. The resulting metrics are sparse, but for some outputs they may be
easier to process or query, including those that are more efficient with
column-oriented data. The telegraf metric name is the same for all metrics in
the input instance. It can be set with the `name_override` setting and defaults
to "prometheus". To have multiple metric names, you can use multiple instances
of the plugin, each with its own `name_override`.
`metric_version = 2` uses the same histogram format as the [histogram
aggregator](../../aggregators/histogram/README.md).
The Example Output sections below show examples for both options.
When using this plugin along with the prometheus_client output, use the same
option in both to ensure metrics are round-tripped without modification.
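
A minimal round-trip sketch, assuming the default prometheus_client listen
address:

```toml
[[inputs.prometheus]]
  urls = ["http://localhost:9100/metrics"]
  metric_version = 2

[[outputs.prometheus_client]]
  ## Use the same metric_version so metrics round-trip without modification.
  listen = ":9273"
  metric_version = 2
```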
### Kubernetes Service Discovery
URLs listed in the `kubernetes_services` parameter will be expanded by looking
up all A records assigned to the hostname as described in [Kubernetes DNS
service discovery][serv-disc].
This method can be used to locate all [Kubernetes headless services][headless].
[serv-disc]: https://kubernetes.io/docs/concepts/services-networking/service/#dns
[headless]: https://kubernetes.io/docs/concepts/services-networking/service/#headless-services
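
For example, scraping every pod behind a hypothetical headless service:

```toml
[[inputs.prometheus]]
  ## Each A record returned for this hostname becomes a scrape target.
  kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"]
```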
### Kubernetes scraping
Enabling this option allows the plugin to scrape Kubernetes pods for
prometheus annotations. You can run this plugin inside your kubernetes
cluster, or it can use the kubeconfig file to determine which cluster to
monitor. Currently the following annotations are supported:
* `prometheus.io/scrape` Enable scraping for this pod.
* `prometheus.io/scheme` If the metrics endpoint is secured then you will need to set this to `https` & most likely set the tls config. (default 'http')
* `prometheus.io/path` Override the path for the metrics endpoint on the service. (default '/metrics')
* `prometheus.io/port` Used to override the port. (default 9102)
Using the `monitor_kubernetes_pods_namespace` option allows you to limit which
pods you are scraping.
The setting `pod_namespace_label_name` allows you to change the label name for
the namespace of the pod you are scraping. The default is `namespace`, but this
will overwrite a `namespace` label already present on a scraped metric.
Using `pod_scrape_scope = "node"` allows more scalable scraping: each Telegraf
instance scrapes only the pods on the node it is running on, fetching the pod
list locally from the node's kubelet. This requires running Telegraf on every
node of the cluster. Note that either `node_ip` must be specified in the
config or the environment variable `NODE_IP` must be set to the host IP. The
latter can be done in the yaml of the pod running telegraf:
```yaml
env:
  - name: NODE_IP
    valueFrom:
      fieldRef:
        fieldPath: status.hostIP
```
If using node level scrape scope, `pod_scrape_interval` specifies how often (in
seconds) the pod list for scraping should be updated. If not specified, the
default is 60 seconds.
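
A minimal node-scoped configuration, with an illustrative node IP:

```toml
[[inputs.prometheus]]
  monitor_kubernetes_pods = true
  pod_scrape_scope = "node"
  ## Alternatively, set the NODE_IP environment variable as shown above.
  node_ip = "10.180.1.1"
  ## Refresh the pod list every 60 seconds (the default).
  pod_scrape_interval = 60
```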
The pod running telegraf will need to have the proper rbac configuration in
order to be allowed to call the k8s api to discover and watch pods in the
cluster. A typical configuration will create a service account, a cluster role
with the appropriate rules and a cluster role binding to tie the cluster role to
the service account. Example of configuration for cluster level discovery:
```yaml
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: telegraf-k8s-role-{{.Release.Name}}
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
---
# Rolebinding for namespace to cluster-admin
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: telegraf-k8s-role-{{.Release.Name}}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: telegraf-k8s-role-{{.Release.Name}}
subjects:
  - kind: ServiceAccount
    name: telegraf-k8s-{{ .Release.Name }}
    namespace: {{ .Release.Namespace }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: telegraf-k8s-{{ .Release.Name }}
```
### Consul Service Discovery
Enabling this option and configuring the consul `agent` url will allow the
plugin to query the consul catalog for available services. Using
`query_interval` the plugin will periodically query the consul catalog for
services with `name` and `tag` and refresh the list of scraped urls. It can
use the information from the catalog to build the scraped url and additional
tags from a template.
Multiple consul queries can be configured, each for a different service.
The following example fields can be used in url or tag templates:
* Node
* Address
* NodeMeta
* ServicePort
* ServiceAddress
* ServiceTags
* ServiceMeta
For a full list of available fields and their types, see the `CatalogService`
struct in
<https://github.com/hashicorp/consul/blob/master/api/catalog.go>.
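
Tag value templates may also use a `join` function for list-valued fields such
as `ServiceTags`. A sketch with hypothetical service name and tag:

```toml
[[inputs.prometheus]]
  [inputs.prometheus.consul]
    enabled = true
    agent = "http://localhost:8500"
    query_interval = "5m"

    [[inputs.prometheus.consul.query]]
      name = "my-service"
      tag = "metrics"
      url = 'http://{{.ServiceAddress}}:{{.ServicePort}}/metrics'
      [inputs.prometheus.consul.query.tags]
        host = "{{.Node}}"
        service_tags = '{{join .ServiceTags ","}}'
```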
### Bearer Token
If set, the file specified by the `bearer_token` parameter will be read on
each interval and its contents will be appended to the Bearer string in the
Authorization header.
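
For example, pointing at a projected Kubernetes service account token (path is
illustrative):

```toml
[[inputs.prometheus]]
  urls = ["http://localhost:9100/metrics"]
  ## Re-read on each interval, so rotated tokens are picked up.
  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
```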
## Usage for Caddy HTTP server
Steps to monitor Caddy with Telegraf's Prometheus input plugin:
* Download [Caddy](https://caddyserver.com/download)
* Download Prometheus and set up [monitoring Caddy with Prometheus metrics](https://caddyserver.com/docs/metrics#monitoring-caddy-with-prometheus-metrics)
* Restart Caddy
* Configure Telegraf to fetch metrics on it:
```toml
[[inputs.prometheus]]
## An array of urls to scrape metrics from.
urls = ["http://localhost:2019/metrics"]
```
> This is the default URL where Caddy will send data.
> For more details, please read the [Caddy Prometheus documentation](https://github.com/miekg/caddy-prometheus/blob/master/README.md).
## Metrics
Measurement names are based on the Metric Family and tags are created for each
label. The value is added to a field whose name is based on the metric type.
All metrics receive the `url` tag indicating the related URL specified in the
Telegraf configuration. If using Kubernetes service discovery the `address`
tag is also added indicating the discovered ip address.
* prometheus_request
* tags:
* url
* address
* fields:
* response_time (float, seconds)
* content_length (int, response body length)
## Example Output
### Source
```shell
# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 7.4545e-05
go_gc_duration_seconds{quantile="0.25"} 7.6999e-05
go_gc_duration_seconds{quantile="0.5"} 0.000277935
go_gc_duration_seconds{quantile="0.75"} 0.000706591
go_gc_duration_seconds{quantile="1"} 0.000706591
go_gc_duration_seconds_sum 0.00113607
go_gc_duration_seconds_count 4
# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 15
# HELP cpu_usage_user Telegraf collected metric
# TYPE cpu_usage_user gauge
cpu_usage_user{cpu="cpu0"} 1.4112903225816156
cpu_usage_user{cpu="cpu1"} 0.702106318955865
cpu_usage_user{cpu="cpu2"} 2.0161290322588776
cpu_usage_user{cpu="cpu3"} 1.5045135406226022
```
### Output
```text
go_gc_duration_seconds,url=http://example.org:9273/metrics 1=0.001336611,count=14,sum=0.004527551,0=0.000057965,0.25=0.000083812,0.5=0.000286537,0.75=0.000365303 1505776733000000000
go_goroutines,url=http://example.org:9273/metrics gauge=21 1505776695000000000
cpu_usage_user,cpu=cpu0,url=http://example.org:9273/metrics gauge=1.513622603430151 1505776751000000000
cpu_usage_user,cpu=cpu1,url=http://example.org:9273/metrics gauge=5.829145728641773 1505776751000000000
cpu_usage_user,cpu=cpu2,url=http://example.org:9273/metrics gauge=2.119071644805144 1505776751000000000
cpu_usage_user,cpu=cpu3,url=http://example.org:9273/metrics gauge=1.5228426395944945 1505776751000000000
prometheus_request,result=success,url=http://example.org:9273/metrics content_length=179013i,http_response_code=200i,response_time=0.051521601 1505776751000000000
```
### Output (when metric_version = 2)
```text
prometheus,quantile=1,url=http://example.org:9273/metrics go_gc_duration_seconds=0.005574303 1556075100000000000
prometheus,quantile=0.75,url=http://example.org:9273/metrics go_gc_duration_seconds=0.0001046 1556075100000000000
prometheus,quantile=0.5,url=http://example.org:9273/metrics go_gc_duration_seconds=0.0000719 1556075100000000000
prometheus,quantile=0.25,url=http://example.org:9273/metrics go_gc_duration_seconds=0.0000579 1556075100000000000
prometheus,quantile=0,url=http://example.org:9273/metrics go_gc_duration_seconds=0.0000349 1556075100000000000
prometheus,url=http://example.org:9273/metrics go_gc_duration_seconds_count=324,go_gc_duration_seconds_sum=0.091340353 1556075100000000000
prometheus,url=http://example.org:9273/metrics go_goroutines=15 1556075100000000000
prometheus,cpu=cpu0,url=http://example.org:9273/metrics cpu_usage_user=1.513622603430151 1505776751000000000
prometheus,cpu=cpu1,url=http://example.org:9273/metrics cpu_usage_user=5.829145728641773 1505776751000000000
prometheus,cpu=cpu2,url=http://example.org:9273/metrics cpu_usage_user=2.119071644805144 1505776751000000000
prometheus,cpu=cpu3,url=http://example.org:9273/metrics cpu_usage_user=1.5228426395944945 1505776751000000000
prometheus_request,result=success,url=http://example.org:9273/metrics content_length=179013i,http_response_code=200i,response_time=0.051521601 1505776751000000000
```
### Output with timestamp included
Below is an example of a Prometheus metric which includes a timestamp:
```text
# TYPE test_counter counter
test_counter{label="test"} 1 1685443805885
```
Telegraf will generate the following metric:
```text
test_counter,address=127.0.0.1,label=test counter=1 1685443805885000000
```
when using the standard configuration:
```toml
[[inputs.prometheus]]
## An array of urls to scrape metrics from.
urls = ["http://localhost:2019/metrics"]
```
**Please note:** Metrics generated by Prometheus endpoints are generated with
*millisecond precision*. The default Telegraf agent level precision setting
reduces this to seconds. Change the `precision` setting at agent or plugin level
to milliseconds or smaller to report metric timestamps with full precision.
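
For example, at the agent level:

```toml
[agent]
  ## Report timestamps with millisecond precision instead of the default
  ## seconds, preserving Prometheus endpoint timestamps.
  precision = "1ms"
```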

@@ -0,0 +1,209 @@
package prometheus
import (
"bytes"
"context"
"fmt"
"net/url"
"strings"
"text/template"
"time"
"github.com/hashicorp/consul/api"
"github.com/influxdata/telegraf/config"
)
type consulConfig struct {
Enabled bool `toml:"enabled"`
// Address of the Consul agent. The address must contain a hostname or an IP address
// and optionally a port (format: "host:port").
Agent string `toml:"agent"`
QueryInterval config.Duration `toml:"query_interval"`
Queries []*consulQuery `toml:"query"`
}
// One Consul service discovery query
type consulQuery struct {
// A name of the searched services (not ID)
ServiceName string `toml:"name"`
// A tag of the searched services
ServiceTag string `toml:"tag"`
// A DC of the searched services
ServiceDc string `toml:"dc"`
// A template URL of the Prometheus gathering interface. The hostname part
// of the URL will be replaced by discovered address and port.
ServiceURL string `toml:"url"`
// Extra tags to add to metrics found in Consul
ServiceExtraTags map[string]string `toml:"tags"`
serviceURLTemplate *template.Template
serviceExtraTagsTemplate map[string]*template.Template
// Store last error status and change log level depending on repeated occurrence
lastQueryFailed bool
}
func (p *Prometheus) startConsul(ctx context.Context) error {
consulAPIConfig := api.DefaultConfig()
if p.ConsulConfig.Agent != "" {
consulAPIConfig.Address = p.ConsulConfig.Agent
}
consul, err := api.NewClient(consulAPIConfig)
if err != nil {
return fmt.Errorf("cannot connect to the Consul agent: %w", err)
}
// Parse the template for metrics URL, drop queries with template parse errors
i := 0
for _, q := range p.ConsulConfig.Queries {
serviceURLTemplate, err := template.New("URL").Parse(q.ServiceURL)
if err != nil {
p.Log.Errorf("Could not parse the Consul query URL template (%s), skipping it. Error: %s", q.ServiceURL, err)
continue
}
q.serviceURLTemplate = serviceURLTemplate
// Allow use of the join function in tag value templates
templateFunctions := template.FuncMap{"join": strings.Join}
// Parse the tag value templates
q.serviceExtraTagsTemplate = make(map[string]*template.Template)
for tagName, tagTemplateString := range q.ServiceExtraTags {
tagTemplate, err := template.New(tagName).Funcs(templateFunctions).Parse(tagTemplateString)
if err != nil {
p.Log.Errorf("Could not parse the Consul query Extra Tag template (%s), skipping it. Error: %s", tagTemplateString, err)
continue
}
q.serviceExtraTagsTemplate[tagName] = tagTemplate
}
p.ConsulConfig.Queries[i] = q
i++
}
// Prevent memory leak by erasing truncated values
for j := i; j < len(p.ConsulConfig.Queries); j++ {
p.ConsulConfig.Queries[j] = nil
}
p.ConsulConfig.Queries = p.ConsulConfig.Queries[:i]
catalog := consul.Catalog()
p.wg.Add(1)
go func() {
// Store last error status and change log level depending on repeated occurrence
var refreshFailed = false
defer p.wg.Done()
err := p.refreshConsulServices(catalog)
if err != nil {
refreshFailed = true
p.Log.Errorf("Unable to refresh Consul services: %v", err)
}
for {
select {
case <-ctx.Done():
return
case <-time.After(time.Duration(p.ConsulConfig.QueryInterval)):
err := p.refreshConsulServices(catalog)
if err != nil {
message := fmt.Sprintf("Unable to refresh Consul services: %v", err)
if refreshFailed {
p.Log.Debug(message)
} else {
p.Log.Warn(message)
}
refreshFailed = true
} else if refreshFailed {
refreshFailed = false
p.Log.Info("Successfully refreshed Consul services after previous errors")
}
}
}
}()
return nil
}
func (p *Prometheus) refreshConsulServices(c *api.Catalog) error {
consulServiceURLs := make(map[string]urlAndAddress)
p.Log.Debugf("Refreshing Consul services")
for _, q := range p.ConsulConfig.Queries {
queryOptions := api.QueryOptions{}
if q.ServiceDc != "" {
queryOptions.Datacenter = q.ServiceDc
}
// Request services from Consul
consulServices, _, err := c.Service(q.ServiceName, q.ServiceTag, &queryOptions)
if err != nil {
return err
}
if len(consulServices) == 0 {
p.Log.Debugf("Queried Consul for Service (%s, %s) but did not find any instances", q.ServiceName, q.ServiceTag)
continue
}
p.Log.Debugf("Queried Consul for Service (%s, %s) and found %d instances", q.ServiceName, q.ServiceTag, len(consulServices))
for _, consulService := range consulServices {
uaa, err := p.getConsulServiceURL(q, consulService)
if err != nil {
message := fmt.Sprintf("Unable to get scrape URLs from Consul for Service (%s, %s): %s", q.ServiceName, q.ServiceTag, err)
if q.lastQueryFailed {
p.Log.Debug(message)
} else {
p.Log.Warn(message)
}
q.lastQueryFailed = true
break
}
if q.lastQueryFailed {
p.Log.Infof("Created scrape URLs from Consul for Service (%s, %s)", q.ServiceName, q.ServiceTag)
}
q.lastQueryFailed = false
p.Log.Debugf("Adding scrape URL from Consul for Service (%s, %s): %s", q.ServiceName, q.ServiceTag, uaa.url.String())
consulServiceURLs[uaa.url.String()] = *uaa
}
}
p.lock.Lock()
p.consulServices = consulServiceURLs
p.lock.Unlock()
return nil
}
func (p *Prometheus) getConsulServiceURL(q *consulQuery, s *api.CatalogService) (*urlAndAddress, error) {
var buffer bytes.Buffer
buffer.Reset()
err := q.serviceURLTemplate.Execute(&buffer, s)
if err != nil {
return nil, err
}
serviceURL, err := url.Parse(buffer.String())
if err != nil {
return nil, err
}
extraTags := make(map[string]string)
for tagName, tagTemplate := range q.serviceExtraTagsTemplate {
buffer.Reset()
err = tagTemplate.Execute(&buffer, s)
if err != nil {
return nil, err
}
extraTags[tagName] = buffer.String()
}
p.Log.Debugf("Will scrape metrics from Consul Service %s", serviceURL.String())
return &urlAndAddress{
url: serviceURL,
originalURL: serviceURL,
tags: extraTags,
}, nil
}

@@ -0,0 +1,500 @@
package prometheus
import (
"context"
"crypto/tls"
"encoding/json"
"fmt"
"net"
"net/http"
"net/url"
"os"
"os/user"
"path/filepath"
"strconv"
"time"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/clientcmd"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/models"
)
type podMetadata struct {
ResourceVersion string `json:"resourceVersion"`
SelfLink string `json:"selfLink"`
}
type podResponse struct {
Kind string `json:"kind"`
APIVersion string `json:"apiVersion"`
Metadata podMetadata `json:"metadata"`
Items []*corev1.Pod `json:"items,omitempty"`
}
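// Fallback pod-list refresh interval (in seconds) used when
// pod_scrape_interval is not configured.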
const cAdvisorPodListDefaultInterval = 60
// loadConfig parses a kubeconfig from a file and returns a Kubernetes rest.Config
func loadConfig(kubeconfigPath string) (*rest.Config, error) {
if kubeconfigPath == "" {
return rest.InClusterConfig()
}
return clientcmd.BuildConfigFromFlags("", kubeconfigPath)
}
func (p *Prometheus) startK8s(ctx context.Context) error {
config, err := loadConfig(p.KubeConfig)
if err != nil {
return fmt.Errorf("failed to get rest.Config from %q: %w", p.KubeConfig, err)
}
client, err := kubernetes.NewForConfig(config)
if err != nil {
u, err := user.Current()
if err != nil {
return fmt.Errorf("failed to get current user: %w", err)
}
kubeconfig := filepath.Join(u.HomeDir, ".kube", "config")
config, err = loadConfig(kubeconfig)
if err != nil {
return fmt.Errorf("failed to get rest.Config from %q: %w", kubeconfig, err)
}
client, err = kubernetes.NewForConfig(config)
if err != nil {
return fmt.Errorf("failed to get kubernetes client: %w", err)
}
}
if !p.isNodeScrapeScope {
err = p.watchPod(ctx, client)
if err != nil {
p.Log.Warnf("Error while attempting to watch pod: %s", err.Error())
}
}
p.wg.Add(1)
go func() {
defer p.wg.Done()
for {
select {
case <-ctx.Done():
return
case <-time.After(time.Second):
if p.isNodeScrapeScope {
bearerToken := config.BearerToken
if config.BearerTokenFile != "" {
bearerTokenBytes, err := os.ReadFile(config.BearerTokenFile)
if err != nil {
p.Log.Errorf("Error reading bearer token file hence falling back to BearerToken: %s", err.Error())
} else {
bearerToken = string(bearerTokenBytes)
}
}
err = p.cAdvisor(ctx, bearerToken)
if err != nil {
p.Log.Errorf("Unable to monitor pods with node scrape scope: %s", err.Error())
}
} else {
<-ctx.Done()
}
}
}
}()
return nil
}
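// shouldScrapePod reports whether a pod is a scrape candidate (ready and
// matching the configured namespace, label and field selectors) and whether
// scraping is enabled for it under the configured discovery method.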
func shouldScrapePod(pod *corev1.Pod, p *Prometheus) bool {
isCandidate := podReady(pod) &&
podHasMatchingNamespace(pod, p) &&
podHasMatchingLabelSelector(pod, p.podLabelSelector) &&
podHasMatchingFieldSelector(pod, p.podFieldSelector)
var shouldScrape bool
switch p.MonitorKubernetesPodsMethod {
case monitorMethodAnnotations: // must have 'true' annotation to be scraped
shouldScrape = pod.Annotations != nil && pod.Annotations["prometheus.io/scrape"] == "true"
case monitorMethodSettings: // will be scraped regardless of annotation
shouldScrape = true
case monitorMethodSettingsAndAnnotations: // will be scraped unless opts out with 'false' annotation
shouldScrape = pod.Annotations == nil || pod.Annotations["prometheus.io/scrape"] != "false"
}
return isCandidate && shouldScrape
}
// Share informer per namespace across all instances of this plugin
var informerfactory map[string]informers.SharedInformerFactory
// An edge case exists if a pod goes offline at the same time a new pod is created
// (without the scrape annotations). K8s may re-assign the old pod ip to the non-scrape
// pod, causing errors in the logs. This is only true if the pod going offline is not
// directed to do so by K8s.
func (p *Prometheus) watchPod(ctx context.Context, clientset *kubernetes.Clientset) error {
var resyncinterval time.Duration
if p.CacheRefreshInterval != 0 {
resyncinterval = time.Duration(p.CacheRefreshInterval) * time.Minute
} else {
resyncinterval = 60 * time.Minute
}
if informerfactory == nil {
informerfactory = make(map[string]informers.SharedInformerFactory)
}
var f informers.SharedInformerFactory
var ok bool
if f, ok = informerfactory[p.PodNamespace]; !ok {
var informerOptions []informers.SharedInformerOption
if p.PodNamespace != "" {
informerOptions = append(informerOptions, informers.WithNamespace(p.PodNamespace))
}
f = informers.NewSharedInformerFactoryWithOptions(clientset, resyncinterval, informerOptions...)
informerfactory[p.PodNamespace] = f
}
if p.nsAnnotationPass != nil || p.nsAnnotationDrop != nil {
p.nsStore = f.Core().V1().Namespaces().Informer().GetStore()
}
podinformer := f.Core().V1().Pods()
_, err := podinformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(newObj interface{}) {
newPod, ok := newObj.(*corev1.Pod)
if !ok {
p.Log.Errorf("[BUG] received unexpected object: %v", newObj)
return
}
if shouldScrapePod(newPod, p) {
registerPod(newPod, p)
}
},
// On Pod status updates and regular reList by Informer
UpdateFunc: func(_, newObj interface{}) {
newPod, ok := newObj.(*corev1.Pod)
if !ok {
p.Log.Errorf("[BUG] received unexpected object: %v", newObj)
return
}
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(newObj)
if err != nil {
p.Log.Errorf("getting key from cache %s", err.Error())
}
podID := podID(key)
if shouldScrapePod(newPod, p) {
// When Informers re-Lists, pod might already be registered,
// do nothing if it is, register otherwise
if _, ok = p.kubernetesPods[podID]; !ok {
registerPod(newPod, p)
}
} else {
// Pods are largely immutable, but their readiness status can change; unregister the pod in that case
unregisterPod(podID, p)
}
},
DeleteFunc: func(oldObj interface{}) {
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(oldObj)
if err == nil {
unregisterPod(podID(key), p)
}
},
})
f.Start(ctx.Done())
f.WaitForCacheSync(wait.NeverStop)
return err
}
func (p *Prometheus) cAdvisor(ctx context.Context, bearerToken string) error {
// The request will be the same each time
podsURL := fmt.Sprintf("https://%s:10250/pods", p.NodeIP)
req, err := http.NewRequest("GET", podsURL, nil)
if err != nil {
return fmt.Errorf("error when creating request to %s to get pod list: %w", podsURL, err)
}
req.Header.Set("Authorization", "Bearer "+bearerToken)
req.Header.Add("Accept", "application/json")
// Update right away so code is not waiting the length of the specified scrape interval initially
err = updateCadvisorPodList(p, req)
if err != nil {
return fmt.Errorf("error initially updating pod list: %w", err)
}
scrapeInterval := cAdvisorPodListDefaultInterval
if p.PodScrapeInterval != 0 {
scrapeInterval = p.PodScrapeInterval
}
for {
select {
case <-ctx.Done():
return nil
case <-time.After(time.Duration(scrapeInterval) * time.Second):
err := updateCadvisorPodList(p, req)
if err != nil {
return fmt.Errorf("error updating pod list: %w", err)
}
}
}
}
func updateCadvisorPodList(p *Prometheus, req *http.Request) error {
http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
httpClient := http.Client{}
resp, err := httpClient.Do(req)
if err != nil {
return fmt.Errorf("error when making request for pod list: %w", err)
}
defer resp.Body.Close()
// If err is nil, still check the response code
if resp.StatusCode != 200 {
return fmt.Errorf("error when making request for pod list with status %s", resp.Status)
}
cadvisorPodsResponse := podResponse{}
// Will have expected type errors for some parts of corev1.Pod struct for some unused fields
// Instead have nil checks for every used field in case of incorrect decoding
if err := json.NewDecoder(resp.Body).Decode(&cadvisorPodsResponse); err != nil {
return fmt.Errorf("decoding response failed: %w", err)
}
pods := cadvisorPodsResponse.Items
// Updating pod list to be latest cadvisor response
p.lock.Lock()
p.kubernetesPods = make(map[podID]urlAndAddress)
// Register pod only if it has an annotation to scrape, if it is ready,
// and if namespace and selectors are specified and match
for _, pod := range pods {
if necessaryPodFieldsArePresent(pod) && shouldScrapePod(pod, p) {
registerPod(pod, p)
}
}
p.lock.Unlock()
// No errors
return nil
}
func necessaryPodFieldsArePresent(pod *corev1.Pod) bool {
return pod.Annotations != nil &&
pod.Labels != nil &&
pod.Status.ContainerStatuses != nil
}
/* See the docs on kubernetes label selectors:
* https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
*/
func podHasMatchingLabelSelector(pod *corev1.Pod, labelSelector labels.Selector) bool {
if labelSelector == nil {
return true
}
var labelsSet labels.Set = pod.Labels
return labelSelector.Matches(labelsSet)
}
/* See ToSelectableFields() for list of fields that are selectable:
* https://github.com/kubernetes/kubernetes/release-1.20/pkg/registry/core/pod/strategy.go
* See docs on kubernetes field selectors:
* https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
*/
func podHasMatchingFieldSelector(pod *corev1.Pod, fieldSelector fields.Selector) bool {
if fieldSelector == nil {
return true
}
fieldsSet := make(fields.Set)
fieldsSet["spec.nodeName"] = pod.Spec.NodeName
fieldsSet["spec.restartPolicy"] = string(pod.Spec.RestartPolicy)
fieldsSet["spec.schedulerName"] = pod.Spec.SchedulerName
fieldsSet["spec.serviceAccountName"] = pod.Spec.ServiceAccountName
fieldsSet["status.phase"] = string(pod.Status.Phase)
fieldsSet["status.podIP"] = pod.Status.PodIP
fieldsSet["status.nominatedNodeName"] = pod.Status.NominatedNodeName
return fieldSelector.Matches(fieldsSet)
}
// Get corev1.Namespace object by name
func getNamespaceObject(name string, p *Prometheus) *corev1.Namespace {
nsObj, exists, err := p.nsStore.GetByKey(name)
if err != nil {
p.Log.Errorf("Err fetching namespace '%s': %v", name, err)
return nil
} else if !exists {
return nil // can't happen
}
ns, ok := nsObj.(*corev1.Namespace)
if !ok {
p.Log.Errorf("[BUG] received unexpected object: %v", nsObj)
return nil
}
return ns
}
func namespaceAnnotationMatch(nsName string, p *Prometheus) bool {
// In case of no filtering or any issues with acquiring namespace information
// just let it pass through...
if (p.nsAnnotationPass == nil && p.nsAnnotationDrop == nil) || p.nsStore == nil {
return true
}
ns := getNamespaceObject(nsName, p)
if ns == nil {
return true
}
tags := make([]*telegraf.Tag, 0, len(ns.Annotations))
for k, v := range ns.Annotations {
tags = append(tags, &telegraf.Tag{Key: k, Value: v})
}
return models.ShouldTagsPass(p.nsAnnotationPass, p.nsAnnotationDrop, tags)
}
/*
* If a namespace is specified and the pod doesn't have that namespace, return false
* Else return true
*/
func podHasMatchingNamespace(pod *corev1.Pod, p *Prometheus) bool {
return p.PodNamespace == "" || pod.Namespace == p.PodNamespace
}
func podReady(pod *corev1.Pod) bool {
for _, cond := range pod.Status.Conditions {
if cond.Type == corev1.PodReady {
return pod.Status.Phase == corev1.PodRunning
}
}
return false
}
func registerPod(pod *corev1.Pod, p *Prometheus) {
targetURL, err := getScrapeURL(pod, p)
if err != nil {
p.Log.Errorf("could not parse URL: %s", err)
return
} else if targetURL == nil {
return
}
p.Log.Debugf("will scrape metrics from %q", targetURL.String())
tags := make(map[string]string, len(pod.Annotations)+len(pod.Labels)+2)
// add annotation as metrics tags, subject to include/exclude filters
for k, v := range pod.Annotations {
if models.ShouldPassFilters(p.podAnnotationIncludeFilter, p.podAnnotationExcludeFilter, k) {
tags[k] = v
}
}
tags["pod_name"] = pod.Name
podNamespace := "namespace"
if p.PodNamespaceLabelName != "" {
podNamespace = p.PodNamespaceLabelName
}
tags[podNamespace] = pod.Namespace
// add labels as metrics tags, subject to include/exclude filters
for k, v := range pod.Labels {
if models.ShouldPassFilters(p.podLabelIncludeFilter, p.podLabelExcludeFilter, k) {
tags[k] = v
}
}
podURL := addressToURL(targetURL, targetURL.Hostname())
// Locks earlier if using cAdvisor calls - makes a new list each time
// rather than updating and removing from the same list
if !p.isNodeScrapeScope {
p.lock.Lock()
defer p.lock.Unlock()
}
p.kubernetesPods[podID(pod.GetNamespace()+"/"+pod.GetName())] = urlAndAddress{
url: podURL,
address: targetURL.Hostname(),
originalURL: targetURL,
tags: tags,
namespace: pod.GetNamespace(),
}
}
func getScrapeURL(pod *corev1.Pod, p *Prometheus) (*url.URL, error) {
ip := pod.Status.PodIP
if ip == "" {
// return as if scrape was disabled, we will be notified again once the pod
// has an IP
return nil, nil
}
var scheme, pathAndQuery, port string
if p.MonitorKubernetesPodsMethod == monitorMethodSettings ||
p.MonitorKubernetesPodsMethod == monitorMethodSettingsAndAnnotations {
scheme = p.MonitorKubernetesPodsScheme
pathAndQuery = p.MonitorKubernetesPodsPath
port = strconv.Itoa(p.MonitorKubernetesPodsPort)
}
if p.MonitorKubernetesPodsMethod == monitorMethodAnnotations ||
p.MonitorKubernetesPodsMethod == monitorMethodSettingsAndAnnotations {
if ann := pod.Annotations["prometheus.io/scheme"]; ann != "" {
scheme = ann
}
if ann := pod.Annotations["prometheus.io/path"]; ann != "" {
pathAndQuery = ann
}
if ann := pod.Annotations["prometheus.io/port"]; ann != "" {
port = ann
}
}
if scheme == "" {
scheme = "http"
}
if port == "" || port == "0" {
port = "9102"
}
if pathAndQuery == "" {
pathAndQuery = "/metrics"
}
base, err := url.Parse(pathAndQuery)
if err != nil {
return nil, err
}
base.Scheme = scheme
base.Host = net.JoinHostPort(ip, port)
return base, nil
}
func unregisterPod(podID podID, p *Prometheus) {
p.lock.Lock()
defer p.lock.Unlock()
if v, ok := p.kubernetesPods[podID]; ok {
p.Log.Debugf("registered a delete request for %s", podID)
delete(p.kubernetesPods, podID)
p.Log.Debugf("will stop scraping for %q", v.url.String())
}
}

@@ -0,0 +1,368 @@
package prometheus
import (
"testing"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/tools/cache"
"github.com/influxdata/telegraf/testutil"
)
func initPrometheus() *Prometheus {
prom := &Prometheus{Log: testutil.Logger{}}
prom.MonitorKubernetesPodsScheme = "http"
prom.MonitorKubernetesPodsPort = 9102
prom.MonitorKubernetesPodsPath = "/metrics"
prom.MonitorKubernetesPodsMethod = monitorMethodAnnotations
prom.kubernetesPods = map[podID]urlAndAddress{}
return prom
}
func TestScrapeURLNoAnnotations(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}}
p := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{}}
p.Annotations = map[string]string{}
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Nil(t, url)
}
func TestScrapeURLNoAnnotationsScrapeConfig(t *testing.T) {
prom := initPrometheus()
prom.MonitorKubernetesPodsMethod = monitorMethodSettingsAndAnnotations
p := pod()
p.Annotations = map[string]string{}
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Equal(t, "http://127.0.0.1:9102/metrics", url.String())
}
func TestScrapeURLScrapeConfigCustom(t *testing.T) {
prom := initPrometheus()
prom.MonitorKubernetesPodsMethod = monitorMethodSettingsAndAnnotations
prom.MonitorKubernetesPodsScheme = "https"
prom.MonitorKubernetesPodsPort = 9999
prom.MonitorKubernetesPodsPath = "/svc/metrics"
p := pod()
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Equal(t, "https://127.0.0.1:9999/svc/metrics", url.String())
}
func TestScrapeURLAnnotations(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}}
p := pod()
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Equal(t, "http://127.0.0.1:9102/metrics", url.String())
}
func TestScrapeURLAnnotationsScrapeConfig(t *testing.T) {
prom := initPrometheus()
prom.MonitorKubernetesPodsMethod = monitorMethodSettingsAndAnnotations
p := pod()
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Equal(t, "http://127.0.0.1:9102/metrics", url.String())
}
func TestScrapeURLAnnotationsCustomPort(t *testing.T) {
prom := initPrometheus()
p := pod()
p.Annotations = map[string]string{"prometheus.io/port": "9000"}
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Equal(t, "http://127.0.0.1:9000/metrics", url.String())
}
func TestScrapeURLAnnotationsCustomPortScrapeConfig(t *testing.T) {
prom := initPrometheus()
prom.MonitorKubernetesPodsMethod = monitorMethodSettingsAndAnnotations
p := pod()
p.Annotations = map[string]string{"prometheus.io/port": "9000"}
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Equal(t, "http://127.0.0.1:9000/metrics", url.String())
}
func TestScrapeURLAnnotationsCustomPath(t *testing.T) {
prom := initPrometheus()
p := pod()
p.Annotations = map[string]string{"prometheus.io/path": "mymetrics"}
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Equal(t, "http://127.0.0.1:9102/mymetrics", url.String())
}
func TestScrapeURLAnnotationsCustomPathWithSep(t *testing.T) {
prom := initPrometheus()
p := pod()
p.Annotations = map[string]string{"prometheus.io/path": "/mymetrics"}
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Equal(t, "http://127.0.0.1:9102/mymetrics", url.String())
}
func TestScrapeURLAnnotationsCustomPathWithQueryParameters(t *testing.T) {
prom := initPrometheus()
p := pod()
p.Annotations = map[string]string{"prometheus.io/path": "/v1/agent/metrics?format=prometheus"}
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Equal(t, "http://127.0.0.1:9102/v1/agent/metrics?format=prometheus", url.String())
}
func TestScrapeURLAnnotationsCustomPathWithFragment(t *testing.T) {
prom := initPrometheus()
p := pod()
p.Annotations = map[string]string{"prometheus.io/path": "/v1/agent/metrics#prometheus"}
url, err := getScrapeURL(p, prom)
require.NoError(t, err)
require.Equal(t, "http://127.0.0.1:9102/v1/agent/metrics#prometheus", url.String())
}
func TestAddPod(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}, kubernetesPods: map[podID]urlAndAddress{}}
p := pod()
p.Annotations = map[string]string{"prometheus.io/scrape": "true"}
registerPod(p, prom)
require.Len(t, prom.kubernetesPods, 1)
}
func TestAddPodScrapeConfig(t *testing.T) {
prom := initPrometheus()
prom.MonitorKubernetesPodsMethod = monitorMethodSettingsAndAnnotations
p := pod()
p.Annotations = map[string]string{}
registerPod(p, prom)
require.Len(t, prom.kubernetesPods, 1)
}
func TestAddMultipleDuplicatePods(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}, kubernetesPods: map[podID]urlAndAddress{}}
p := pod()
p.Annotations = map[string]string{"prometheus.io/scrape": "true"}
registerPod(p, prom)
p.Name = "Pod2"
registerPod(p, prom)
urls, err := prom.getAllURLs()
require.NoError(t, err)
require.Len(t, urls, 1)
}
func TestAddMultiplePods(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}, kubernetesPods: map[podID]urlAndAddress{}}
p := pod()
p.Annotations = map[string]string{"prometheus.io/scrape": "true"}
registerPod(p, prom)
p.Name = "Pod2"
p.Status.PodIP = "127.0.0.2"
registerPod(p, prom)
require.Len(t, prom.kubernetesPods, 2)
}
func TestDeletePods(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}, kubernetesPods: map[podID]urlAndAddress{}}
p := pod()
p.Annotations = map[string]string{"prometheus.io/scrape": "true"}
registerPod(p, prom)
id, err := cache.MetaNamespaceKeyFunc(p)
require.NoError(t, err)
unregisterPod(podID(id), prom)
require.Empty(t, prom.kubernetesPods)
}
func TestKeepDefaultNamespaceLabelName(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}, kubernetesPods: map[podID]urlAndAddress{}}
p := pod()
p.Annotations = map[string]string{"prometheus.io/scrape": "true"}
registerPod(p, prom)
id, err := cache.MetaNamespaceKeyFunc(p)
require.NoError(t, err)
tags := prom.kubernetesPods[podID(id)].tags
require.Equal(t, "default", tags["namespace"])
}
func TestChangeNamespaceLabelName(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}, PodNamespaceLabelName: "pod_namespace", kubernetesPods: map[podID]urlAndAddress{}}
p := pod()
p.Annotations = map[string]string{"prometheus.io/scrape": "true"}
registerPod(p, prom)
id, err := cache.MetaNamespaceKeyFunc(p)
require.NoError(t, err)
tags := prom.kubernetesPods[podID(id)].tags
require.Equal(t, "default", tags["pod_namespace"])
require.Empty(t, tags["namespace"])
}
func TestPodHasMatchingNamespace(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}, PodNamespace: "default"}
pod := pod()
pod.Name = "Pod1"
pod.Namespace = "default"
shouldMatch := podHasMatchingNamespace(pod, prom)
require.True(t, shouldMatch)
pod.Name = "Pod2"
pod.Namespace = "namespace"
shouldNotMatch := podHasMatchingNamespace(pod, prom)
require.False(t, shouldNotMatch)
}
func TestPodHasMatchingLabelSelector(t *testing.T) {
labelSelectorString := "label0==label0,label1=label1,label2!=label,label3 in (label1,label2, label3),label4 notin (label1, label2,label3),label5,!label6"
prom := &Prometheus{Log: testutil.Logger{}, KubernetesLabelSelector: labelSelectorString}
pod := pod()
pod.Labels = make(map[string]string)
pod.Labels["label0"] = "label0"
pod.Labels["label1"] = "label1"
pod.Labels["label2"] = "label2"
pod.Labels["label3"] = "label3"
pod.Labels["label4"] = "label4"
pod.Labels["label5"] = "label5"
labelSelector, err := labels.Parse(prom.KubernetesLabelSelector)
require.NoError(t, err)
require.True(t, podHasMatchingLabelSelector(pod, labelSelector))
}
func TestPodHasMatchingFieldSelector(t *testing.T) {
fieldSelectorString := "status.podIP=127.0.0.1,spec.restartPolicy=Always,spec.NodeName!=nodeName"
prom := &Prometheus{Log: testutil.Logger{}, KubernetesFieldSelector: fieldSelectorString}
pod := pod()
pod.Spec.RestartPolicy = "Always"
pod.Spec.NodeName = "node1000"
fieldSelector, err := fields.ParseSelector(prom.KubernetesFieldSelector)
require.NoError(t, err)
require.True(t, podHasMatchingFieldSelector(pod, fieldSelector))
}
func TestInvalidFieldSelector(t *testing.T) {
fieldSelectorString := "status.podIP=127.0.0.1,spec.restartPolicy=Always,spec.NodeName!=nodeName,spec.nodeName"
prom := &Prometheus{Log: testutil.Logger{}, KubernetesFieldSelector: fieldSelectorString}
pod := pod()
pod.Spec.RestartPolicy = "Always"
pod.Spec.NodeName = "node1000"
_, err := fields.ParseSelector(prom.KubernetesFieldSelector)
require.Error(t, err)
}
func TestAnnotationFilters(t *testing.T) {
p := pod()
p.Annotations = map[string]string{
"prometheus.io/scrape": "true",
"includeme": "true",
"excludeme": "true",
"neutral": "true",
}
cases := []struct {
desc string
include []string
exclude []string
expectedTags []string
}{
{"Just include",
[]string{"includeme"},
nil,
[]string{"includeme"}},
{"Just exclude",
nil,
[]string{"excludeme"},
[]string{"includeme", "neutral"}},
{"Include & exclude",
[]string{"includeme"},
[]string{"exludeme"},
[]string{"includeme"}},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}, kubernetesPods: map[podID]urlAndAddress{}}
prom.PodAnnotationInclude = tc.include
prom.PodAnnotationExclude = tc.exclude
require.NoError(t, prom.initFilters())
registerPod(p, prom)
for _, pd := range prom.kubernetesPods {
for _, tagKey := range tc.expectedTags {
require.Contains(t, pd.tags, tagKey)
}
}
})
}
}
func TestLabelFilters(t *testing.T) {
p := pod()
p.Annotations = map[string]string{"prometheus.io/scrape": "true"}
p.Labels = map[string]string{
"includeme": "true",
"excludeme": "true",
"neutral": "true",
}
cases := []struct {
desc string
include []string
exclude []string
expectedTags []string
}{
{"Just include",
[]string{"includeme"},
nil,
[]string{"includeme"}},
{"Just exclude",
nil,
[]string{"excludeme"},
[]string{"includeme", "neutral"}},
{"Include & exclude",
[]string{"includeme"},
[]string{"exludeme"},
[]string{"includeme"}},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
prom := &Prometheus{Log: testutil.Logger{}, kubernetesPods: map[podID]urlAndAddress{}}
prom.PodLabelInclude = tc.include
prom.PodLabelExclude = tc.exclude
require.NoError(t, prom.initFilters())
registerPod(p, prom)
for _, pd := range prom.kubernetesPods {
for _, tagKey := range tc.expectedTags {
require.Contains(t, pd.tags, tagKey)
}
}
})
}
}
func pod() *corev1.Pod {
p := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{}, Status: corev1.PodStatus{}, Spec: corev1.PodSpec{}}
p.Status.PodIP = "127.0.0.1"
p.Name = "myPod"
p.Namespace = "default"
return p
}

@@ -0,0 +1,638 @@
//go:generate ../../../tools/readme_config_includer/generator
package prometheus
import (
"context"
_ "embed"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/url"
"os"
"strings"
"sync"
"time"
"github.com/prometheus/common/expfmt"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/tools/cache"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/filter"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/models"
common_http "github.com/influxdata/telegraf/plugins/common/http"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers/openmetrics"
parsers_prometheus "github.com/influxdata/telegraf/plugins/parsers/prometheus"
)
//go:embed sample.conf
var sampleConfig string
const (
acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3`
monitorMethodNone monitorMethod = ""
monitorMethodAnnotations monitorMethod = "annotations"
monitorMethodSettings monitorMethod = "settings"
monitorMethodSettingsAndAnnotations monitorMethod = "settings+annotations"
)
type Prometheus struct {
URLs []string `toml:"urls"`
BearerToken string `toml:"bearer_token"`
BearerTokenString config.Secret `toml:"bearer_token_string"`
Username config.Secret `toml:"username"`
Password config.Secret `toml:"password"`
HTTPHeaders map[string]string `toml:"http_headers"`
ContentLengthLimit config.Size `toml:"content_length_limit"`
ContentTypeOverride string `toml:"content_type_override"`
EnableRequestMetrics bool `toml:"enable_request_metrics"`
MetricVersion int `toml:"metric_version"`
URLTag string `toml:"url_tag"`
IgnoreTimestamp bool `toml:"ignore_timestamp"`
// Kubernetes service discovery
MonitorPods bool `toml:"monitor_kubernetes_pods"`
PodScrapeScope string `toml:"pod_scrape_scope"`
NodeIP string `toml:"node_ip"`
PodScrapeInterval int `toml:"pod_scrape_interval"`
PodNamespace string `toml:"monitor_kubernetes_pods_namespace"`
PodNamespaceLabelName string `toml:"pod_namespace_label_name"`
KubernetesServices []string `toml:"kubernetes_services"`
KubeConfig string `toml:"kube_config"`
KubernetesLabelSelector string `toml:"kubernetes_label_selector"`
KubernetesFieldSelector string `toml:"kubernetes_field_selector"`
MonitorKubernetesPodsMethod monitorMethod `toml:"monitor_kubernetes_pods_method"`
MonitorKubernetesPodsScheme string `toml:"monitor_kubernetes_pods_scheme"`
MonitorKubernetesPodsPath string `toml:"monitor_kubernetes_pods_path"`
MonitorKubernetesPodsPort int `toml:"monitor_kubernetes_pods_port"`
NamespaceAnnotationPass map[string][]string `toml:"namespace_annotation_pass"`
NamespaceAnnotationDrop map[string][]string `toml:"namespace_annotation_drop"`
PodAnnotationInclude []string `toml:"pod_annotation_include"`
PodAnnotationExclude []string `toml:"pod_annotation_exclude"`
PodLabelInclude []string `toml:"pod_label_include"`
PodLabelExclude []string `toml:"pod_label_exclude"`
CacheRefreshInterval int `toml:"cache_refresh_interval"`
// Consul discovery
ConsulConfig consulConfig `toml:"consul"`
Log telegraf.Logger `toml:"-"`
common_http.HTTPClientConfig
client *http.Client
headers map[string]string
contentType string
nsStore cache.Store
nsAnnotationPass []models.TagFilter
nsAnnotationDrop []models.TagFilter
// Kubernetes pods discovered for scraping; guarded by lock
lock sync.Mutex
kubernetesPods map[podID]urlAndAddress
cancel context.CancelFunc
wg sync.WaitGroup
// Only for monitor_kubernetes_pods=true and pod_scrape_scope="node"
podLabelSelector labels.Selector
podFieldSelector fields.Selector
isNodeScrapeScope bool
podAnnotationIncludeFilter filter.Filter
podAnnotationExcludeFilter filter.Filter
podLabelIncludeFilter filter.Filter
podLabelExcludeFilter filter.Filter
// List of consul services to scrape
consulServices map[string]urlAndAddress
}
type urlAndAddress struct {
originalURL *url.URL
url *url.URL
address string
tags map[string]string
namespace string
}
type monitorMethod string
type podID string
func (*Prometheus) SampleConfig() string {
return sampleConfig
}
func (p *Prometheus) Init() error {
// Setup content-type override if requested
switch p.ContentTypeOverride {
case "": // No override
case "text":
p.contentType = string(expfmt.NewFormat(expfmt.TypeTextPlain))
case "protobuf-delimiter":
p.contentType = string(expfmt.NewFormat(expfmt.TypeProtoDelim))
case "protobuf-compact":
p.contentType = string(expfmt.NewFormat(expfmt.TypeProtoCompact))
case "protobuf-text":
p.contentType = string(expfmt.NewFormat(expfmt.TypeProtoText))
case "openmetrics-text":
f, err := expfmt.NewOpenMetricsFormat(expfmt.OpenMetricsVersion_1_0_0)
if err != nil {
return err
}
p.contentType = string(f)
case "openmetrics-protobuf":
p.contentType = "application/openmetrics-protobuf;version=1.0.0"
default:
return fmt.Errorf("invalid 'content_type_override' setting %q", p.ContentTypeOverride)
}
// Config processing for node scrape scope for monitor_kubernetes_pods
p.isNodeScrapeScope = strings.EqualFold(p.PodScrapeScope, "node")
if p.isNodeScrapeScope {
// Need node IP to make cAdvisor call for pod list. Check if set in config and valid IP address
if p.NodeIP == "" || net.ParseIP(p.NodeIP) == nil {
p.Log.Infof("The config node_ip is empty or invalid. Using NODE_IP env var as default.")
// Check if set as env var and is valid IP address
envVarNodeIP := os.Getenv("NODE_IP")
if envVarNodeIP == "" || net.ParseIP(envVarNodeIP) == nil {
return errors.New("the node_ip config and the environment variable NODE_IP are not set or invalid; " +
"cannot get pod list for monitor_kubernetes_pods using node scrape scope")
}
p.NodeIP = envVarNodeIP
}
p.Log.Infof("Using pod scrape scope at node level to get pod list using cAdvisor.")
}
if p.MonitorKubernetesPodsMethod == monitorMethodNone {
p.MonitorKubernetesPodsMethod = monitorMethodAnnotations
}
// Parse label and field selectors - will be used to filter pods after cAdvisor call
var err error
p.podLabelSelector, err = labels.Parse(p.KubernetesLabelSelector)
if err != nil {
return fmt.Errorf("error parsing the specified label selector(s): %w", err)
}
p.podFieldSelector, err = fields.ParseSelector(p.KubernetesFieldSelector)
if err != nil {
return fmt.Errorf("error parsing the specified field selector(s): %w", err)
}
isValid, invalidSelector := fieldSelectorIsSupported(p.podFieldSelector)
if !isValid {
return fmt.Errorf("the field selector %q is not supported for pods", invalidSelector)
}
if p.KubernetesLabelSelector != "" {
p.Log.Debugf("Using the label selector: %v", p.podLabelSelector)
}
if p.KubernetesFieldSelector != "" {
p.Log.Debugf("Using the field selector: %v", p.podFieldSelector)
}
for k, vs := range p.NamespaceAnnotationPass {
tagFilter := models.TagFilter{}
tagFilter.Name = k
tagFilter.Values = append(tagFilter.Values, vs...)
if err := tagFilter.Compile(); err != nil {
return fmt.Errorf("error compiling 'namespace_annotation_pass', %w", err)
}
p.nsAnnotationPass = append(p.nsAnnotationPass, tagFilter)
}
for k, vs := range p.NamespaceAnnotationDrop {
tagFilter := models.TagFilter{}
tagFilter.Name = k
tagFilter.Values = append(tagFilter.Values, vs...)
if err := tagFilter.Compile(); err != nil {
return fmt.Errorf("error compiling 'namespace_annotation_drop', %w", err)
}
p.nsAnnotationDrop = append(p.nsAnnotationDrop, tagFilter)
}
if err := p.initFilters(); err != nil {
return err
}
if p.MetricVersion == 0 {
p.MetricVersion = 1
}
ctx := context.Background()
client, err := p.HTTPClientConfig.CreateClient(ctx, p.Log)
if err != nil {
return err
}
p.client = client
if p.HTTPClientConfig.ResponseHeaderTimeout != 0 {
p.Log.Warn(
"Config option response_timeout was set to non-zero value. This option's behavior was " +
"changed in Telegraf 1.30.2 and now controls the HTTP client's header timeout and " +
"not the Prometheus timeout. Users can ignore this warning if that was the intention. " +
"Otherwise, please use the timeout config option for the Prometheus timeout.",
)
}
p.headers = map[string]string{
"User-Agent": internal.ProductToken(),
"Accept": acceptHeader,
}
p.kubernetesPods = make(map[podID]urlAndAddress)
return nil
}
// Start will start the Kubernetes and/or Consul scraping if enabled in the configuration
func (p *Prometheus) Start(_ telegraf.Accumulator) error {
var ctx context.Context
p.wg = sync.WaitGroup{}
ctx, p.cancel = context.WithCancel(context.Background())
if p.ConsulConfig.Enabled && len(p.ConsulConfig.Queries) > 0 {
if err := p.startConsul(ctx); err != nil {
return err
}
}
if p.MonitorPods {
if err := p.startK8s(ctx); err != nil {
return err
}
}
return nil
}
func (p *Prometheus) Gather(acc telegraf.Accumulator) error {
var wg sync.WaitGroup
allURLs, err := p.getAllURLs()
if err != nil {
return err
}
for _, URL := range allURLs {
wg.Add(1)
go func(serviceURL urlAndAddress) {
defer wg.Done()
requestFields, tags, err := p.gatherURL(serviceURL, acc)
acc.AddError(err)
// Add metrics
if p.EnableRequestMetrics {
acc.AddFields("prometheus_request", requestFields, tags)
}
}(URL)
}
wg.Wait()
return nil
}
func (p *Prometheus) Stop() {
p.cancel()
p.wg.Wait()
if p.client != nil {
p.client.CloseIdleConnections()
}
}
func (p *Prometheus) initFilters() error {
if p.PodAnnotationExclude != nil {
podAnnotationExclude, err := filter.Compile(p.PodAnnotationExclude)
if err != nil {
return fmt.Errorf("error compiling 'pod_annotation_exclude': %w", err)
}
p.podAnnotationExcludeFilter = podAnnotationExclude
}
if p.PodAnnotationInclude != nil {
podAnnotationInclude, err := filter.Compile(p.PodAnnotationInclude)
if err != nil {
return fmt.Errorf("error compiling 'pod_annotation_include': %w", err)
}
p.podAnnotationIncludeFilter = podAnnotationInclude
}
if p.PodLabelExclude != nil {
podLabelExclude, err := filter.Compile(p.PodLabelExclude)
if err != nil {
return fmt.Errorf("error compiling 'pod_label_exclude': %w", err)
}
p.podLabelExcludeFilter = podLabelExclude
}
if p.PodLabelInclude != nil {
podLabelInclude, err := filter.Compile(p.PodLabelInclude)
if err != nil {
return fmt.Errorf("error compiling 'pod_label_include': %w", err)
}
p.podLabelIncludeFilter = podLabelInclude
}
return nil
}
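// The include/exclude lists use Telegraf's glob filter syntax; for example
// (illustrative) pod_annotation_exclude = ["kubectl.kubernetes.io/*"] strips
// every annotation under that prefix from the resulting metric tags.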
func addressToURL(u *url.URL, address string) *url.URL {
host := address
if u.Port() != "" {
host = address + ":" + u.Port()
}
reconstructedURL := &url.URL{
Scheme: u.Scheme,
Opaque: u.Opaque,
User: u.User,
Path: u.Path,
RawPath: u.RawPath,
ForceQuery: u.ForceQuery,
RawQuery: u.RawQuery,
Fragment: u.Fragment,
Host: host,
}
return reconstructedURL
}
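// Example (illustrative addresses): a Kubernetes service URL
// "http://my-svc.my-ns:9100/metrics" that resolves to 10.0.0.7 is rebuilt by
// addressToURL as "http://10.0.0.7:9100/metrics".
// getAllURLs merges the statically configured URLs with the targets
// discovered via Consul, pod annotations and Kubernetes service DNS lookups.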
func (p *Prometheus) getAllURLs() (map[string]urlAndAddress, error) {
allURLs := make(map[string]urlAndAddress, len(p.URLs)+len(p.consulServices)+len(p.kubernetesPods))
for _, u := range p.URLs {
address, err := url.Parse(u)
if err != nil {
p.Log.Errorf("Could not parse %q, skipping it. Error: %s", u, err.Error())
continue
}
allURLs[address.String()] = urlAndAddress{url: address, originalURL: address}
}
p.lock.Lock()
defer p.lock.Unlock()
// add all services collected from consul
for k, v := range p.consulServices {
allURLs[k] = v
}
// loop through all pods scraped via the prometheus annotation on the pods
for _, v := range p.kubernetesPods {
if namespaceAnnotationMatch(v.namespace, p) {
allURLs[v.url.String()] = v
}
}
for _, service := range p.KubernetesServices {
address, err := url.Parse(service)
if err != nil {
return nil, err
}
resolvedAddresses, err := net.LookupHost(address.Hostname())
if err != nil {
p.Log.Errorf("Could not resolve %q, skipping it. Error: %s", address.Host, err.Error())
continue
}
for _, resolved := range resolvedAddresses {
serviceURL := addressToURL(address, resolved)
allURLs[serviceURL.String()] = urlAndAddress{
url: serviceURL,
address: resolved,
originalURL: address,
}
}
}
return allURLs, nil
}
func (p *Prometheus) gatherURL(u urlAndAddress, acc telegraf.Accumulator) (map[string]interface{}, map[string]string, error) {
var req *http.Request
var uClient *http.Client
requestFields := make(map[string]interface{})
tags := make(map[string]string, len(u.tags)+2)
if p.URLTag != "" {
tags[p.URLTag] = u.originalURL.String()
}
if u.address != "" {
tags["address"] = u.address
}
for k, v := range u.tags {
tags[k] = v
}
if u.url.Scheme == "unix" {
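// Unix-domain targets are given as e.g. "unix:///run/app.sock?path=/custom"
// (illustrative): the socket at the URL path is dialed and the HTTP request
// goes to the optional "path" query parameter, defaulting to "/metrics".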
path := u.url.Query().Get("path")
if path == "" {
path = "/metrics"
}
var err error
addr := "http://localhost" + path
req, err = http.NewRequest("GET", addr, nil)
if err != nil {
return nil, nil, fmt.Errorf("unable to create new request %q: %w", addr, err)
}
//nolint:errcheck // ignore error because it's been handled before getting here
tlsCfg, _ := p.HTTPClientConfig.TLSConfig()
uClient = &http.Client{
Transport: &http.Transport{
TLSClientConfig: tlsCfg,
DisableKeepAlives: true,
Dial: func(string, string) (net.Conn, error) {
c, err := net.Dial("unix", u.url.Path)
return c, err
},
},
}
} else {
if u.url.Path == "" {
u.url.Path = "/metrics"
}
var err error
req, err = http.NewRequest("GET", u.url.String(), nil)
if err != nil {
return nil, nil, fmt.Errorf("unable to create new request %q: %w", u.url.String(), err)
}
}
p.addHeaders(req)
if p.BearerToken != "" {
token, err := os.ReadFile(p.BearerToken)
if err != nil {
return nil, nil, err
}
req.Header.Set("Authorization", "Bearer "+string(token))
} else if !p.BearerTokenString.Empty() {
token, err := p.BearerTokenString.Get()
if err != nil {
return nil, nil, fmt.Errorf("getting token secret failed: %w", err)
}
req.Header.Set("Authorization", "Bearer "+token.String())
token.Destroy()
} else if !p.Username.Empty() || !p.Password.Empty() {
username, err := p.Username.Get()
if err != nil {
return nil, nil, fmt.Errorf("getting username secret failed: %w", err)
}
password, err := p.Password.Get()
if err != nil {
return nil, nil, fmt.Errorf("getting password secret failed: %w", err)
}
req.SetBasicAuth(username.String(), password.String())
username.Destroy()
password.Destroy()
}
for key, value := range p.HTTPHeaders {
if strings.EqualFold(key, "host") {
req.Host = value
} else {
req.Header.Set(key, value)
}
}
var err error
var resp *http.Response
var start time.Time
if u.url.Scheme != "unix" {
start = time.Now()
resp, err = p.client.Do(req)
} else {
start = time.Now()
resp, err = uClient.Do(req)
}
end := time.Since(start).Seconds()
if err != nil {
return requestFields, tags, fmt.Errorf("error making HTTP request to %q: %w", u.url, err)
}
requestFields["response_time"] = end
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return requestFields, tags, fmt.Errorf("%q returned HTTP status %q", u.url, resp.Status)
}
var body []byte
if p.ContentLengthLimit != 0 {
limit := int64(p.ContentLengthLimit)
// To determine whether io.ReadAll() ended due to EOF or reached the specified limit,
// read up to the specified limit plus one extra byte, and then make a decision based
// on the length of the result.
lr := io.LimitReader(resp.Body, limit+1)
body, err = io.ReadAll(lr)
if err != nil {
return requestFields, tags, fmt.Errorf("error reading body: %w", err)
}
if int64(len(body)) > limit {
p.Log.Infof("skipping %s: content length exceeded maximum body size (%d)", u.url, limit)
return requestFields, tags, nil
}
} else {
body, err = io.ReadAll(resp.Body)
if err != nil {
return requestFields, tags, fmt.Errorf("error reading body: %w", err)
}
}
requestFields["content_length"] = len(body)
// Override the response format if the user requested it
if p.contentType != "" {
resp.Header.Set("Content-Type", p.contentType)
}
// Parse the metrics
var metricParser telegraf.Parser
if openmetrics.AcceptsContent(resp.Header) {
metricParser = &openmetrics.Parser{
Header: resp.Header,
MetricVersion: p.MetricVersion,
IgnoreTimestamp: p.IgnoreTimestamp,
Log: p.Log,
}
} else {
metricParser = &parsers_prometheus.Parser{
Header: resp.Header,
MetricVersion: p.MetricVersion,
IgnoreTimestamp: p.IgnoreTimestamp,
Log: p.Log,
}
}
metrics, err := metricParser.Parse(body)
if err != nil {
return requestFields, tags, fmt.Errorf("error reading metrics for %q: %w", u.url, err)
}
for _, metric := range metrics {
tags := metric.Tags()
// strip user and password from URL
u.originalURL.User = nil
if p.URLTag != "" {
tags[p.URLTag] = u.originalURL.String()
}
if u.address != "" {
tags["address"] = u.address
}
for k, v := range u.tags {
tags[k] = v
}
switch metric.Type() {
case telegraf.Counter:
acc.AddCounter(metric.Name(), metric.Fields(), tags, metric.Time())
case telegraf.Gauge:
acc.AddGauge(metric.Name(), metric.Fields(), tags, metric.Time())
case telegraf.Summary:
acc.AddSummary(metric.Name(), metric.Fields(), tags, metric.Time())
case telegraf.Histogram:
acc.AddHistogram(metric.Name(), metric.Fields(), tags, metric.Time())
default:
acc.AddFields(metric.Name(), metric.Fields(), tags, metric.Time())
}
}
return requestFields, tags, nil
}
func (p *Prometheus) addHeaders(req *http.Request) {
for header, value := range p.headers {
req.Header.Add(header, value)
}
}
/* Check if the specified field selector is valid.
 * See ToSelectableFields() for the list of fields that are selectable:
 * https://github.com/kubernetes/kubernetes/blob/release-1.20/pkg/registry/core/pod/strategy.go
 */
func fieldSelectorIsSupported(fieldSelector fields.Selector) (bool, string) {
supportedFieldsToSelect := map[string]bool{
"spec.nodeName": true,
"spec.restartPolicy": true,
"spec.schedulerName": true,
"spec.serviceAccountName": true,
"status.phase": true,
"status.podIP": true,
"status.nominatedNodeName": true,
}
for _, requirement := range fieldSelector.Requirements() {
if !supportedFieldsToSelect[requirement.Field] {
return false, requirement.Field
}
}
return true, ""
}
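// For example, "spec.nodeName=node-0,status.phase=Running" is accepted,
// while "spec.containerName=app" yields (false, "spec.containerName").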
func init() {
inputs.Add("prometheus", func() telegraf.Input {
return &Prometheus{
kubernetesPods: make(map[podID]urlAndAddress),
consulServices: make(map[string]urlAndAddress),
URLTag: "url",
}
})
}


@ -0,0 +1,942 @@
package prometheus
import (
"errors"
"fmt"
"math"
"net/http"
"net/http/httptest"
"net/url"
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/fields"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
)
const sampleTextFormat = `# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 0.00010425500000000001
go_gc_duration_seconds{quantile="0.25"} 0.000139108
go_gc_duration_seconds{quantile="0.5"} 0.00015749400000000002
go_gc_duration_seconds{quantile="0.75"} 0.000331463
go_gc_duration_seconds{quantile="1"} 0.000667154
go_gc_duration_seconds_sum 0.0018183950000000002
go_gc_duration_seconds_count 7
# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 15
# HELP test_metric An untyped metric with a timestamp
# TYPE test_metric untyped
test_metric{label="value"} 1.0 1490802350000`
const sampleSummaryTextFormat = `# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 0.00010425500000000001
go_gc_duration_seconds{quantile="0.25"} 0.000139108
go_gc_duration_seconds{quantile="0.5"} 0.00015749400000000002
go_gc_duration_seconds{quantile="0.75"} 0.000331463
go_gc_duration_seconds{quantile="1"} 0.000667154
go_gc_duration_seconds_sum 0.0018183950000000002
go_gc_duration_seconds_count 7`
const sampleGaugeTextFormat = `
# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 15 1490802350000`
func TestPrometheusGeneratesMetrics(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, sampleTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "url",
}
err := p.Init()
require.NoError(t, err)
var acc testutil.Accumulator
err = acc.GatherError(p.Gather)
require.NoError(t, err)
require.True(t, acc.HasFloatField("go_gc_duration_seconds", "count"))
require.True(t, acc.HasFloatField("go_goroutines", "gauge"))
require.True(t, acc.HasFloatField("test_metric", "value"))
require.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
require.False(t, acc.HasTag("test_metric", "address"))
require.Equal(t, ts.URL+"/metrics", acc.TagValue("test_metric", "url"))
}
func TestPrometheusCustomHeader(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.Header.Get("accept") {
case "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3":
if _, err := fmt.Fprintln(w, "proto 15 1490802540000"); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
case "text/plain":
if _, err := fmt.Fprintln(w, "plain 42 1490802380000"); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
default:
if _, err := fmt.Fprintln(w, "other 44 1490802420000"); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}
}))
defer ts.Close()
tests := []struct {
name string
headers map[string]string
expectedMeasurementName string
}{
{
"default",
map[string]string{},
"proto",
},
{
"plain text",
map[string]string{
"accept": "text/plain",
},
"plain",
},
{
"other",
map[string]string{
"accept": "fakeACCEPTitem",
},
"other",
},
}
for _, test := range tests {
p := &Prometheus{
Log: testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "url",
HTTPHeaders: test.headers,
}
err := p.Init()
require.NoError(t, err)
var acc testutil.Accumulator
require.NoError(t, acc.GatherError(p.Gather))
require.Equal(t, test.expectedMeasurementName, acc.Metrics[0].Measurement)
}
}
func TestPrometheusGeneratesMetricsWithHostNameTag(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, sampleTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
KubernetesServices: []string{ts.URL},
URLTag: "url",
}
err := p.Init()
require.NoError(t, err)
u, err := url.Parse(ts.URL)
require.NoError(t, err)
tsAddress := u.Hostname()
var acc testutil.Accumulator
err = acc.GatherError(p.Gather)
require.NoError(t, err)
require.True(t, acc.HasFloatField("go_gc_duration_seconds", "count"))
require.True(t, acc.HasFloatField("go_goroutines", "gauge"))
require.True(t, acc.HasFloatField("test_metric", "value"))
require.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
require.Equal(t, tsAddress, acc.TagValue("test_metric", "address"))
require.Equal(t, ts.URL, acc.TagValue("test_metric", "url"))
}
func TestPrometheusWithTimestamp(t *testing.T) {
prommetric := `# HELP test_counter A sample test counter.
# TYPE test_counter counter
test_counter{label="test"} 1 1685443805885`
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, prommetric); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
KubernetesServices: []string{ts.URL},
}
require.NoError(t, p.Init())
u, err := url.Parse(ts.URL)
require.NoError(t, err)
tsAddress := u.Hostname()
expected := []telegraf.Metric{
metric.New(
"test_counter",
map[string]string{"address": tsAddress, "label": "test"},
map[string]interface{}{"counter": float64(1.0)},
time.UnixMilli(1685443805885),
telegraf.Counter,
),
}
var acc testutil.Accumulator
require.NoError(t, acc.GatherError(p.Gather))
testutil.RequireMetricsSubset(t, expected, acc.GetTelegrafMetrics())
}
func TestPrometheusGeneratesMetricsAlthoughFirstDNSFailsIntegration(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, sampleTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
URLs: []string{ts.URL},
KubernetesServices: []string{"http://random.telegraf.local:88/metrics"},
}
err := p.Init()
require.NoError(t, err)
var acc testutil.Accumulator
err = acc.GatherError(p.Gather)
require.NoError(t, err)
require.True(t, acc.HasFloatField("go_gc_duration_seconds", "count"))
require.True(t, acc.HasFloatField("go_goroutines", "gauge"))
require.True(t, acc.HasFloatField("test_metric", "value"))
require.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
}
func TestPrometheusGeneratesMetricsSlowEndpoint(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
time.Sleep(4 * time.Second)
if _, err := fmt.Fprintln(w, sampleTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "url",
client: &http.Client{
Timeout: time.Second * 5,
},
}
err := p.Init()
require.NoError(t, err)
var acc testutil.Accumulator
err = acc.GatherError(p.Gather)
require.NoError(t, err)
require.True(t, acc.HasFloatField("go_gc_duration_seconds", "count"))
require.True(t, acc.HasFloatField("go_goroutines", "gauge"))
require.True(t, acc.HasFloatField("test_metric", "value"))
require.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
require.False(t, acc.HasTag("test_metric", "address"))
require.Equal(t, acc.TagValue("test_metric", "url"), ts.URL+"/metrics")
}
func TestPrometheusGeneratesMetricsSlowEndpointHitTheTimeout(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
time.Sleep(6 * time.Second)
if _, err := fmt.Fprintln(w, sampleTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "url",
client: &http.Client{
Timeout: time.Second * 5,
},
}
err := p.Init()
require.NoError(t, err)
var acc testutil.Accumulator
err = acc.GatherError(p.Gather)
errMessage := fmt.Sprintf("error making HTTP request to \"%s/metrics\": Get \"%s/metrics\": "+
"context deadline exceeded (Client.Timeout exceeded while awaiting headers)", ts.URL, ts.URL)
errExpected := errors.New(errMessage)
require.Error(t, err)
require.Equal(t, errExpected.Error(), err.Error())
}
func TestPrometheusGeneratesMetricsSlowEndpointNewConfigParameter(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
time.Sleep(4 * time.Second)
if _, err := fmt.Fprintln(w, sampleTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "url",
}
err := p.Init()
require.NoError(t, err)
p.client.Timeout = time.Second * 5
var acc testutil.Accumulator
err = acc.GatherError(p.Gather)
require.NoError(t, err)
require.True(t, acc.HasFloatField("go_gc_duration_seconds", "count"))
require.True(t, acc.HasFloatField("go_goroutines", "gauge"))
require.True(t, acc.HasFloatField("test_metric", "value"))
require.True(t, acc.HasTimestamp("test_metric", time.Unix(1490802350, 0)))
require.False(t, acc.HasTag("test_metric", "address"))
require.Equal(t, acc.TagValue("test_metric", "url"), ts.URL+"/metrics")
}
func TestPrometheusGeneratesMetricsSlowEndpointHitTheTimeoutNewConfigParameter(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
time.Sleep(6 * time.Second)
if _, err := fmt.Fprintln(w, sampleTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "url",
}
err := p.Init()
require.NoError(t, err)
p.client.Timeout = time.Second * 5
var acc testutil.Accumulator
err = acc.GatherError(p.Gather)
require.ErrorContains(t, err, "error making HTTP request to \""+ts.URL+"/metrics\"")
}
func TestPrometheusContentLengthLimit(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, sampleTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "url",
ContentLengthLimit: 1,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, acc.GatherError(p.Gather))
require.Empty(t, acc.Metrics)
}
func TestPrometheusGeneratesSummaryMetricsV2(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, sampleSummaryTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: &testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "url",
MetricVersion: 2,
}
err := p.Init()
require.NoError(t, err)
var acc testutil.Accumulator
err = acc.GatherError(p.Gather)
require.NoError(t, err)
require.Equal(t, "0", acc.TagSetValue("prometheus", "quantile"))
require.True(t, acc.HasFloatField("prometheus", "go_gc_duration_seconds_sum"))
require.True(t, acc.HasFloatField("prometheus", "go_gc_duration_seconds_count"))
require.Equal(t, acc.TagValue("prometheus", "url"), ts.URL+"/metrics")
}
func TestSummaryMayContainNaN(t *testing.T) {
const data = `# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} NaN
go_gc_duration_seconds{quantile="1"} NaN
go_gc_duration_seconds_sum 42.0
go_gc_duration_seconds_count 42`
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, data); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: &testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "",
MetricVersion: 2,
EnableRequestMetrics: true,
}
err := p.Init()
require.NoError(t, err)
var acc testutil.Accumulator
err = p.Gather(&acc)
require.NoError(t, err)
expected := []telegraf.Metric{
testutil.MustMetric(
"prometheus",
map[string]string{
"quantile": "0",
},
map[string]interface{}{
"go_gc_duration_seconds": math.NaN(),
},
time.Unix(0, 0),
telegraf.Summary,
),
testutil.MustMetric(
"prometheus",
map[string]string{
"quantile": "1",
},
map[string]interface{}{
"go_gc_duration_seconds": math.NaN(),
},
time.Unix(0, 0),
telegraf.Summary,
),
testutil.MustMetric(
"prometheus",
map[string]string{},
map[string]interface{}{
"go_gc_duration_seconds_sum": float64(42.0),
"go_gc_duration_seconds_count": float64(42)},
time.Unix(0, 0),
telegraf.Summary,
),
testutil.MustMetric(
"prometheus_request",
map[string]string{},
map[string]interface{}{
"content_length": int64(1),
"response_time": float64(0)},
time.Unix(0, 0),
telegraf.Untyped,
),
}
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(),
testutil.IgnoreTime(), testutil.SortMetrics(), testutil.IgnoreFields("content_length", "response_time"))
}
func TestPrometheusGeneratesGaugeMetricsV2(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, sampleGaugeTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: &testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "url",
MetricVersion: 2,
}
err := p.Init()
require.NoError(t, err)
var acc testutil.Accumulator
err = acc.GatherError(p.Gather)
require.NoError(t, err)
require.True(t, acc.HasFloatField("prometheus", "go_goroutines"))
require.Equal(t, acc.TagValue("prometheus", "url"), ts.URL+"/metrics")
require.True(t, acc.HasTimestamp("prometheus", time.Unix(1490802350, 0)))
}
func TestPrometheusGeneratesMetricsWithIgnoreTimestamp(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, sampleTextFormat); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "url",
IgnoreTimestamp: true,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, acc.GatherError(p.Gather))
m, found := acc.Get("test_metric")
require.True(t, found)
require.NotNil(t, m)
require.WithinDuration(t, time.Now(), m.Time, 5*time.Second)
}
func TestUnsupportedFieldSelector(t *testing.T) {
fieldSelectorString := "spec.containerName=container"
prom := &Prometheus{Log: testutil.Logger{}, KubernetesFieldSelector: fieldSelectorString}
fieldSelector, err := fields.ParseSelector(prom.KubernetesFieldSelector)
require.NoError(t, err)
isValid, invalidSelector := fieldSelectorIsSupported(fieldSelector)
require.False(t, isValid)
require.Equal(t, "spec.containerName", invalidSelector)
}
func TestInitConfigErrors(t *testing.T) {
p := &Prometheus{
MetricVersion: 2,
Log: testutil.Logger{},
URLs: nil,
URLTag: "url",
MonitorPods: true,
PodScrapeScope: "node",
PodScrapeInterval: 60,
}
// Both invalid IP addresses
t.Run("Both invalid IP addresses", func(t *testing.T) {
p.NodeIP = "10.240.0.0.0"
t.Setenv("NODE_IP", "10.000.0.0.0")
err := p.Init()
require.Error(t, err)
expectedMessage := "the node_ip config and the environment variable NODE_IP are not set or invalid; " +
"cannot get pod list for monitor_kubernetes_pods using node scrape scope"
require.Equal(t, expectedMessage, err.Error())
})
t.Run("Valid IP address", func(t *testing.T) {
t.Setenv("NODE_IP", "10.000.0.0")
p.KubernetesLabelSelector = "label0==label0, label0 in (=)"
err := p.Init()
expectedMessage := "error parsing the specified label selector(s): unable to parse requirement: found '=', expected: ',', ')' or identifier"
require.Error(t, err, expectedMessage)
p.KubernetesLabelSelector = "label0==label"
p.KubernetesFieldSelector = "field,"
err = p.Init()
expectedMessage = "error parsing the specified field selector(s): invalid selector: 'field,'; can't understand 'field'"
require.Error(t, err, expectedMessage)
p.KubernetesFieldSelector = "spec.containerNames=containerNames"
err = p.Init()
expectedMessage = "the field selector spec.containerNames is not supported for pods"
require.Error(t, err, expectedMessage)
})
}
func TestInitConfigSelectors(t *testing.T) {
p := &Prometheus{
MetricVersion: 2,
Log: testutil.Logger{},
URLs: nil,
URLTag: "url",
MonitorPods: true,
MonitorKubernetesPodsMethod: monitorMethodSettings,
PodScrapeInterval: 60,
KubernetesLabelSelector: "app=test",
KubernetesFieldSelector: "spec.nodeName=node-0",
}
err := p.Init()
require.NoError(t, err)
require.NotNil(t, p.podLabelSelector)
require.NotNil(t, p.podFieldSelector)
}
func TestPrometheusInternalOk(t *testing.T) {
prommetric := `# HELP test_counter A sample test counter.
# TYPE test_counter counter
test_counter{label="test"} 1 1685443805885`
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, prommetric); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
KubernetesServices: []string{ts.URL},
EnableRequestMetrics: true,
}
require.NoError(t, p.Init())
u, err := url.Parse(ts.URL)
require.NoError(t, err)
tsAddress := u.Hostname()
expected := []telegraf.Metric{
metric.New(
"prometheus_request",
map[string]string{
"address": tsAddress},
map[string]interface{}{
"content_length": int64(1),
"response_time": float64(0)},
time.UnixMilli(0),
telegraf.Untyped,
),
}
var acc testutil.Accumulator
testutil.PrintMetrics(acc.GetTelegrafMetrics())
require.NoError(t, acc.GatherError(p.Gather))
testutil.RequireMetricsSubset(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreFields("content_length", "response_time"), testutil.IgnoreTime())
}
func TestPrometheusInternalContentBadFormat(t *testing.T) {
prommetric := `# HELP test_counter A sample test counter.
# TYPE test_counter counter
<body>Flag test</body>`
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
if _, err := fmt.Fprintln(w, prommetric); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
KubernetesServices: []string{ts.URL},
EnableRequestMetrics: true,
}
require.NoError(t, p.Init())
u, err := url.Parse(ts.URL)
require.NoError(t, err)
tsAddress := u.Hostname()
expected := []telegraf.Metric{
metric.New(
"prometheus_request",
map[string]string{
"address": tsAddress},
map[string]interface{}{
"content_length": int64(94),
"response_time": float64(0)},
time.UnixMilli(0),
telegraf.Untyped,
),
}
var acc testutil.Accumulator
require.Error(t, acc.GatherError(p.Gather))
testutil.RequireMetricsSubset(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreFields("content_length", "response_time"), testutil.IgnoreTime())
}
func TestPrometheusInternalNoWeb(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(404)
}))
defer ts.Close()
p := &Prometheus{
Log: testutil.Logger{},
KubernetesServices: []string{ts.URL},
EnableRequestMetrics: true,
}
require.NoError(t, p.Init())
u, err := url.Parse(ts.URL)
require.NoError(t, err)
tsAddress := u.Hostname()
expected := []telegraf.Metric{
metric.New(
"prometheus_request",
map[string]string{
"address": tsAddress},
map[string]interface{}{
"content_length": int64(94),
"response_time": float64(0)},
time.UnixMilli(0),
telegraf.Untyped,
),
}
var acc testutil.Accumulator
testutil.PrintMetrics(acc.GetTelegrafMetrics())
require.Error(t, acc.GatherError(p.Gather))
testutil.RequireMetricsSubset(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreFields("content_length", "response_time"), testutil.IgnoreTime())
}
func TestOpenmetricsText(t *testing.T) {
const data = `
# HELP go_memstats_gc_cpu_fraction The fraction of this program's available CPU time used by the GC since the program started.
# TYPE go_memstats_gc_cpu_fraction gauge
go_memstats_gc_cpu_fraction -0.00014404354379774563
# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata.
# TYPE go_memstats_gc_sys_bytes gauge
go_memstats_gc_sys_bytes 6.0936192e+07
# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use.
# TYPE go_memstats_heap_alloc_bytes gauge
go_memstats_heap_alloc_bytes 1.581062048e+09
# EOF
`
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.Header().Add("Content-Type", "application/openmetrics-text;version=1.0.0")
if _, err := w.Write([]byte(data)); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: &testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "",
MetricVersion: 2,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
expected := []telegraf.Metric{
testutil.MustMetric(
"openmetric",
map[string]string{},
map[string]interface{}{"go_memstats_gc_cpu_fraction": float64(-0.00014404354379774563)},
time.Unix(0, 0),
telegraf.Gauge,
),
testutil.MustMetric(
"openmetric",
map[string]string{},
map[string]interface{}{"go_memstats_gc_sys_bytes": 6.0936192e+07},
time.Unix(0, 0),
telegraf.Gauge,
),
testutil.MustMetric(
"openmetric",
map[string]string{},
map[string]interface{}{"go_memstats_heap_alloc_bytes": 1.581062048e+09},
time.Unix(0, 0),
telegraf.Gauge,
),
}
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime(), testutil.SortMetrics())
}
func TestOpenmetricsProtobuf(t *testing.T) {
data, err := os.ReadFile(filepath.Join("testdata", "openmetric-proto.bin"))
require.NoError(t, err)
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.Header().Add("Content-Type", "application/openmetrics-protobuf;version=1.0.0")
if _, err := w.Write(data); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: &testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "",
MetricVersion: 2,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
expected := []telegraf.Metric{
testutil.MustMetric(
"openmetric",
map[string]string{},
map[string]interface{}{"go_memstats_gc_cpu_fraction": float64(-0.00014404354379774563)},
time.Unix(0, 0),
telegraf.Gauge,
),
testutil.MustMetric(
"openmetric",
map[string]string{},
map[string]interface{}{"go_memstats_gc_sys_bytes": 6.0936192e+07},
time.Unix(0, 0),
telegraf.Gauge,
),
testutil.MustMetric(
"openmetric",
map[string]string{},
map[string]interface{}{"go_memstats_heap_alloc_bytes": 1.581062048e+09},
time.Unix(0, 0),
telegraf.Gauge,
),
}
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime(), testutil.SortMetrics())
}
func TestContentTypeOverride(t *testing.T) {
const data = `
# HELP go_memstats_gc_cpu_fraction The fraction of this program's available CPU time used by the GC since the program started.
# TYPE go_memstats_gc_cpu_fraction gauge
go_memstats_gc_cpu_fraction -0.00014404354379774563
# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata.
# TYPE go_memstats_gc_sys_bytes gauge
go_memstats_gc_sys_bytes 6.0936192e+07
# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use.
# TYPE go_memstats_heap_alloc_bytes gauge
go_memstats_heap_alloc_bytes 1.581062048e+09
# EOF
`
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
// Provide a wrong version
w.Header().Add("Content-Type", "application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited")
if _, err := w.Write([]byte(data)); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}))
defer ts.Close()
p := &Prometheus{
Log: &testutil.Logger{},
URLs: []string{ts.URL},
URLTag: "",
MetricVersion: 2,
ContentTypeOverride: "openmetrics-text",
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
expected := []telegraf.Metric{
testutil.MustMetric(
"openmetric",
map[string]string{},
map[string]interface{}{"go_memstats_gc_cpu_fraction": float64(-0.00014404354379774563)},
time.Unix(0, 0),
telegraf.Gauge,
),
testutil.MustMetric(
"openmetric",
map[string]string{},
map[string]interface{}{"go_memstats_gc_sys_bytes": 6.0936192e+07},
time.Unix(0, 0),
telegraf.Gauge,
),
testutil.MustMetric(
"openmetric",
map[string]string{},
map[string]interface{}{"go_memstats_heap_alloc_bytes": 1.581062048e+09},
time.Unix(0, 0),
telegraf.Gauge,
),
}
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime(), testutil.SortMetrics())
}


@ -0,0 +1,170 @@
# Read metrics from one or many prometheus clients
[[inputs.prometheus]]
## An array of urls to scrape metrics from.
urls = ["http://localhost:9100/metrics"]
## Metric version controls the mapping from Prometheus metrics into Telegraf metrics.
## See "Metric Format Configuration" in plugins/inputs/prometheus/README.md for details.
## Valid options: 1, 2
# metric_version = 1
## URL tag name (tag containing the scraped URL; optional, default is "url")
# url_tag = "url"
## Whether the timestamp of the scraped metrics will be ignored.
## If set to true, the gather time will be used.
# ignore_timestamp = false
## Override content-type of the returned message
## Available options are for prometheus:
## text, protobuf-delimiter, protobuf-compact, protobuf-text,
## and for openmetrics:
## openmetrics-text, openmetrics-protobuf
## By default the content-type of the response is used.
# content_type_override = ""
## An array of Kubernetes services to scrape metrics from.
# kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"]
## Kubernetes config file to create client from.
# kube_config = "/path/to/kubernetes.config"
## Scrape Pods
## Enable scraping of k8s pods. Further settings as to which pods to scrape
## are determined by the 'method' option below. When enabled, the default is
## to use annotations to determine whether to scrape or not.
# monitor_kubernetes_pods = false
## Scrape Pods Method
## annotations: default, looks for specific pod annotations documented below
## settings: only look for pods matching the settings provided, not
## annotations
## settings+annotations: looks at pods that match annotations using the user
## defined settings
# monitor_kubernetes_pods_method = "annotations"
## Scrape Pods 'annotations' method options
## If the method is set to 'annotations' or 'settings+annotations', these
## annotation flags are looked for:
## - prometheus.io/scrape: Required to enable scraping for this pod. Can also
## use 'prometheus.io/scrape=false' annotation to opt-out entirely.
## - prometheus.io/scheme: If the metrics endpoint is secured then you will
## need to set this to 'https' & most likely set the tls config
## - prometheus.io/path: If the metrics path is not /metrics, define it with
## this annotation
## - prometheus.io/port: If port is not 9102 use this annotation
## Scrape Pods 'settings' method options
## When using 'settings' or 'settings+annotations', the default values for
## annotations can be modified with the following options:
# monitor_kubernetes_pods_scheme = "http"
# monitor_kubernetes_pods_port = "9102"
# monitor_kubernetes_pods_path = "/metrics"
## Get the list of pods to scrape with either the scope of
## - cluster: the kubernetes watch api (default, no need to specify)
## - node: the local cadvisor api; for scalability. Note that the config node_ip or the environment variable NODE_IP must be set to the host IP.
# pod_scrape_scope = "cluster"
## Only for node scrape scope: node IP of the node that telegraf is running on.
## Either this config or the environment variable NODE_IP must be set.
# node_ip = "10.180.1.1"
## Only for node scrape scope: interval in seconds for how often to get updated pod list for scraping.
## Default is 60 seconds.
# pod_scrape_interval = 60
## Content length limit
## When set, telegraf will drop responses larger than the configured value.
## Default is "0KB" which means unlimited.
# content_length_limit = "0KB"
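## e.g. set content_length_limit = "1MB" (illustrative) to skip any scrape
## response larger than one megabyte.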
## Restricts Kubernetes monitoring to a single namespace
## ex: monitor_kubernetes_pods_namespace = "default"
# monitor_kubernetes_pods_namespace = ""
## The name of the label for the pod that is being scraped.
## Default is 'namespace' but this can conflict with metrics that have the label 'namespace'
# pod_namespace_label_name = "namespace"
## Label selector to target pods which have the specified label
# kubernetes_label_selector = "env=dev,app=nginx"
## Field selector to target pods
## e.g. to scrape pods on a specific node:
# kubernetes_field_selector = "spec.nodeName=$HOSTNAME"
## Filter which pod annotations and labels will be added to metric tags
#
# pod_annotation_include = ["annotation-key-1"]
# pod_annotation_exclude = ["exclude-me"]
# pod_label_include = ["label-key-1"]
# pod_label_exclude = ["exclude-me"]
## Cache refresh interval to set the interval for re-sync of the pod list.
## Default is 60 minutes.
# cache_refresh_interval = 60
## Use bearer token for authorization. ('bearer_token' takes priority)
# bearer_token = "/path/to/bearer/token"
## OR
# bearer_token_string = "abc_123"
## HTTP Basic Authentication username and password. ('bearer_token' and
## 'bearer_token_string' take priority)
# username = ""
# password = ""
## Optional custom HTTP headers
# http_headers = {"X-Special-Header" = "Special-Value"}
## Specify timeout duration for slower prometheus clients (default is 5s)
# timeout = "5s"
## This option is now used by the HTTP client to set the response header
## timeout, not the overall HTTP timeout.
# response_timeout = "5s"
## HTTP Proxy support
# use_system_proxy = false
# http_proxy_url = ""
## Optional TLS Config
# tls_ca = "/path/to/cafile"
# tls_cert = "/path/to/certfile"
# tls_key = "/path/to/keyfile"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
## Use the given name as the SNI server name on each URL
# tls_server_name = "myhost.example.org"
## TLS renegotiation method, choose from "never", "once", "freely"
# tls_renegotiation_method = "never"
## Enable/disable TLS
## Set to true/false to enforce TLS being enabled/disabled. If not set,
## enable TLS only if any of the other options are specified.
# tls_enable = true
## This option allows you to report the status of prometheus requests.
# enable_request_metrics = false
## Scrape Services available in Consul Catalog
# [inputs.prometheus.consul]
# enabled = true
# agent = "http://localhost:8500"
# query_interval = "5m"
# [[inputs.prometheus.consul.query]]
# name = "a service name"
# tag = "a service tag"
# url = 'http://{{if ne .ServiceAddress ""}}{{.ServiceAddress}}{{else}}{{.Address}}{{end}}:{{.ServicePort}}/{{with .ServiceMeta.metrics_path}}{{.}}{{else}}metrics{{end}}'
# [inputs.prometheus.consul.query.tags]
# host = "{{.Node}}"
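## Example (illustrative): a service registered with ServiceAddress
## "10.1.2.3", ServicePort 9273 and no "metrics_path" metadata renders the
## url template above as "http://10.1.2.3:9273/metrics".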
## Control pod scraping based on pod namespace annotations
## Pass and drop here act like tagpass and tagdrop, but instead
## of filtering metrics they filter pod candidates for scraping
#[inputs.prometheus.namespace_annotation_pass]
# annotation_key = ["value1", "value2"]
#[inputs.prometheus.namespace_annotation_drop]
# some_annotation_key = ["dont-scrape"]

Binary file not shown.