1
0
Fork 0

Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
4963 changed files with 677545 additions and 0 deletions

View file

@ -0,0 +1,36 @@
# Dedup Processor Plugin
Filter metrics whose field values are exact repetitions of the previous values.
This plugin will store its state between runs if the `statefile` option in the
agent config section is set.
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
In addition to the plugin-specific configuration settings, plugins support
additional global and plugin configuration settings. These settings are used to
modify metrics, tags, and fields or create aliases and configure ordering, etc.
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
## Configuration
```toml @sample.conf
# Filter metrics with repeating field values
[[processors.dedup]]
## Maximum time to suppress output
dedup_interval = "600s"
```
## Example
```diff
- cpu,cpu=cpu0 time_idle=42i,time_guest=1i
- cpu,cpu=cpu0 time_idle=42i,time_guest=2i
- cpu,cpu=cpu0 time_idle=42i,time_guest=2i
- cpu,cpu=cpu0 time_idle=44i,time_guest=2i
- cpu,cpu=cpu0 time_idle=44i,time_guest=2i
+ cpu,cpu=cpu0 time_idle=42i,time_guest=1i
+ cpu,cpu=cpu0 time_idle=42i,time_guest=2i
+ cpu,cpu=cpu0 time_idle=44i,time_guest=2i
```

View file

@ -0,0 +1,161 @@
//go:generate ../../../tools/readme_config_includer/generator
package dedup
import (
_ "embed"
"fmt"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/plugins/parsers/influx"
"github.com/influxdata/telegraf/plugins/processors"
serializers_influx "github.com/influxdata/telegraf/plugins/serializers/influx"
)
//go:embed sample.conf
var sampleConfig string
type Dedup struct {
DedupInterval config.Duration `toml:"dedup_interval"`
FlushTime time.Time
Cache map[uint64]telegraf.Metric
Log telegraf.Logger `toml:"-"`
}
// Remove expired items from cache
func (d *Dedup) cleanup() {
// No need to cleanup cache too often. Lets save some CPU
if time.Since(d.FlushTime) < time.Duration(d.DedupInterval) {
return
}
d.FlushTime = time.Now()
keep := make(map[uint64]telegraf.Metric)
for id, metric := range d.Cache {
if time.Since(metric.Time()) < time.Duration(d.DedupInterval) {
keep[id] = metric
}
}
d.Cache = keep
}
// Save item to cache
func (d *Dedup) save(metric telegraf.Metric, id uint64) {
d.Cache[id] = metric.Copy()
d.Cache[id].Accept()
}
func (*Dedup) SampleConfig() string {
return sampleConfig
}
// main processing method
func (d *Dedup) Apply(metrics ...telegraf.Metric) []telegraf.Metric {
idx := 0
for _, metric := range metrics {
id := metric.HashID()
m, ok := d.Cache[id]
// If not in cache then just save it
if !ok {
d.save(metric, id)
metrics[idx] = metric
idx++
continue
}
// If cache item has expired then refresh it
if time.Since(m.Time()) >= time.Duration(d.DedupInterval) {
d.save(metric, id)
metrics[idx] = metric
idx++
continue
}
// For each field compare value with the cached one
changed := false
added := false
sametime := metric.Time() == m.Time()
for _, f := range metric.FieldList() {
if value, ok := m.GetField(f.Key); ok {
if value != f.Value {
changed = true
break
}
} else if sametime {
// This field isn't in the cached metric but it's the
// same series and timestamp. Merge it into the cached
// metric.
// Metrics have a ValueType that applies to all values
// in the metric. If an input needs to produce values
// with different ValueTypes but the same timestamp,
// they have to produce multiple metrics. (See the
// system input for an example.) In this case, dedup
// ignores the ValueTypes of the metrics and merges
// the fields into one metric for the dup check.
m.AddField(f.Key, f.Value)
added = true
}
}
// If any field value has changed then refresh the cache
if changed {
d.save(metric, id)
metrics[idx] = metric
idx++
continue
}
if sametime && added {
metrics[idx] = metric
idx++
continue
}
// In any other case remove metric from the output
metric.Drop()
}
metrics = metrics[:idx]
d.cleanup()
return metrics
}
func (d *Dedup) GetState() interface{} {
s := &serializers_influx.Serializer{}
v := make([]telegraf.Metric, 0, len(d.Cache))
for _, value := range d.Cache {
v = append(v, value)
}
state, err := s.SerializeBatch(v)
if err != nil {
d.Log.Errorf("dedup processor failed to serialize metric batch: %v", err)
}
return state
}
func (d *Dedup) SetState(state interface{}) error {
p := &influx.Parser{}
if err := p.Init(); err != nil {
return err
}
data, ok := state.([]byte)
if !ok {
return fmt.Errorf("state has wrong type %T", state)
}
metrics, err := p.Parse(data)
if err == nil {
d.Apply(metrics...)
}
return nil
}
func init() {
processors.Add("dedup", func() telegraf.Processor {
return &Dedup{
DedupInterval: config.Duration(10 * time.Minute),
FlushTime: time.Now(),
Cache: make(map[uint64]telegraf.Metric),
}
})
}

View file

@ -0,0 +1,532 @@
package dedup
import (
"fmt"
"sync"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
)
func TestMetrics(t *testing.T) {
now := time.Now()
tests := []struct {
name string
input []telegraf.Metric
expected []telegraf.Metric
cacheContent []telegraf.Metric
}{
{
name: "retain metric",
input: []telegraf.Metric{
metric.New("m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now,
),
},
expected: []telegraf.Metric{
metric.New("m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now,
),
},
cacheContent: []telegraf.Metric{
metric.New("m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now,
),
},
},
{
name: "suppress repeated metric",
input: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Second),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now,
),
},
expected: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Second),
),
},
cacheContent: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Second),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Second),
),
},
},
{
name: "pass updated metric",
input: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Second),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 2},
now,
),
},
expected: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Second),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 2},
now,
),
},
cacheContent: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Second),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 2},
now,
),
},
},
{
name: "pass after cache expired",
input: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Hour),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now,
),
},
expected: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Hour),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now,
),
},
cacheContent: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Hour),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now,
),
},
},
{
name: "cache retains metrics",
input: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-3*time.Hour),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-2*time.Hour),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now,
),
},
expected: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-3*time.Hour),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-2*time.Hour),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now,
),
},
cacheContent: []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-3*time.Hour),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-2*time.Hour),
),
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now,
),
},
},
{
name: "same timestamp",
input: []telegraf.Metric{
metric.New("metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 1}, // field
now,
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"bar": 1}, // different field
now,
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"bar": 2}, // same field different value
now,
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"bar": 2}, // same field same value
now,
),
},
expected: []telegraf.Metric{
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 1},
now,
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"bar": 1},
now,
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"bar": 2},
now,
),
},
cacheContent: []telegraf.Metric{
metric.New("metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 1},
now,
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 1, "bar": 1},
now,
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"bar": 2},
now,
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"bar": 2},
now,
),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create plugin instance
plugin := &Dedup{
DedupInterval: config.Duration(10 * time.Minute),
FlushTime: now.Add(-1 * time.Second),
Cache: make(map[uint64]telegraf.Metric),
}
// Feed the input metrics and record the outputs
var actual []telegraf.Metric
for i, m := range tt.input {
actual = append(actual, plugin.Apply(m)...)
// Check the cache content
if cm := tt.cacheContent[i]; cm == nil {
require.Empty(t, plugin.Cache)
} else {
id := m.HashID()
require.NotEmpty(t, plugin.Cache)
require.Contains(t, plugin.Cache, id)
testutil.RequireMetricEqual(t, cm, plugin.Cache[id])
}
}
// Check if we got the expected metrics
testutil.RequireMetricsEqual(t, tt.expected, actual)
})
}
}
func TestCacheShrink(t *testing.T) {
now := time.Now()
// Time offset is more than 2 * DedupInterval
plugin := &Dedup{
DedupInterval: config.Duration(10 * time.Minute),
FlushTime: now.Add(-2 * time.Hour),
Cache: make(map[uint64]telegraf.Metric),
}
// Time offset is more than 1 * DedupInterval
input := []telegraf.Metric{
metric.New(
"m1",
map[string]string{"tag": "tag_value"},
map[string]interface{}{"value": 1},
now.Add(-1*time.Hour),
),
}
actual := plugin.Apply(input...)
expected := input
testutil.RequireMetricsEqual(t, expected, actual)
require.Empty(t, plugin.Cache)
}
func TestTracking(t *testing.T) {
now := time.Now()
inputRaw := []telegraf.Metric{
metric.New("metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 1},
now.Add(-2*time.Second),
),
metric.New("metric",
map[string]string{"tag": "pass"},
map[string]interface{}{"foo": 1},
now.Add(-2*time.Second),
),
metric.New("metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 1},
now.Add(-1*time.Second),
),
metric.New("metric",
map[string]string{"tag": "pass"},
map[string]interface{}{"foo": 1},
now.Add(-1*time.Second),
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 3},
now,
),
}
var mu sync.Mutex
delivered := make([]telegraf.DeliveryInfo, 0, len(inputRaw))
notify := func(di telegraf.DeliveryInfo) {
mu.Lock()
defer mu.Unlock()
delivered = append(delivered, di)
}
input := make([]telegraf.Metric, 0, len(inputRaw))
for _, m := range inputRaw {
tm, _ := metric.WithTracking(m, notify)
input = append(input, tm)
}
expected := []telegraf.Metric{
metric.New("metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 1},
now.Add(-2*time.Second),
),
metric.New("metric",
map[string]string{"tag": "pass"},
map[string]interface{}{"foo": 1},
now.Add(-2*time.Second),
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 3},
now,
),
}
// Create plugin instance
plugin := &Dedup{
DedupInterval: config.Duration(10 * time.Minute),
FlushTime: now.Add(-1 * time.Second),
Cache: make(map[uint64]telegraf.Metric),
}
// Process expected metrics and compare with resulting metrics
actual := plugin.Apply(input...)
testutil.RequireMetricsEqual(t, expected, actual)
// Simulate output acknowledging delivery
for _, m := range actual {
m.Accept()
}
// Check delivery
require.Eventuallyf(t, func() bool {
mu.Lock()
defer mu.Unlock()
return len(input) == len(delivered)
}, time.Second, 100*time.Millisecond, "%d delivered but %d expected", len(delivered), len(expected))
}
func TestStatePersistence(t *testing.T) {
now := time.Now()
// Define the metrics and states
state := fmt.Sprintf("metric,tag=value foo=1i %d\n", now.Add(-1*time.Minute).UnixNano())
input := []telegraf.Metric{
metric.New("metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 1},
now.Add(-2*time.Second),
),
metric.New("metric",
map[string]string{"tag": "pass"},
map[string]interface{}{"foo": 1},
now.Add(-1*time.Second),
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 3},
now,
),
}
expected := []telegraf.Metric{
metric.New("metric",
map[string]string{"tag": "pass"},
map[string]interface{}{"foo": 1},
now.Add(-1*time.Second),
),
metric.New(
"metric",
map[string]string{"tag": "value"},
map[string]interface{}{"foo": 3},
now,
),
}
expectedState := []string{
fmt.Sprintf("metric,tag=pass foo=1i %d\n", now.Add(-1*time.Second).UnixNano()),
fmt.Sprintf("metric,tag=value foo=3i %d\n", now.UnixNano()),
}
// Configure the plugin
plugin := &Dedup{
DedupInterval: config.Duration(10 * time.Hour), // use a long interval to avoid flaky tests
FlushTime: now.Add(-1 * time.Second),
Cache: make(map[uint64]telegraf.Metric),
}
require.Empty(t, plugin.Cache)
// Setup the "persisted" state
var pi telegraf.StatefulPlugin = plugin
require.NoError(t, pi.SetState([]byte(state)))
require.Len(t, plugin.Cache, 1)
// Process expected metrics and compare with resulting metrics
actual := plugin.Apply(input...)
testutil.RequireMetricsEqual(t, expected, actual)
// Check getting the persisted state
// Because the cache is a map, the order of metrics in the state is not
// guaranteed, so check the string contents regardless of the order.
actualState, ok := pi.GetState().([]byte)
require.True(t, ok, "state is not a bytes array")
var expectedLen int
for _, m := range expectedState {
require.Contains(t, string(actualState), m)
expectedLen += len(m)
}
require.Len(t, actualState, expectedLen)
}

View file

@ -0,0 +1,4 @@
# Filter metrics with repeating field values
[[processors.dedup]]
## Maximum time to suppress output
dedup_interval = "600s"