Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-05-24 07:26:29 +02:00 · 2025-05-24 07:26:29 +02:00 · 4978089aab
commit 4978089aab
parent e393c3af3f
4963 changed files with 677545 additions and 0 deletions
--- a/plugins/processors/strings/README.md
+++ b/plugins/processors/strings/README.md
@ -0,0 +1,186 @@
+# Strings Processor Plugin
+
+The `strings` plugin maps certain go string functions onto measurement, tag, and
+field values.  Values can be modified in place or stored in another key.
+
+Implemented functions are:
+
+- lowercase
+- uppercase
+- titlecase
+- trim
+- trim_left
+- trim_right
+- trim_prefix
+- trim_suffix
+- replace
+- left
+- base64decode
+- valid_utf8
+
+Please note that in this implementation these are processed in the order that
+they appear above.
+
+Specify the `measurement`, `tag`, `tag_key`, `field`, or `field_key` that you
+want processed in each section and optionally a `dest` if you want the result
+stored in a new tag or field. You can specify lots of transformations on data
+with a single strings processor.
+
+If you'd like to apply the change to every `tag`, `tag_key`, `field`,
+`field_key`, or `measurement`, use the value `"*"` for each respective
+field. Note that the `dest` field will be ignored if `"*"` is used.
+
+If you'd like to apply multiple processings to the same `tag_key` or
+`field_key`, note the process order stated above. See the second example
+below.
+
+## Global configuration options <!-- @/docs/includes/plugin_config.md -->
+
+In addition to the plugin-specific configuration settings, plugins support
+additional global and plugin configuration settings. These settings are used to
+modify metrics, tags, and field or create aliases and configure ordering, etc.
+See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
+
+[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
+
+## Configuration
+
+```toml @sample.conf
+# Perform string processing on tags, fields, and measurements
+[[processors.strings]]
+  ## Convert a field value to lowercase and store in a new field
+  # [[processors.strings.lowercase]]
+  #   field = "uri_stem"
+  #   dest = "uri_stem_normalised"
+
+  ## Convert a tag value to uppercase
+  # [[processors.strings.uppercase]]
+  #   tag = "method"
+
+  ## Convert a field value to titlecase
+  # [[processors.strings.titlecase]]
+  #   field = "status"
+
+  ## Trim leading and trailing whitespace using the default cutset
+  # [[processors.strings.trim]]
+  #   field = "message"
+
+  ## Trim leading characters in cutset
+  # [[processors.strings.trim_left]]
+  #   field = "message"
+  #   cutset = "\t"
+
+  ## Trim trailing characters in cutset
+  # [[processors.strings.trim_right]]
+  #   field = "message"
+  #   cutset = "\r\n"
+
+  ## Trim the given prefix from the field
+  # [[processors.strings.trim_prefix]]
+  #   field = "my_value"
+  #   prefix = "my_"
+
+  ## Trim the given suffix from the field
+  # [[processors.strings.trim_suffix]]
+  #   field = "read_count"
+  #   suffix = "_count"
+
+  ## Replace all non-overlapping instances of old with new
+  # [[processors.strings.replace]]
+  #   measurement = "*"
+  #   old = ":"
+  #   new = "_"
+
+  ## Trims strings based on width
+  # [[processors.strings.left]]
+  #   field = "message"
+  #   width = 10
+
+  ## Decode a base64 encoded utf-8 string
+  # [[processors.strings.base64decode]]
+  #   field = "message"
+
+  ## Sanitize a string to ensure it is a valid utf-8 string
+  ## Each run of invalid UTF-8 byte sequences is replaced by the replacement string, which may be empty
+  # [[processors.strings.valid_utf8]]
+  #   field = "message"
+  #   replacement = ""
+```
+
+### Trim, TrimLeft, TrimRight
+
+The `trim`, `trim_left`, and `trim_right` functions take an optional parameter:
+`cutset`.  This value is a string containing the characters to remove from the
+value. If `cutset` is omitted or empty, whitespace is removed instead.
+
+### TrimPrefix, TrimSuffix
+
+The `trim_prefix` and `trim_suffix` functions remove the given `prefix` or
+`suffix` respectively from the string.
+
+### Replace
+
+The `replace` function does a substring replacement across the entire
+string to allow for different conventions between various input and output
+plugins. Some example usages are eliminating disallowed characters in
+field names or converting one separator convention into another.
+It can also be used to eliminate unneeded characters that were in metrics.
+If the replacement would leave the entire value empty, the operation is
+skipped and the old value is kept.
+
+## Example
+
+A sample configuration:
+
+```toml
+[[processors.strings]]
+  [[processors.strings.lowercase]]
+    tag = "uri_stem"
+
+  [[processors.strings.trim_prefix]]
+    tag = "uri_stem"
+    prefix = "/api/"
+
+  [[processors.strings.uppercase]]
+    field = "cs-host"
+    dest = "cs-host_normalised"
+```
+
+Sample input:
+
+```text
+iis_log,method=get,uri_stem=/API/HealthCheck cs-host="MIXEDCASE_host",http_version=1.1 1519652321000000000
+```
+
+Sample output:
+
+```text
+iis_log,method=get,uri_stem=healthcheck cs-host="MIXEDCASE_host",http_version=1.1,cs-host_normalised="MIXEDCASE_HOST" 1519652321000000000
+```
+
+### Second Example
+
+A sample configuration:
+
+```toml
+[[processors.strings]]
+  [[processors.strings.lowercase]]
+    tag_key = "URI-Stem"
+
+  [[processors.strings.replace]]
+    tag_key = "uri-stem"
+    old = "-"
+    new = "_"
+```
+
+Sample input:
+
+```text
+iis_log,URI-Stem=/API/HealthCheck http_version=1.1 1519652321000000000
+```
+
+Sample output:
+
+```text
+iis_log,uri_stem=/API/HealthCheck http_version=1.1 1519652321000000000
+```
--- a/plugins/processors/strings/sample.conf
+++ b/plugins/processors/strings/sample.conf
@ -0,0 +1,59 @@
+# Perform string processing on tags, fields, and measurements
+[[processors.strings]]
+  ## Convert a field value to lowercase and store in a new field
+  # [[processors.strings.lowercase]]
+  #   field = "uri_stem"
+  #   dest = "uri_stem_normalised"
+
+  ## Convert a tag value to uppercase
+  # [[processors.strings.uppercase]]
+  #   tag = "method"
+
+  ## Convert a field value to titlecase
+  # [[processors.strings.titlecase]]
+  #   field = "status"
+
+  ## Trim leading and trailing whitespace using the default cutset
+  # [[processors.strings.trim]]
+  #   field = "message"
+
+  ## Trim leading characters in cutset
+  # [[processors.strings.trim_left]]
+  #   field = "message"
+  #   cutset = "\t"
+
+  ## Trim trailing characters in cutset
+  # [[processors.strings.trim_right]]
+  #   field = "message"
+  #   cutset = "\r\n"
+
+  ## Trim the given prefix from the field
+  # [[processors.strings.trim_prefix]]
+  #   field = "my_value"
+  #   prefix = "my_"
+
+  ## Trim the given suffix from the field
+  # [[processors.strings.trim_suffix]]
+  #   field = "read_count"
+  #   suffix = "_count"
+
+  ## Replace all non-overlapping instances of old with new
+  # [[processors.strings.replace]]
+  #   measurement = "*"
+  #   old = ":"
+  #   new = "_"
+
+  ## Trims strings based on width
+  # [[processors.strings.left]]
+  #   field = "message"
+  #   width = 10
+
+  ## Decode a base64 encoded utf-8 string
+  # [[processors.strings.base64decode]]
+  #   field = "message"
+
+  ## Sanitize a string to ensure it is a valid utf-8 string
+  ## Each run of invalid UTF-8 byte sequences is replaced by the replacement string, which may be empty
+  # [[processors.strings.valid_utf8]]
+  #   field = "message"
+  #   replacement = ""
--- a/plugins/processors/strings/strings.go
+++ b/plugins/processors/strings/strings.go
@ -0,0 +1,289 @@
+//go:generate ../../../tools/readme_config_includer/generator
+package strings
+
+import (
+	_ "embed"
+	"encoding/base64"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+
+	"golang.org/x/text/cases"
+	"golang.org/x/text/language"
+
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/plugins/processors"
+)
+
+//go:embed sample.conf
+var sampleConfig string
+
+// Strings is the processor's configuration and state. Each exported field
+// holds the converters configured under the TOML table of the same name.
+type Strings struct {
+	Lowercase    []converter `toml:"lowercase"`
+	Uppercase    []converter `toml:"uppercase"`
+	Titlecase    []converter `toml:"titlecase"`
+	Trim         []converter `toml:"trim"`
+	TrimLeft     []converter `toml:"trim_left"`
+	TrimRight    []converter `toml:"trim_right"`
+	TrimPrefix   []converter `toml:"trim_prefix"`
+	TrimSuffix   []converter `toml:"trim_suffix"`
+	Replace      []converter `toml:"replace"`
+	Left         []converter `toml:"left"`
+	Base64Decode []converter `toml:"base64decode"`
+	ValidUTF8    []converter `toml:"valid_utf8"`
+
+	converters []converter // flattened list assembled by initOnce
+	init       bool        // set once initOnce has built converters
+}
+
+// ConvertFunc maps an input string to its transformed form.
+type ConvertFunc func(s string) string
+
+// converter describes one configured transformation: which part of the metric
+// it targets, where the result goes, and the options of the string function.
+type converter struct {
+	Field       string // field whose string value is converted; "*" selects every field
+	FieldKey    string // field whose key is converted; "*" selects every field
+	Tag         string // tag whose value is converted; "*" selects every tag
+	TagKey      string // tag whose key is converted; "*" selects every tag
+	Measurement string // measurement name to convert; "*" matches any name
+	Dest        string // optional key receiving a converted tag/field value; unused with "*"
+	Cutset      string // characters removed by trim, trim_left and trim_right
+	Suffix      string // suffix removed by trim_suffix
+	Prefix      string // prefix removed by trim_prefix
+	Old         string // substring searched for by replace
+	New         string // substitute written by replace
+	Width       int    // length limit used by left
+	Replacement string // substitute for invalid UTF-8 used by valid_utf8
+
+	fn ConvertFunc // string function assigned in initOnce
+}
+
+func (c *converter) convertTag(metric telegraf.Metric) {
+	var tags map[string]string
+	if c.Tag == "*" {
+		tags = metric.Tags()
+	} else {
+		tags = make(map[string]string)
+		tv, ok := metric.GetTag(c.Tag)
+		if !ok {
+			return
+		}
+		tags[c.Tag] = tv
+	}
+
+	for dest, value := range tags {
+		if c.Tag != "*" && c.Dest != "" {
+			dest = c.Dest
+		}
+		metric.AddTag(dest, c.fn(value))
+	}
+}
+
+func (c *converter) convertTagKey(metric telegraf.Metric) {
+	var tags map[string]string
+	if c.TagKey == "*" {
+		tags = metric.Tags()
+	} else {
+		tags = make(map[string]string)
+		tv, ok := metric.GetTag(c.TagKey)
+		if !ok {
+			return
+		}
+		tags[c.TagKey] = tv
+	}
+
+	for key, value := range tags {
+		if k := c.fn(key); k != "" {
+			metric.RemoveTag(key)
+			metric.AddTag(k, value)
+		}
+	}
+}
+
+func (c *converter) convertField(metric telegraf.Metric) {
+	var fields map[string]interface{}
+	if c.Field == "*" {
+		fields = metric.Fields()
+	} else {
+		fields = make(map[string]interface{})
+		fv, ok := metric.GetField(c.Field)
+		if !ok {
+			return
+		}
+		fields[c.Field] = fv
+	}
+
+	for dest, value := range fields {
+		if c.Field != "*" && c.Dest != "" {
+			dest = c.Dest
+		}
+		if fv, ok := value.(string); ok {
+			metric.AddField(dest, c.fn(fv))
+		}
+	}
+}
+
+func (c *converter) convertFieldKey(metric telegraf.Metric) {
+	var fields map[string]interface{}
+	if c.FieldKey == "*" {
+		fields = metric.Fields()
+	} else {
+		fields = make(map[string]interface{})
+		fv, ok := metric.GetField(c.FieldKey)
+		if !ok {
+			return
+		}
+		fields[c.FieldKey] = fv
+	}
+
+	for key, value := range fields {
+		if k := c.fn(key); k != "" {
+			metric.RemoveField(key)
+			metric.AddField(k, value)
+		}
+	}
+}
+
+// convertMeasurement replaces the metric name with c.fn(name) when Measurement
+// is "*" or equals the current name; otherwise the metric is left alone.
+func (c *converter) convertMeasurement(metric telegraf.Metric) {
+	name := metric.Name()
+	if c.Measurement == "*" || name == c.Measurement {
+		metric.SetName(c.fn(name))
+	}
+}
+
+// convert runs the converter against every part of metric for which a selector
+// is configured, in the order field, field key, tag, tag key, measurement.
+func (c *converter) convert(metric telegraf.Metric) {
+	targets := [...]struct {
+		selector string
+		apply    func(*converter, telegraf.Metric)
+	}{
+		{c.Field, (*converter).convertField},
+		{c.FieldKey, (*converter).convertFieldKey},
+		{c.Tag, (*converter).convertTag},
+		{c.TagKey, (*converter).convertTagKey},
+		{c.Measurement, (*converter).convertMeasurement},
+	}
+
+	for _, target := range targets {
+		// An empty selector means that part of the metric is not configured.
+		if target.selector != "" {
+			target.apply(c, metric)
+		}
+	}
+}
+
+// initOnce builds the flat converter list from the configured sections on the
+// first call and is a no-op afterwards.
+//
+// Sections are appended in a fixed order (lowercase, uppercase, titlecase,
+// trim, trim_left, trim_right, trim_prefix, trim_suffix, replace, left,
+// base64decode, valid_utf8); Apply runs the converters in that same order.
+//
+// Every closure captures plain copies of the options it needs rather than the
+// range variable, so the result does not depend on per-iteration loop-variable
+// semantics (Go 1.22+).
+func (s *Strings) initOnce() {
+	if s.init {
+		return
+	}
+
+	total := len(s.Lowercase) + len(s.Uppercase) + len(s.Titlecase) +
+		len(s.Trim) + len(s.TrimLeft) + len(s.TrimRight) +
+		len(s.TrimPrefix) + len(s.TrimSuffix) + len(s.Replace) +
+		len(s.Left) + len(s.Base64Decode) + len(s.ValidUTF8)
+	s.converters = make([]converter, 0, total)
+
+	for _, c := range s.Lowercase {
+		c.fn = strings.ToLower
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.Uppercase {
+		c.fn = strings.ToUpper
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.Titlecase {
+		c.fn = func(value string) string {
+			return cases.Title(language.Und, cases.NoLower).String(value)
+		}
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.Trim {
+		// Without a cutset, whitespace is trimmed.
+		if cutset := c.Cutset; cutset != "" {
+			c.fn = func(value string) string { return strings.Trim(value, cutset) }
+		} else {
+			c.fn = func(value string) string { return strings.TrimFunc(value, unicode.IsSpace) }
+		}
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.TrimLeft {
+		if cutset := c.Cutset; cutset != "" {
+			c.fn = func(value string) string { return strings.TrimLeft(value, cutset) }
+		} else {
+			c.fn = func(value string) string { return strings.TrimLeftFunc(value, unicode.IsSpace) }
+		}
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.TrimRight {
+		if cutset := c.Cutset; cutset != "" {
+			c.fn = func(value string) string { return strings.TrimRight(value, cutset) }
+		} else {
+			c.fn = func(value string) string { return strings.TrimRightFunc(value, unicode.IsSpace) }
+		}
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.TrimPrefix {
+		prefix := c.Prefix
+		c.fn = func(value string) string { return strings.TrimPrefix(value, prefix) }
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.TrimSuffix {
+		suffix := c.Suffix
+		c.fn = func(value string) string { return strings.TrimSuffix(value, suffix) }
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.Replace {
+		oldText, newText := c.Old, c.New
+		c.fn = func(value string) string {
+			replaced := strings.ReplaceAll(value, oldText, newText)
+			// Never replace a value with nothing; keep the original instead.
+			if replaced == "" {
+				return value
+			}
+
+			return replaced
+		}
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.Left {
+		width := c.Width
+		c.fn = func(value string) string {
+			// A negative width would make the slice expression below panic,
+			// so such a setting leaves the value unchanged.
+			if width < 0 || len(value) < width {
+				return value
+			}
+
+			// len and slicing operate on bytes, so width counts bytes.
+			return value[:width]
+		}
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.Base64Decode {
+		c.fn = func(value string) string {
+			data, err := base64.StdEncoding.DecodeString(value)
+			if err != nil {
+				// Not valid base64: keep the input as is.
+				return value
+			}
+			if utf8.Valid(data) {
+				return string(data)
+			}
+			// Decoded bytes are not valid UTF-8: keep the input as is.
+			return value
+		}
+		s.converters = append(s.converters, c)
+	}
+	for _, c := range s.ValidUTF8 {
+		replacement := c.Replacement
+		c.fn = func(value string) string { return strings.ToValidUTF8(value, replacement) }
+		s.converters = append(s.converters, c)
+	}
+
+	s.init = true
+}
+
+// SampleConfig returns the sample configuration embedded from sample.conf.
+func (*Strings) SampleConfig() string {
+	return sampleConfig
+}
+
+// Apply runs every configured converter, in list order, against each metric in
+// in and returns the same slice. The converter list is built on the first call.
+func (s *Strings) Apply(in ...telegraf.Metric) []telegraf.Metric {
+	s.initOnce()
+
+	for i := range in {
+		for j := range s.converters {
+			s.converters[j].convert(in[i])
+		}
+	}
+
+	return in
+}
+
+// init registers the processor under the name "strings" with a factory that
+// returns an empty Strings value.
+func init() {
+	processors.Add("strings", func() telegraf.Processor { return &Strings{} })
+}
--- a/plugins/processors/strings/strings_test.go
+++ b/plugins/processors/strings/strings_test.go