
Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
4963 changed files with 677545 additions and 0 deletions


plugins/outputs/stackdriver/README.md
@@ -0,0 +1,115 @@
# Google Cloud Monitoring Output Plugin
This plugin writes metrics to a `project` in
[Google Cloud Monitoring][stackdriver] (formerly called Stackdriver).
[Authentication][authentication] with Google Cloud is required using either a
service account or user credentials.
> [!IMPORTANT]
> This plugin accesses APIs which are [chargeable][pricing] and might incur
> costs.
By default, metrics are grouped by the `namespace` variable and metric key,
e.g. `custom.googleapis.com/telegraf/system/load5`. However, this is not
best practice. Setting `metric_name_format = "official"` produces the more
easily queried format `metric_type_prefix/[namespace_]name_key/kind`. If
the global namespace is not set, it is omitted from the name.
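For illustration, with `namespace = "telegraf"`, a gauge field `load5` on a
metric named `system` yields the following names (derived from the formats
above):

```text
path:     custom.googleapis.com/telegraf/system/load5
official: custom.googleapis.com/telegraf_system_load5/gauge
```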
⭐ Telegraf v1.9.0
🏷️ cloud, datastore
💻 all
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
In addition to the plugin-specific configuration settings, plugins support
additional global and plugin configuration settings. These settings are used to
modify metrics, tags, and fields or create aliases and configure ordering, etc.
See [CONFIGURATION.md][CONFIGURATION.md] for more details.
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
## Configuration
```toml @sample.conf
# Configuration for Google Cloud Stackdriver to send metrics to
[[outputs.stackdriver]]
## GCP Project
project = "erudite-bloom-151019"
## The namespace for the metric descriptor
## This is optional and users are encouraged to set the namespace as a
## resource label instead. If omitted it is not included in the metric name.
namespace = "telegraf"
## Metric Type Prefix
## The DNS name used with the metric type as a prefix.
# metric_type_prefix = "custom.googleapis.com"
## Metric Name Format
## Specifies the layout of the metric name, choose from:
## * path: 'metric_type_prefix/namespace/name/key'
## * official: 'metric_type_prefix/namespace_name_key/kind'
# metric_name_format = "path"
## Metric Data Type
## By default, telegraf will use whatever type the metric comes in as.
## However, for some use cases forcing a specific type may be preferred:
## * source: use whatever was passed in
## * double: preferred datatype to allow queries by PromQL.
# metric_data_type = "source"
## Tags as resource labels
## Tags defined in this option, when they exist, are added as a resource
## label and not included as a metric label. The values from tags override
## the values defined under the resource_labels config options.
# tags_as_resource_label = []
## Custom resource type
# resource_type = "generic_node"
## Override metric type by metric name
## Metric names matching the values here, globbing supported, will have the
## metric type set to the corresponding type.
# metric_counter = []
# metric_gauge = []
# metric_histogram = []
## NOTE: Due to the way TOML is parsed, tables must be at the END of the
## plugin definition, otherwise additional config options are read as part of
## the table
## Additional resource labels
# [outputs.stackdriver.resource_labels]
# node_id = "$HOSTNAME"
# namespace = "myapp"
# location = "eu-north0"
```
## Restrictions
Stackdriver does not support string values in custom metrics; any string fields
will not be written.
The Stackdriver API does not allow writing points which are out of order, older
than 24 hours, or at a resolution finer than one point per minute.
Since Telegraf writes the newest points first and moves backwards through the
metric buffer, it may not be possible to write historical data after an
interruption.
Points collected more frequently than once per minute may need to be aggregated
before they can be written. Consider using the [basicstats][] aggregator to do
this, as sketched below.
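A minimal sketch of such an aggregator configuration; the `period`,
`drop_original`, and `stats` values here are illustrative assumptions to
adapt, not recommendations:

```toml
[[aggregators.basicstats]]
  ## Aggregate in one-minute windows to match Stackdriver's
  ## one-point-per-minute resolution limit.
  period = "1m"
  ## Replace the raw points with the aggregate.
  drop_original = true
  ## Emit only the mean of each numeric field.
  stats = ["mean"]
```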
Histograms are supported only for metrics produced by the Prometheus metric
version 1 parser. The version 2 parser generates sparse metrics that would need
to be heavily transformed before sending to Stackdriver.
Note that the plugin keeps an in-memory cache of the start times and last
observed values of all COUNTER metrics in order to comply with the requirements
of the Stackdriver API. This cache is never garbage-collected: if you remove a
large number of counters from the input side, you may wish to restart Telegraf
to clear it.
[basicstats]: /plugins/aggregators/basicstats/README.md
[stackdriver]: https://cloud.google.com/monitoring/api/v3/
[authentication]: https://cloud.google.com/docs/authentication/getting-started
[pricing]: https://cloud.google.com/stackdriver/pricing#google-clouds-operations-suite-pricing


plugins/outputs/stackdriver/counter_cache.go
@@ -0,0 +1,99 @@
package stackdriver
import (
"path"
"sort"
"strings"
"sync"
"time"
monpb "cloud.google.com/go/monitoring/apiv3/v2/monitoringpb"
tspb "google.golang.org/protobuf/types/known/timestamppb"
"github.com/influxdata/telegraf"
)
type counterCache struct {
sync.RWMutex
cache map[string]*counterCacheEntry
log telegraf.Logger
}
type counterCacheEntry struct {
LastValue *monpb.TypedValue
StartTime *tspb.Timestamp
}
func (cce *counterCacheEntry) Reset(ts *tspb.Timestamp) {
// always backdate a reset by -1ms, otherwise stackdriver's API will hate us
cce.StartTime = tspb.New(ts.AsTime().Add(time.Millisecond * -1))
}
func (cc *counterCache) get(key string) (*counterCacheEntry, bool) {
cc.RLock()
defer cc.RUnlock()
value, ok := cc.cache[key]
return value, ok
}
func (cc *counterCache) set(key string, value *counterCacheEntry) {
cc.Lock()
defer cc.Unlock()
cc.cache[key] = value
}
func (cc *counterCache) GetStartTime(key string, value *monpb.TypedValue, endTime *tspb.Timestamp) *tspb.Timestamp {
lastObserved, ok := cc.get(key)
// init: create a new key, backdate the start time to 1ms before the end time
if !ok {
newEntry := NewCounterCacheEntry(value, endTime)
cc.set(key, newEntry)
return newEntry.StartTime
}
// update of existing entry
if value.GetDoubleValue() < lastObserved.LastValue.GetDoubleValue() || value.GetInt64Value() < lastObserved.LastValue.GetInt64Value() {
// counter reset
lastObserved.Reset(endTime)
} else {
// counter increment
//
// ...but...
// start times cannot be over 25 hours old; reset after 1 day to be safe
age := endTime.GetSeconds() - lastObserved.StartTime.GetSeconds()
if age > 86400 {
lastObserved.Reset(endTime)
}
}
// update last observed value
lastObserved.LastValue = value
return lastObserved.StartTime
}
func NewCounterCache(log telegraf.Logger) *counterCache {
return &counterCache{
cache: make(map[string]*counterCacheEntry),
log: log}
}
func NewCounterCacheEntry(value *monpb.TypedValue, ts *tspb.Timestamp) *counterCacheEntry {
// Start times must be _before_ the end time, so backdate our original start time
// to 1ms before the observed time.
backDatedStart := ts.AsTime().Add(time.Millisecond * -1)
return &counterCacheEntry{LastValue: value, StartTime: tspb.New(backDatedStart)}
}
func GetCounterCacheKey(m telegraf.Metric, f *telegraf.Field) string {
// normalize tag list to form a predictable key
tags := make([]string, 0, len(m.TagList()))
for _, t := range m.TagList() {
tags = append(tags, strings.Join([]string{t.Key, t.Value}, "="))
}
sort.Strings(tags)
key := ""
if f != nil {
key = f.Key
}
return path.Join(m.Name(), strings.Join(tags, "/"), key)
}


plugins/outputs/stackdriver/counter_cache_test.go
@@ -0,0 +1,166 @@
package stackdriver
import (
"testing"
"time"
monpb "cloud.google.com/go/monitoring/apiv3/v2/monitoringpb"
tspb "google.golang.org/protobuf/types/known/timestamppb"
"github.com/influxdata/telegraf/logger"
)
func TestCreateCounterCacheEntry(t *testing.T) {
cc := NewCounterCache(logger.New("outputs", "stackdriver", "TestCreateCounterCacheEntry"))
value := &monpb.TypedValue{
Value: &monpb.TypedValue_Int64Value{
Int64Value: int64(1),
},
}
endTime := tspb.Now()
startTime := cc.GetStartTime("key", value, endTime)
if endTime.AsTime().Add(time.Millisecond*-1) != startTime.AsTime() {
t.Fatal("Start time on a new entry should be 1ms behind the end time")
}
}
func TestUpdateCounterCacheEntry(t *testing.T) {
cc := NewCounterCache(logger.New("outputs", "stackdriver", "TestUpdateCounterCacheEntry"))
now := time.Now().UTC()
value := &monpb.TypedValue{
Value: &monpb.TypedValue_Int64Value{
Int64Value: int64(1),
},
}
endTime := tspb.New(now)
startTime := cc.GetStartTime("key", value, endTime)
if endTime.AsTime().Add(time.Millisecond*-1) != startTime.AsTime() {
t.Fatal("Start time on a new entry should be 1ms behind the end time")
}
// next observation, 1m later
value = &monpb.TypedValue{
Value: &monpb.TypedValue_Int64Value{
Int64Value: int64(2),
},
}
endTime = tspb.New(now.Add(time.Second * 60))
startTime = cc.GetStartTime("key", value, endTime)
// startTime is unchanged
if startTime.GetSeconds() != now.Unix() {
t.Fatal("Returned start time on an updated counter on the same day should not change")
}
obs, ok := cc.get("key")
if !ok {
t.Fatal("GetStartTime should create a fetchable k/v")
}
if obs.StartTime != startTime {
t.Fatal("Start time on fetched observation should match output from GetStartTime()")
}
if obs.LastValue != value {
t.Fatal("Stored value on fetched observation should have been updated.")
}
}
func TestCounterCounterCacheEntryReset(t *testing.T) {
cc := NewCounterCache(logger.New("outputs", "stackdriver", "TestCounterCounterCacheEntryReset"))
now := time.Now().UTC()
backdatedNow := now.Add(time.Millisecond * -1)
value := &monpb.TypedValue{
Value: &monpb.TypedValue_Int64Value{
Int64Value: int64(2),
},
}
endTime := tspb.New(now)
startTime := cc.GetStartTime("key", value, endTime)
if startTime.AsTime() != backdatedNow {
t.Fatal("Start time on a new entry should be 1ms behind the end time")
}
// next observation, 1m later, but a lower value
value = &monpb.TypedValue{
Value: &monpb.TypedValue_Int64Value{
Int64Value: int64(1),
},
}
later := now.Add(time.Second * 60)
endTime = tspb.New(later)
startTime = cc.GetStartTime("key", value, endTime)
// startTime should now be the new endTime -1ms
if startTime.AsTime() != later.Add(time.Millisecond*-1) {
t.Fatal("Returned start time after a counter reset should equal the end time minus 1ms")
}
obs, ok := cc.get("key")
if !ok {
t.Fatal("GetStartTime should create a fetchable k/v")
}
if obs.StartTime.AsTime() != endTime.AsTime().Add(time.Millisecond*-1) {
t.Fatal("Start time on fetched observation after a counter reset should equal the end time minus 1ms")
}
if obs.LastValue != value {
t.Fatal("Stored value on fetched observation should have been updated.")
}
}
func TestCounterCacheDayRollover(t *testing.T) {
cc := NewCounterCache(logger.New("outputs", "stackdriver", "TestCounterCacheDayRollover"))
now := time.Now().UTC()
backdatedNow := now.Add(time.Millisecond * -1)
value := &monpb.TypedValue{
Value: &monpb.TypedValue_Int64Value{
Int64Value: int64(1),
},
}
endTime := tspb.New(now)
startTime := cc.GetStartTime("key", value, endTime)
if startTime.AsTime() != backdatedNow {
t.Fatal("Start time on a new entry should be 1ms behind the end time")
}
// next observation, 24h later
value = &monpb.TypedValue{
Value: &monpb.TypedValue_Int64Value{
Int64Value: int64(2),
},
}
later := now.Add(time.Hour * 24)
endTime = tspb.New(later)
startTime = cc.GetStartTime("key", value, endTime)
if startTime.AsTime() != backdatedNow {
t.Fatalf("Returned start time %d 1s before a day rollover should equal the end time %d", startTime.GetSeconds(), now.Unix())
}
obs, ok := cc.get("key")
if !ok {
t.Fatal("GetStartTime should create a fetchable k/v")
}
if obs.StartTime.AsTime() != backdatedNow {
t.Fatal("Start time on an updated counter 1s before a day rollover should be unchanged")
}
if obs.LastValue != value {
t.Fatal("Stored value on an updated counter should have been updated.")
}
// next observation, 24h 1s later
value = &monpb.TypedValue{
Value: &monpb.TypedValue_Int64Value{
Int64Value: int64(3),
},
}
tomorrow := later.Add(time.Second * 1)
endTime = tspb.New(tomorrow)
startTime = cc.GetStartTime("key", value, endTime)
// startTime should now be reset to the new endTime (backdated by 1ms)
if startTime.GetSeconds() != tomorrow.Unix() {
t.Fatalf("Returned start time %d after a day rollover should equal the end time %d", startTime.GetSeconds(), tomorrow.Unix())
}
obs, ok = cc.get("key")
if !ok {
t.Fatal("GetStartTime should create a fetchable k/v")
}
if obs.StartTime.AsTime() != endTime.AsTime().Add(time.Millisecond*-1) {
t.Fatal("Start time on fetched observation after a day rollover should equal the new end time -1ms")
}
if obs.LastValue != value {
t.Fatal("Stored value on fetched observation should have been updated.")
}
}


plugins/outputs/stackdriver/sample.conf
@@ -0,0 +1,52 @@
# Configuration for Google Cloud Stackdriver to send metrics to
[[outputs.stackdriver]]
## GCP Project
project = "erudite-bloom-151019"
## The namespace for the metric descriptor
## This is optional and users are encouraged to set the namespace as a
## resource label instead. If omitted it is not included in the metric name.
namespace = "telegraf"
## Metric Type Prefix
## The DNS name used with the metric type as a prefix.
# metric_type_prefix = "custom.googleapis.com"
## Metric Name Format
## Specifies the layout of the metric name, choose from:
## * path: 'metric_type_prefix/namespace/name/key'
## * official: 'metric_type_prefix/namespace_name_key/kind'
# metric_name_format = "path"
## Metric Data Type
## By default, telegraf will use whatever type the metric comes in as.
## However, for some use cases forcing a specific type may be preferred:
## * source: use whatever was passed in
## * double: preferred datatype to allow queries by PromQL.
# metric_data_type = "source"
## Tags as resource labels
## Tags defined in this option, when they exist, are added as a resource
## label and not included as a metric label. The values from tags override
## the values defined under the resource_labels config options.
# tags_as_resource_label = []
## Custom resource type
# resource_type = "generic_node"
## Override metric type by metric name
## Metric names matching the values here, globbing supported, will have the
## metric type set to the corresponding type.
# metric_counter = []
# metric_gauge = []
# metric_histogram = []
## NOTE: Due to the way TOML is parsed, tables must be at the END of the
## plugin definition, otherwise additional config options are read as part of
## the table
## Additional resource labels
# [outputs.stackdriver.resource_labels]
# node_id = "$HOSTNAME"
# namespace = "myapp"
# location = "eu-north0"


plugins/outputs/stackdriver/stackdriver.go
@@ -0,0 +1,694 @@
//go:generate ../../../tools/readme_config_includer/generator
package stackdriver
import (
"context"
_ "embed"
"errors"
"fmt"
"hash/fnv"
"path"
"sort"
"strconv"
"strings"
monitoring "cloud.google.com/go/monitoring/apiv3/v2"
"cloud.google.com/go/monitoring/apiv3/v2/monitoringpb"
"google.golang.org/api/option"
"google.golang.org/genproto/googleapis/api/distribution"
metricpb "google.golang.org/genproto/googleapis/api/metric"
monitoredrespb "google.golang.org/genproto/googleapis/api/monitoredres"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/types/known/timestamppb"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/outputs"
)
//go:embed sample.conf
var sampleConfig string
// Stackdriver is the Google Stackdriver config info.
type Stackdriver struct {
Project string `toml:"project"`
Namespace string `toml:"namespace"`
ResourceType string `toml:"resource_type"`
ResourceLabels map[string]string `toml:"resource_labels"`
MetricTypePrefix string `toml:"metric_type_prefix"`
MetricNameFormat string `toml:"metric_name_format"`
MetricDataType string `toml:"metric_data_type"`
TagsAsResourceLabels []string `toml:"tags_as_resource_label"`
MetricCounter []string `toml:"metric_counter"`
MetricGauge []string `toml:"metric_gauge"`
MetricHistogram []string `toml:"metric_histogram"`
Log telegraf.Logger `toml:"-"`
client *monitoring.MetricClient
counterCache *counterCache
filterCounter filter.Filter
filterGauge filter.Filter
filterHistogram filter.Filter
}
const (
// The user-defined limits are documented at:
// https://cloud.google.com/monitoring/quotas#custom_metrics_quotas
// QuotaLabelsPerMetricDescriptor is the limit
// on the number of labels (tags) per metric descriptor.
QuotaLabelsPerMetricDescriptor = 30
// QuotaStringLengthForLabelKey is the limit
// on the string length of a label key.
QuotaStringLengthForLabelKey = 100
// QuotaStringLengthForLabelValue is the limit
// on the string length of a label value.
QuotaStringLengthForLabelValue = 1024
// MaxInt is the maximum value of the platform's int type.
MaxInt = int(^uint(0) >> 1)
)
func (s *Stackdriver) Init() error {
if s.MetricTypePrefix == "" {
s.MetricTypePrefix = "custom.googleapis.com"
}
switch s.MetricNameFormat {
case "":
s.MetricNameFormat = "path"
case "path", "official":
default:
return fmt.Errorf("unrecognized metric name format: %s", s.MetricNameFormat)
}
switch s.MetricDataType {
case "":
s.MetricDataType = "source"
case "source", "double":
default:
return fmt.Errorf("unrecognized metric data type: %s", s.MetricDataType)
}
var err error
s.filterCounter, err = filter.Compile(s.MetricCounter)
if err != nil {
return fmt.Errorf("creating counter filter failed: %w", err)
}
s.filterGauge, err = filter.Compile(s.MetricGauge)
if err != nil {
return fmt.Errorf("creating gauge filter failed: %w", err)
}
s.filterHistogram, err = filter.Compile(s.MetricHistogram)
if err != nil {
return fmt.Errorf("creating histogram filter failed: %w", err)
}
return nil
}
func (*Stackdriver) SampleConfig() string {
return sampleConfig
}
// Connect initiates the primary connection to the GCP project.
func (s *Stackdriver) Connect() error {
if s.Project == "" {
return errors.New("project is a required field for stackdriver output")
}
if s.Namespace == "" {
s.Log.Warn("plugin-level namespace is empty")
}
if s.ResourceType == "" {
s.ResourceType = "global"
}
if s.ResourceLabels == nil {
s.ResourceLabels = make(map[string]string, 1)
}
if s.counterCache == nil {
s.counterCache = NewCounterCache(s.Log)
}
s.ResourceLabels["project_id"] = s.Project
if s.client == nil {
ctx := context.Background()
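// NewMetricClient authenticates via Application Default Credentials,
// e.g. a service-account key referenced by GOOGLE_APPLICATION_CREDENTIALS.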
client, err := monitoring.NewMetricClient(ctx, option.WithUserAgent(internal.ProductToken()))
if err != nil {
return err
}
s.client = client
}
return nil
}
// sorted returns a copy of the metrics in time-ascending order. A copy is
// made to avoid modifying the input metric slice, since doing so is not
// allowed.
func sorted(metrics []telegraf.Metric) []telegraf.Metric {
batch := make([]telegraf.Metric, 0, len(metrics))
for i := len(metrics) - 1; i >= 0; i-- {
batch = append(batch, metrics[i])
}
sort.Slice(batch, func(i, j int) bool {
return batch[i].Time().Before(batch[j].Time())
})
return batch
}
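// timeSeriesBuckets groups time series by metric name, field keys and tags.
// A single CreateTimeSeries request may contain at most one point per time
// series, so colliding points are parked here and drained into separate
// requests.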
type timeSeriesBuckets map[uint64][]*monitoringpb.TimeSeries
func (tsb timeSeriesBuckets) Add(m telegraf.Metric, f []*telegraf.Field, ts *monitoringpb.TimeSeries) {
h := fnv.New64a()
h.Write([]byte(m.Name()))
h.Write([]byte{'\n'})
for _, field := range f {
h.Write([]byte(field.Key))
h.Write([]byte{'\n'})
}
for key, value := range m.Tags() {
h.Write([]byte(key))
h.Write([]byte{'\n'})
h.Write([]byte(value))
h.Write([]byte{'\n'})
}
k := h.Sum64()
s := tsb[k]
s = append(s, ts)
tsb[k] = s
}
// Split metrics up by timestamp and send to Google Cloud Stackdriver
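// Groups are flushed in ascending timestamp order, since the Stackdriver
// API rejects out-of-order writes.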
func (s *Stackdriver) Write(metrics []telegraf.Metric) error {
metricBatch := make(map[int64][]telegraf.Metric)
timestamps := make([]int64, 0, len(metrics))
for _, metric := range sorted(metrics) {
timestamp := metric.Time().UnixNano()
if existingSlice, ok := metricBatch[timestamp]; ok {
metricBatch[timestamp] = append(existingSlice, metric)
} else {
metricBatch[timestamp] = []telegraf.Metric{metric}
timestamps = append(timestamps, timestamp)
}
}
// sort the timestamps we collected
sort.Slice(timestamps, func(i, j int) bool { return timestamps[i] < timestamps[j] })
s.Log.Debugf("received %d metrics\n", len(metrics))
s.Log.Debugf("split into %d groups by timestamp\n", len(metricBatch))
for _, timestamp := range timestamps {
if err := s.sendBatch(metricBatch[timestamp]); err != nil {
return err
}
}
return nil
}
// sendBatch writes a batch of metrics to Google Cloud Stackdriver.
func (s *Stackdriver) sendBatch(batch []telegraf.Metric) error {
ctx := context.Background()
buckets := make(timeSeriesBuckets)
for _, m := range batch {
// Set metric types based on user-provided filter
metricType := m.Type()
if s.filterCounter != nil && s.filterCounter.Match(m.Name()) {
metricType = telegraf.Counter
}
if s.filterGauge != nil && s.filterGauge.Match(m.Name()) {
metricType = telegraf.Gauge
}
if s.filterHistogram != nil && s.filterHistogram.Match(m.Name()) {
metricType = telegraf.Histogram
}
metricKind, err := getStackdriverMetricKind(metricType)
if err != nil {
s.Log.Errorf("Get kind for metric %q (%T) failed: %s", m.Name(), metricType, err)
continue
}
// Convert any declared tag to a resource label and remove it from
// the metric
resourceLabels := make(map[string]string, len(s.ResourceLabels)+len(s.TagsAsResourceLabels))
for k, v := range s.ResourceLabels {
resourceLabels[k] = v
}
for _, tag := range s.TagsAsResourceLabels {
if val, ok := m.GetTag(tag); ok {
resourceLabels[tag] = val
m.RemoveTag(tag)
}
}
if metricType == telegraf.Histogram {
value, err := buildHistogram(m)
if err != nil {
s.Log.Errorf("Unable to build distribution from metric %s: %s", m, err)
continue
}
startTime, endTime := getStackdriverIntervalEndpoints(metricKind, value, m, nil, s.counterCache)
timeInterval, err := getStackdriverTimeInterval(metricKind, startTime, endTime)
if err != nil {
s.Log.Errorf("Get time interval failed: %s", err)
continue
}
// Prepare an individual data point.
dataPoint := &monitoringpb.Point{
Interval: timeInterval,
Value: value,
}
// Prepare time series.
timeSeries := &monitoringpb.TimeSeries{
Metric: &metricpb.Metric{
Type: s.generateHistogramName(m),
Labels: s.getStackdriverLabels(m.TagList()),
},
MetricKind: metricKind,
Resource: &monitoredrespb.MonitoredResource{
Type: s.ResourceType,
Labels: resourceLabels,
},
Points: []*monitoringpb.Point{
dataPoint,
},
}
buckets.Add(m, m.FieldList(), timeSeries)
continue
}
for _, f := range m.FieldList() {
value, err := s.getStackdriverTypedValue(f.Value)
if err != nil {
s.Log.Errorf("Get type failed: %q", err)
continue
}
if value == nil {
continue
}
startTime, endTime := getStackdriverIntervalEndpoints(metricKind, value, m, f, s.counterCache)
timeInterval, err := getStackdriverTimeInterval(metricKind, startTime, endTime)
if err != nil {
s.Log.Errorf("Get time interval failed: %s", err)
continue
}
// Prepare an individual data point.
dataPoint := &monitoringpb.Point{
Interval: timeInterval,
Value: value,
}
// Prepare time series.
timeSeries := &monitoringpb.TimeSeries{
Metric: &metricpb.Metric{
Type: s.generateMetricName(m, metricType, f.Key),
Labels: s.getStackdriverLabels(m.TagList()),
},
MetricKind: metricKind,
Resource: &monitoredrespb.MonitoredResource{
Type: s.ResourceType,
Labels: resourceLabels,
},
Points: []*monitoringpb.Point{
dataPoint,
},
}
buckets.Add(m, []*telegraf.Field{f}, timeSeries)
// If the metric is untyped, it will end with unknown. We will also
// send another metric with the unknown:counter suffix. Google will
// do some heuristics to know which one to use for queries. This
// only occurs when using the official name format.
if s.MetricNameFormat == "official" && strings.HasSuffix(timeSeries.Metric.Type, "unknown") {
metricKind := metricpb.MetricDescriptor_CUMULATIVE
startTime, endTime := getStackdriverIntervalEndpoints(metricKind, value, m, f, s.counterCache)
timeInterval, err := getStackdriverTimeInterval(metricKind, startTime, endTime)
if err != nil {
s.Log.Errorf("Get time interval failed: %s", err)
continue
}
dataPoint := &monitoringpb.Point{
Interval: timeInterval,
Value: value,
}
counterTimeSeries := &monitoringpb.TimeSeries{
Metric: &metricpb.Metric{
Type: s.generateMetricName(m, metricType, f.Key) + ":counter",
Labels: s.getStackdriverLabels(m.TagList()),
},
MetricKind: metricpb.MetricDescriptor_CUMULATIVE,
Resource: &monitoredrespb.MonitoredResource{
Type: s.ResourceType,
Labels: resourceLabels,
},
Points: []*monitoringpb.Point{
dataPoint,
},
}
buckets.Add(m, []*telegraf.Field{f}, counterTimeSeries)
}
}
}
// process the buckets in order
keys := make([]uint64, 0, len(buckets))
for k := range buckets {
keys = append(keys, k)
}
sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
for len(buckets) != 0 {
// can send up to 200 time series to stackdriver
timeSeries := make([]*monitoringpb.TimeSeries, 0, 200)
for i := 0; i < len(keys) && len(timeSeries) < cap(timeSeries); i++ {
k := keys[i]
s := buckets[k]
timeSeries = append(timeSeries, s[0])
if len(s) == 1 {
delete(buckets, k)
keys = append(keys[:i], keys[i+1:]...)
i--
continue
}
s = s[1:]
buckets[k] = s
}
// Prepare time series request.
timeSeriesRequest := &monitoringpb.CreateTimeSeriesRequest{
Name: "projects/" + s.Project,
TimeSeries: timeSeries,
}
// Create the time series in Stackdriver.
err := s.client.CreateTimeSeries(ctx, timeSeriesRequest)
if err != nil {
if errStatus, ok := status.FromError(err); ok {
if errStatus.Code().String() == "InvalidArgument" {
s.Log.Warnf("Unable to write to Stackdriver - dropping metrics: %s", err)
return nil
}
}
s.Log.Errorf("Unable to write to Stackdriver: %s", err)
return err
}
}
return nil
}
func (s *Stackdriver) generateMetricName(m telegraf.Metric, metricType telegraf.ValueType, key string) string {
if s.MetricNameFormat == "path" {
return path.Join(s.MetricTypePrefix, s.Namespace, m.Name(), key)
}
name := m.Name() + "_" + key
if s.Namespace != "" {
name = s.Namespace + "_" + m.Name() + "_" + key
}
var kind string
switch metricType {
case telegraf.Gauge:
kind = "gauge"
case telegraf.Untyped:
kind = "unknown"
case telegraf.Counter:
kind = "counter"
case telegraf.Histogram:
kind = "histogram"
default:
kind = ""
}
return path.Join(s.MetricTypePrefix, name, kind)
}
func (s *Stackdriver) generateHistogramName(m telegraf.Metric) string {
if s.MetricNameFormat == "path" {
return path.Join(s.MetricTypePrefix, s.Namespace, m.Name())
}
name := m.Name()
if s.Namespace != "" {
name = s.Namespace + "_" + m.Name()
}
return path.Join(s.MetricTypePrefix, name, "histogram")
}
func getStackdriverIntervalEndpoints(
kind metricpb.MetricDescriptor_MetricKind,
value *monitoringpb.TypedValue,
m telegraf.Metric,
f *telegraf.Field,
cc *counterCache,
) (start, end *timestamppb.Timestamp) {
endTime := timestamppb.New(m.Time())
var startTime *timestamppb.Timestamp
if kind == metricpb.MetricDescriptor_CUMULATIVE {
// Interval starts for stackdriver CUMULATIVE metrics must reset any time
// the counter resets, so we keep a cache of the start times and last
// observed values for each counter in the batch.
startTime = cc.GetStartTime(GetCounterCacheKey(m, f), value, endTime)
}
return startTime, endTime
}
func getStackdriverTimeInterval(m metricpb.MetricDescriptor_MetricKind, startTime, endTime *timestamppb.Timestamp) (*monitoringpb.TimeInterval, error) {
switch m {
case metricpb.MetricDescriptor_GAUGE:
return &monitoringpb.TimeInterval{
EndTime: endTime,
}, nil
case metricpb.MetricDescriptor_CUMULATIVE:
return &monitoringpb.TimeInterval{
StartTime: startTime,
EndTime: endTime,
}, nil
case metricpb.MetricDescriptor_DELTA, metricpb.MetricDescriptor_METRIC_KIND_UNSPECIFIED:
fallthrough
default:
return nil, fmt.Errorf("unsupported metric kind %T", m)
}
}
func getStackdriverMetricKind(vt telegraf.ValueType) (metricpb.MetricDescriptor_MetricKind, error) {
switch vt {
case telegraf.Untyped:
return metricpb.MetricDescriptor_GAUGE, nil
case telegraf.Gauge:
return metricpb.MetricDescriptor_GAUGE, nil
case telegraf.Counter:
return metricpb.MetricDescriptor_CUMULATIVE, nil
case telegraf.Histogram:
return metricpb.MetricDescriptor_CUMULATIVE, nil
case telegraf.Summary:
fallthrough
default:
return metricpb.MetricDescriptor_METRIC_KIND_UNSPECIFIED, fmt.Errorf("unsupported telegraf value type: %T", vt)
}
}
func (s *Stackdriver) getStackdriverTypedValue(value interface{}) (*monitoringpb.TypedValue, error) {
if s.MetricDataType == "double" {
v, err := internal.ToFloat64(value)
if err != nil {
return nil, err
}
return &monitoringpb.TypedValue{
Value: &monitoringpb.TypedValue_DoubleValue{
DoubleValue: v,
},
}, nil
}
switch v := value.(type) {
case uint64:
if v <= uint64(MaxInt) {
return &monitoringpb.TypedValue{
Value: &monitoringpb.TypedValue_Int64Value{
Int64Value: int64(v),
},
}, nil
}
return &monitoringpb.TypedValue{
Value: &monitoringpb.TypedValue_Int64Value{
Int64Value: int64(MaxInt),
},
}, nil
case int64:
return &monitoringpb.TypedValue{
Value: &monitoringpb.TypedValue_Int64Value{
Int64Value: v,
},
}, nil
case float64:
return &monitoringpb.TypedValue{
Value: &monitoringpb.TypedValue_DoubleValue{
DoubleValue: v,
},
}, nil
case bool:
return &monitoringpb.TypedValue{
Value: &monitoringpb.TypedValue_BoolValue{
BoolValue: v,
},
}, nil
case string:
// String value types are not available for custom metrics
return nil, nil
default:
return nil, fmt.Errorf("value type \"%T\" not supported for stackdriver custom metrics", v)
}
}
func buildHistogram(m telegraf.Metric) (*monitoringpb.TypedValue, error) {
sumInter, ok := m.GetField("sum")
if !ok {
return nil, errors.New("no sum field present")
}
sum, err := internal.ToFloat64(sumInter)
if err != nil {
return nil, fmt.Errorf("unable to convert sum value to float64: %w", err)
}
m.RemoveField("sum")
countInter, ok := m.GetField("count")
if !ok {
return nil, errors.New("no count field present")
}
count, err := internal.ToFloat64(countInter)
if err != nil {
return nil, fmt.Errorf("unable to convert count value to float64: %w", err)
}
m.RemoveField("count")
// Build parallel lists of the bucket bounds and their counts
buckets := make([]float64, 0)
bucketCounts := make([]int64, 0)
for _, field := range m.FieldList() {
// Add the +inf value to bucket counts, no need to define a bound
if strings.Contains(strings.ToLower(field.Key), "+inf") {
count, err := internal.ToInt64(field.Value)
if err != nil {
continue
}
bucketCounts = append(bucketCounts, count)
continue
}
bucket, err := strconv.ParseFloat(field.Key, 64)
if err != nil {
continue
}
count, err := internal.ToInt64(field.Value)
if err != nil {
continue
}
buckets = append(buckets, bucket)
bucketCounts = append(bucketCounts, count)
}
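// Cumulative counts are non-decreasing as the bounds increase, so sorting
// bounds and counts independently keeps each count paired with its bound.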
sort.Slice(buckets, func(i, j int) bool {
return buckets[i] < buckets[j]
})
sort.Slice(bucketCounts, func(i, j int) bool {
return bucketCounts[i] < bucketCounts[j]
})
// The incoming bucket counts are running totals, as in Prometheus
// histograms, while Stackdriver expects the count within each bucket.
// Loop backwards, subtracting the previous cumulative count to recover
// each bucket's own count.
for i := len(bucketCounts) - 1; i > 0; i-- {
bucketCounts[i] = bucketCounts[i] - bucketCounts[i-1]
}
v := &monitoringpb.TypedValue{
Value: &monitoringpb.TypedValue_DistributionValue{
DistributionValue: &distribution.Distribution{
Count: int64(count),
Mean: sum / count,
BucketCounts: bucketCounts,
BucketOptions: &distribution.Distribution_BucketOptions{
Options: &distribution.Distribution_BucketOptions_ExplicitBuckets{
ExplicitBuckets: &distribution.Distribution_BucketOptions_Explicit{
Bounds: buckets,
},
},
},
},
},
}
return v, nil
}
func (s *Stackdriver) getStackdriverLabels(tags []*telegraf.Tag) map[string]string {
labels := make(map[string]string)
for _, t := range tags {
labels[t.Key] = t.Value
}
for k, v := range labels {
if len(k) > QuotaStringLengthForLabelKey {
s.Log.Warnf("Removing tag %q key exceeds string length for label key [%d]", k, QuotaStringLengthForLabelKey)
delete(labels, k)
continue
}
if len(v) > QuotaStringLengthForLabelValue {
s.Log.Warnf("Removing tag %q value exceeds string length for label value [%d]", k, QuotaStringLengthForLabelValue)
delete(labels, k)
continue
}
}
if len(labels) > QuotaLabelsPerMetricDescriptor {
excess := len(labels) - QuotaLabelsPerMetricDescriptor
s.Log.Warnf("Tag count [%d] exceeds quota for stackdriver labels [%d] removing [%d] random tags", len(labels), QuotaLabelsPerMetricDescriptor, excess)
for k := range labels {
if excess == 0 {
break
}
excess--
delete(labels, k)
}
}
return labels
}
// Close terminates the session to the backend, returning an error if an issue arises.
func (s *Stackdriver) Close() error {
return s.client.Close()
}
func newStackdriver() *Stackdriver {
return &Stackdriver{}
}
func init() {
outputs.Add("stackdriver", func() telegraf.Output {
return newStackdriver()
})
}

File diff suppressed because it is too large.