Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent e393c3af3f
commit 4978089aab
4963 changed files with 677545 additions and 0 deletions
models/running_output.go (new file, 401 lines)
@@ -0,0 +1,401 @@
package models

import (
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/internal"
	logging "github.com/influxdata/telegraf/logger"
	"github.com/influxdata/telegraf/selfstat"
)

const (
	// Default size of a metrics batch.
	DefaultMetricBatchSize = 1000

	// Default number of metrics kept. It should be a multiple of the batch size.
	DefaultMetricBufferLimit = 10000
)

// OutputConfig contains the name, filter, and settings of an output plugin.
type OutputConfig struct {
	Name                 string
	Source               string
	Alias                string
	ID                   string
	StartupErrorBehavior string
	Filter               Filter

	FlushInterval     time.Duration
	FlushJitter       time.Duration
	MetricBufferLimit int
	MetricBatchSize   int

	NameOverride string
	NamePrefix   string
	NameSuffix   string

	BufferStrategy  string
	BufferDirectory string

	LogLevel string
}

// RunningOutput wraps an output plugin with its configuration, metric buffer,
// and self-reported statistics.
type RunningOutput struct {
	// Must be 64-bit aligned
	newMetricsCount int64
	droppedMetrics  int64

	Output            telegraf.Output
	Config            *OutputConfig
	MetricBufferLimit int
	MetricBatchSize   int

	MetricsFiltered selfstat.Stat
	WriteTime       selfstat.Stat
	StartupErrors   selfstat.Stat

	BatchReady chan time.Time

	buffer Buffer
	log    telegraf.Logger

	started bool
	retries uint64

	aggMutex sync.Mutex
}

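// NewRunningOutput creates a RunningOutput for the given output plugin.
// Explicit config values take precedence over the passed-in batch size and
// buffer limit; zero values fall back to DefaultMetricBatchSize and
// DefaultMetricBufferLimit.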
func NewRunningOutput(output telegraf.Output, config *OutputConfig, batchSize, bufferLimit int) *RunningOutput {
	tags := map[string]string{"output": config.Name}
	if config.Alias != "" {
		tags["alias"] = config.Alias
	}

	writeErrorsRegister := selfstat.Register("write", "errors", tags)
	logger := logging.New("outputs", config.Name, config.Alias)
	logger.RegisterErrorCallback(func() {
		writeErrorsRegister.Incr(1)
	})
	if err := logger.SetLogLevel(config.LogLevel); err != nil {
		logger.Error(err)
	}
	SetLoggerOnPlugin(output, logger)

	if config.MetricBufferLimit > 0 {
		bufferLimit = config.MetricBufferLimit
	}
	if bufferLimit == 0 {
		bufferLimit = DefaultMetricBufferLimit
	}
	if config.MetricBatchSize > 0 {
		batchSize = config.MetricBatchSize
	}
	if batchSize == 0 {
		batchSize = DefaultMetricBatchSize
	}

	b, err := NewBuffer(config.Name, config.ID, config.Alias, bufferLimit, config.BufferStrategy, config.BufferDirectory)
	if err != nil {
		panic(err)
	}

	ro := &RunningOutput{
		buffer:            b,
		BatchReady:        make(chan time.Time, 1),
		Output:            output,
		Config:            config,
		MetricBufferLimit: bufferLimit,
		MetricBatchSize:   batchSize,
		MetricsFiltered: selfstat.Register(
			"write",
			"metrics_filtered",
			tags,
		),
		WriteTime: selfstat.RegisterTiming(
			"write",
			"write_time_ns",
			tags,
		),
		StartupErrors: selfstat.Register(
			"write",
			"startup_errors",
			tags,
		),
		log: logger,
	}

	return ro
}

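// LogName returns the name of the output as used in log messages, including
// the alias if one is configured.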
func (r *RunningOutput) LogName() string {
	return logName("outputs", r.Config.Name, r.Config.Alias)
}

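// metricFiltered counts a metric as filtered and releases it.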
func (r *RunningOutput) metricFiltered(metric telegraf.Metric) {
	r.MetricsFiltered.Incr(1)
	metric.Drop()
}

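// ID returns the identifier of the output: the plugin's own ID if it
// implements telegraf.PluginWithID, otherwise the configured ID.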
func (r *RunningOutput) ID() string {
	if p, ok := r.Output.(telegraf.PluginWithID); ok {
		return p.ID()
	}
	return r.Config.ID
}

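// Init validates the startup-error-behavior setting and initializes the
// output plugin if it implements telegraf.Initializer.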
func (r *RunningOutput) Init() error {
	switch r.Config.StartupErrorBehavior {
	case "", "error", "retry", "ignore":
	default:
		return fmt.Errorf("invalid 'startup_error_behavior' setting %q", r.Config.StartupErrorBehavior)
	}

	if p, ok := r.Output.(telegraf.Initializer); ok {
		err := p.Init()
		if err != nil {
			return err
		}
	}
	return nil
}

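// Connect connects the output. On a retryable startup error the result is
// determined by the 'startup_error_behavior' setting: "error" (the default)
// propagates the error, "retry" defers connecting to the first write, and
// "ignore" removes the output by returning a fatal error.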
func (r *RunningOutput) Connect() error {
	// Try to connect and exit early on success
	err := r.Output.Connect()
	if err == nil {
		r.started = true
		return nil
	}
	r.StartupErrors.Incr(1)

	// Check if the plugin reports a retryable error, otherwise we exit.
	var serr *internal.StartupError
	if !errors.As(err, &serr) || !serr.Retry {
		return err
	}

	// Handle the retryable error depending on the configured behavior
	switch r.Config.StartupErrorBehavior {
	case "", "error": // fall-through to return the actual error
	case "retry":
		r.log.Infof("Connect failed: %v; retrying...", err)
		return nil
	case "ignore":
		return &internal.FatalError{Err: serr}
	default:
		r.log.Errorf("Invalid 'startup_error_behavior' setting %q", r.Config.StartupErrorBehavior)
	}

	return err
}

// Close closes the output
func (r *RunningOutput) Close() {
	if err := r.Output.Close(); err != nil {
		r.log.Errorf("Error closing output: %v", err)
	}

	if err := r.buffer.Close(); err != nil {
		r.log.Errorf("Error closing output buffer: %v", err)
	}
}

// AddMetric adds a metric to the output.
// The given metric will be copied if the output selects the metric.
func (r *RunningOutput) AddMetric(metric telegraf.Metric) {
	ok, err := r.Config.Filter.Select(metric)
	if err != nil {
		r.log.Errorf("filtering failed: %v", err)
	} else if !ok {
		r.MetricsFiltered.Incr(1)
		return
	}

	r.add(metric.Copy())
}

// AddMetricNoCopy adds a metric to the output.
// Takes ownership of the metric regardless of whether the output selects it.
func (r *RunningOutput) AddMetricNoCopy(metric telegraf.Metric) {
	ok, err := r.Config.Filter.Select(metric)
	if err != nil {
		r.log.Errorf("filtering failed: %v", err)
	} else if !ok {
		r.metricFiltered(metric)
		return
	}

	r.add(metric)
}

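// add applies the configured name modifications, hands the metric to an
// aggregating output directly, or places it in the buffer. Once a full batch
// has accumulated, a flush is signaled on the BatchReady channel; the channel
// has capacity one, so concurrent signals coalesce.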
func (r *RunningOutput) add(metric telegraf.Metric) {
	r.Config.Filter.Modify(metric)
	if len(metric.FieldList()) == 0 {
		r.metricFiltered(metric)
		return
	}

	if output, ok := r.Output.(telegraf.AggregatingOutput); ok {
		r.aggMutex.Lock()
		output.Add(metric)
		r.aggMutex.Unlock()
		return
	}

	if len(r.Config.NameOverride) > 0 {
		metric.SetName(r.Config.NameOverride)
	}

	if len(r.Config.NamePrefix) > 0 {
		metric.AddPrefix(r.Config.NamePrefix)
	}

	if len(r.Config.NameSuffix) > 0 {
		metric.AddSuffix(r.Config.NameSuffix)
	}

	dropped := r.buffer.Add(metric)
	atomic.AddInt64(&r.droppedMetrics, int64(dropped))

	count := atomic.AddInt64(&r.newMetricsCount, 1)
	if count == int64(r.MetricBatchSize) {
		atomic.StoreInt64(&r.newMetricsCount, 0)
		select {
		case r.BatchReady <- time.Now():
		default:
		}
	}
}

// Write writes all metrics to the output, stopping when all have been sent
// or an error occurs.
func (r *RunningOutput) Write() error {
	// Try to connect if we are not yet started up
	if !r.started {
		r.retries++
		if err := r.Output.Connect(); err != nil {
			var serr *internal.StartupError
			if !errors.As(err, &serr) || !serr.Retry || !serr.Partial {
				r.StartupErrors.Incr(1)
				return internal.ErrNotConnected
			}
			r.log.Debugf("Partially connected after %d attempts", r.retries)
		} else {
			r.started = true
			r.log.Debugf("Successfully connected after %d attempts", r.retries)
		}
	}

	if output, ok := r.Output.(telegraf.AggregatingOutput); ok {
		r.aggMutex.Lock()
		metrics := output.Push()
		r.buffer.Add(metrics...)
		output.Reset()
		r.aggMutex.Unlock()
	}

	atomic.StoreInt64(&r.newMetricsCount, 0)

	// Only process the metrics in the buffer now. Metrics added while we are
	// writing will be sent on the next call.
	nBuffer := r.buffer.Len()
	nBatches := nBuffer/r.MetricBatchSize + 1
	for i := 0; i < nBatches; i++ {
		tx := r.buffer.BeginTransaction(r.MetricBatchSize)
		if len(tx.Batch) == 0 {
			return nil
		}
		err := r.writeMetrics(tx.Batch)
		r.updateTransaction(tx, err)
		r.buffer.EndTransaction(tx)
		if err != nil {
			return err
		}
	}
	return nil
}

// WriteBatch writes a single batch of metrics to the output.
func (r *RunningOutput) WriteBatch() error {
	// Try to connect if we are not yet started up
	if !r.started {
		r.retries++
		if err := r.Output.Connect(); err != nil {
			r.StartupErrors.Incr(1)
			return internal.ErrNotConnected
		}
		r.started = true
		r.log.Debugf("Successfully connected after %d attempts", r.retries)
	}

	tx := r.buffer.BeginTransaction(r.MetricBatchSize)
	if len(tx.Batch) == 0 {
		return nil
	}
	err := r.writeMetrics(tx.Batch)
	r.updateTransaction(tx, err)
	r.buffer.EndTransaction(tx)

	return err
}

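// writeMetrics performs a single write call against the output plugin,
// warning about metrics dropped due to buffer overflow and recording the
// write duration in the write_time_ns statistic.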
func (r *RunningOutput) writeMetrics(metrics []telegraf.Metric) error {
	dropped := atomic.LoadInt64(&r.droppedMetrics)
	if dropped > 0 {
		r.log.Warnf("Metric buffer overflow; %d metrics have been dropped", dropped)
		atomic.StoreInt64(&r.droppedMetrics, 0)
	}

	start := time.Now()
	err := r.Output.Write(metrics)
	elapsed := time.Since(start)
	r.WriteTime.Incr(elapsed.Nanoseconds())

	if err == nil {
		r.log.Debugf("Wrote batch of %d metrics in %s", len(metrics), elapsed)
	}
	return err
}

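// updateTransaction marks the metrics of the transaction as accepted,
// rejected, or kept for a later retry, based on the write result.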
func (*RunningOutput) updateTransaction(tx *Transaction, err error) {
	// No error indicates all metrics were written successfully
	if err == nil {
		tx.AcceptAll()
		return
	}

	// An error other than a partial-write error indicates none of the metrics
	// were written successfully, so we keep them for the next write cycle
	var writeErr *internal.PartialWriteError
	if !errors.As(err, &writeErr) {
		tx.KeepAll()
		return
	}

	// Transfer the accepted and rejected indices based on the write error values
	tx.Accept = writeErr.MetricsAccept
	tx.Reject = writeErr.MetricsReject
}

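// LogBufferStatus logs the current buffer fullness at debug level.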
func (r *RunningOutput) LogBufferStatus() {
	nBuffer := r.buffer.Len()
	if r.Config.BufferStrategy == "disk" {
		r.log.Debugf("Buffer fullness: %d metrics", nBuffer)
	} else {
		r.log.Debugf("Buffer fullness: %d / %d metrics", nBuffer, r.MetricBufferLimit)
	}
}

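// Log returns the logger of the output.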
func (r *RunningOutput) Log() telegraf.Logger {
	return r.log
}

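// BufferLength returns the number of metrics currently held in the buffer.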
func (r *RunningOutput) BufferLength() int {
	return r.buffer.Len()
}
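For orientation, the following is a minimal sketch of the lifecycle this file implements, driving a RunningOutput from outside the models package. The nullOutput plugin and the main function are hypothetical illustrations, not part of this commit; the telegraf, metric, and models imports are the real packages from this source tree.

package main

import (
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/models"
)

// nullOutput is a hypothetical do-nothing output used only for illustration.
type nullOutput struct{}

func (*nullOutput) SampleConfig() string                  { return "" }
func (*nullOutput) Connect() error                        { return nil }
func (*nullOutput) Close() error                          { return nil }
func (*nullOutput) Write(metrics []telegraf.Metric) error { return nil }

func main() {
	// Zero batch size and buffer limit fall back to the package defaults.
	ro := models.NewRunningOutput(&nullOutput{}, &models.OutputConfig{Name: "null"}, 0, 0)
	if err := ro.Init(); err != nil {
		panic(err)
	}
	if err := ro.Connect(); err != nil {
		panic(err)
	}
	defer ro.Close()

	// AddMetric filters, copies, and buffers the metric.
	ro.AddMetric(metric.New(
		"cpu",
		map[string]string{"host": "example"},
		map[string]interface{}{"usage_idle": 98.3},
		time.Now(),
	))

	// Flush everything currently buffered; the agent normally calls this on
	// its flush interval or when BatchReady signals a full batch.
	if err := ro.Write(); err != nil {
		panic(err)
	}
}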
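The startup-error handling in Connect and Write above relies on output plugins wrapping retryable failures in internal.StartupError. Below is a hedged sketch of an output that opts into retrying; flakyOutput is illustrative, and the StartupError fields shown (Err, Retry) are assumed from how this file consumes them.

package example

import (
	"errors"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/internal"
)

// flakyOutput is a hypothetical output whose endpoint may not be up yet.
type flakyOutput struct{ ready bool }

func (*flakyOutput) SampleConfig() string { return "" }
func (*flakyOutput) Close() error         { return nil }

func (f *flakyOutput) Connect() error {
	if !f.ready {
		// Marking the error retryable lets startup_error_behavior = "retry"
		// keep the output alive; RunningOutput then retries the connection
		// on the next write cycle instead of failing at startup.
		return &internal.StartupError{
			Err:   errors.New("endpoint not reachable yet"),
			Retry: true,
		}
	}
	return nil
}

func (*flakyOutput) Write(metrics []telegraf.Metric) error { return nil }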