1
0
Fork 0

Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
4963 changed files with 677545 additions and 0 deletions

View file

@ -0,0 +1,337 @@
//go:generate ../../../tools/readme_config_includer/generator
package parquet
import (
_ "embed"
"errors"
"fmt"
"os"
"strconv"
"time"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/memory"
"github.com/apache/arrow-go/v18/parquet"
"github.com/apache/arrow-go/v18/parquet/pqarrow"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/plugins/outputs"
)
// sampleConfig holds the contents of sample.conf, embedded at build time.
// It is returned verbatim by SampleConfig.
//
//go:embed sample.conf
var sampleConfig string
// defaultTimestampFieldName is the TimestampFieldName assigned to plugin
// instances created by the factory registered in init.
var defaultTimestampFieldName = "timestamp"
// metricGroup bundles the per-metric-name state the plugin keeps between
// calls to Write. One metricGroup exists for every metric name seen so far.
type metricGroup struct {
// filename is the path of the parquet file the writer was created for.
filename string
// builder accumulates column values for the next record; it is created
// from schema and reused for every batch of this metric name.
builder *array.RecordBuilder
// schema is the column layout derived from the first batch of metrics
// seen with this name.
schema *arrow.Schema
// writer is the parquet writer for filename; it is replaced when the file
// is rotated.
writer *pqarrow.FileWriter
}
// Parquet is a Telegraf output plugin that writes metrics to parquet files,
// using one file (and one schema) per metric name.
type Parquet struct {
// Directory is where the parquet files are created. Init defaults it to
// "." when empty and creates the directory if it does not exist.
Directory string `toml:"directory"`
// RotationInterval enables file rotation when non-zero; Write then checks
// on every batch whether the file is due for rotation.
RotationInterval config.Duration `toml:"rotation_interval"`
// TimestampFieldName is the name of the int64 column that receives the
// metric time in Unix nanoseconds. An empty name omits the column.
TimestampFieldName string `toml:"timestamp_field_name"`
// Log is the plugin logger; it is not read from the TOML configuration.
Log telegraf.Logger `toml:"-"`
// metricGroups maps a metric name to its file, schema, builder and writer.
metricGroups map[string]*metricGroup
}
// SampleConfig returns the sample configuration embedded from sample.conf.
func (*Parquet) SampleConfig() string {
return sampleConfig
}
// Init validates the configuration and prepares the plugin for use.
//
// An empty Directory defaults to the current directory. A missing directory
// is created (including parents) with mode 0750. An error is returned when
// the path exists but is not a directory, when it cannot be inspected, or
// when it cannot be created.
func (p *Parquet) Init() error {
	if p.Directory == "" {
		p.Directory = "."
	}

	stat, err := os.Stat(p.Directory)
	switch {
	case err == nil:
		if !stat.IsDir() {
			return fmt.Errorf("provided directory %q is not a directory", p.Directory)
		}
	case errors.Is(err, os.ErrNotExist):
		if err := os.MkdirAll(p.Directory, 0750); err != nil {
			return fmt.Errorf("failed to create directory %q: %w", p.Directory, err)
		}
	default:
		// Any other stat failure (e.g. permission denied) leaves stat nil, so
		// it must be reported instead of falling through to stat.IsDir().
		return fmt.Errorf("failed to stat directory %q: %w", p.Directory, err)
	}

	p.metricGroups = make(map[string]*metricGroup)
	return nil
}
// Connect does nothing and always returns nil; the parquet files and their
// writers are created by Write the first time a metric name is seen.
func (*Parquet) Connect() error {
return nil
}
// Close closes the parquet writer of every known metric group.
//
// Each failure is logged with the affected filename and closing continues
// with the remaining groups. If at least one writer failed to close, a
// single generic error is returned afterwards.
func (p *Parquet) Close() error {
	failures := 0
	for _, group := range p.metricGroups {
		err := group.writer.Close()
		if err == nil {
			continue
		}
		p.Log.Errorf("failed to close file %q: %v", group.filename, err)
		failures++
	}

	if failures > 0 {
		return errors.New("failed closing one or more parquet files")
	}
	return nil
}
// Write groups the given metrics by name and appends each group to the
// parquet file belonging to that name.
//
// The first time a metric name is seen, a schema is derived from the current
// batch, a new file named "<name>-<date>-<unix seconds>.parquet" is created in
// Directory and the resulting state is remembered for later calls. When
// RotationInterval is non-zero the file is rotated first if it is due. The
// batch is then converted to a record and handed to the buffered writer.
//
// The first error encountered aborts the write and is returned.
func (p *Parquet) Write(metrics []telegraf.Metric) error {
	groupedMetrics := make(map[string][]telegraf.Metric)
	for _, metric := range metrics {
		groupedMetrics[metric.Name()] = append(groupedMetrics[metric.Name()], metric)
	}

	now := time.Now()
	for name, batch := range groupedMetrics {
		group, ok := p.metricGroups[name]
		if !ok {
			filename := fmt.Sprintf("%s/%s-%s-%s.parquet", p.Directory, name, now.Format("2006-01-02"), strconv.FormatInt(now.Unix(), 10))
			schema, err := p.createSchema(batch)
			if err != nil {
				return fmt.Errorf("failed to create schema for file %q: %w", name, err)
			}
			writer, err := p.createWriter(name, filename, schema)
			if err != nil {
				return fmt.Errorf("failed to create writer for file %q: %w", name, err)
			}
			group = &metricGroup{
				builder:  array.NewRecordBuilder(memory.DefaultAllocator, schema),
				filename: filename,
				schema:   schema,
				writer:   writer,
			}
			p.metricGroups[name] = group
		}

		if p.RotationInterval != 0 {
			// rotateIfNeeded swaps the writer inside the shared group, so the
			// pointer held here observes the new writer afterwards.
			if err := p.rotateIfNeeded(name); err != nil {
				return fmt.Errorf("failed to rotate file %q: %w", group.filename, err)
			}
		}

		record, err := p.createRecord(batch, group.builder, group.schema)
		if err != nil {
			return fmt.Errorf("failed to create record for file %q: %w", group.filename, err)
		}

		// Release the record whether or not the write succeeded so a failing
		// write does not leave the record's buffers referenced.
		err = group.writer.WriteBuffered(record)
		record.Release()
		if err != nil {
			return fmt.Errorf("failed to write to file %q: %w", group.filename, err)
		}
	}
	return nil
}
// rotateIfNeeded rotates the parquet file of the named metric group once the
// file's modification time is at least RotationInterval in the past.
//
// Rotation closes the current writer and asks createWriter for a new one
// under the same filename; createWriter renames the existing file out of the
// way first. The group's writer is replaced only when both steps succeed.
func (p *Parquet) rotateIfNeeded(name string) error {
	group := p.metricGroups[name]

	info, err := os.Stat(group.filename)
	if err != nil {
		return fmt.Errorf("failed to stat file %q: %w", group.filename, err)
	}

	// Nothing to do while the rotation deadline still lies in the future.
	deadline := info.ModTime().Add(time.Duration(p.RotationInterval))
	if deadline.After(time.Now()) {
		return nil
	}

	if err := group.writer.Close(); err != nil {
		return fmt.Errorf("failed to close file for rotation %q: %w", group.filename, err)
	}

	fresh, err := p.createWriter(name, group.filename, group.schema)
	if err != nil {
		return fmt.Errorf("failed to create new writer for file %q: %w", group.filename, err)
	}
	group.writer = fresh
	return nil
}
// createRecord converts a batch of metrics into one Arrow record laid out
// according to schema, using builder to accumulate the column values.
//
// Every column receives one entry per metric. The timestamp column (when
// TimestampFieldName is set) receives the metric time in Unix nanoseconds.
// For all other columns the value is looked up as a field first and as a tag
// second; metrics having neither get a null entry.
//
// An error is returned, instead of panicking, when a column has an
// unsupported type or when a value's Go type does not match the column type.
// In that case all rows appended so far are discarded so they cannot leak
// into the next record built with the same builder.
func (p *Parquet) createRecord(metrics []telegraf.Metric, builder *array.RecordBuilder, schema *arrow.Schema) (arrow.Record, error) {
	for index, col := range schema.Fields() {
		if err := p.appendColumn(metrics, builder.Field(index), col); err != nil {
			// NewArray resets a field builder; releasing the returned array
			// drops the partially filled column.
			for _, fb := range builder.Fields() {
				fb.NewArray().Release()
			}
			return nil, err
		}
	}
	return builder.NewRecord(), nil
}

// appendColumn appends one entry per metric for column col to fb.
func (p *Parquet) appendColumn(metrics []telegraf.Metric, fb array.Builder, col arrow.Field) error {
	if p.TimestampFieldName != "" && col.Name == p.TimestampFieldName {
		// A field or tag sharing the timestamp column's name may have produced
		// a column of a different type; report that instead of panicking.
		tsBuilder, ok := fb.(*array.Int64Builder)
		if !ok {
			return fmt.Errorf("column %q clashes with the timestamp column but has type %v", col.Name, col.Type)
		}
		for _, m := range metrics {
			tsBuilder.Append(m.Time().UnixNano())
		}
		return nil
	}

	if !isSupportedBuilder(fb) {
		return fmt.Errorf("unsupported type %v for column %q", col.Type, col.Name)
	}

	for _, m := range metrics {
		// Try to get the value from a field first, then from a tag.
		value, ok := m.GetField(col.Name)
		if !ok {
			value, ok = m.GetTag(col.Name)
		}
		// If neither field nor tag exists, append a null value.
		if !ok {
			fb.AppendNull()
			continue
		}
		if !appendValue(fb, value) {
			return fmt.Errorf("value of type %T does not match type %v of column %q", value, col.Type, col.Name)
		}
	}
	return nil
}

// isSupportedBuilder reports whether fb builds one of the column types this
// plugin can write.
func isSupportedBuilder(fb array.Builder) bool {
	switch fb.(type) {
	case *array.Int8Builder, *array.Int16Builder, *array.Int32Builder, *array.Int64Builder,
		*array.Uint8Builder, *array.Uint16Builder, *array.Uint32Builder, *array.Uint64Builder,
		*array.Float32Builder, *array.Float64Builder,
		*array.StringBuilder, *array.BooleanBuilder:
		return true
	}
	return false
}

// appendValue appends value to fb and reports whether value had the Go type
// stored by fb. Nothing is appended when it returns false.
func appendValue(fb array.Builder, value any) bool {
	switch b := fb.(type) {
	case *array.Int8Builder:
		return appendAs(value, b.Append)
	case *array.Int16Builder:
		return appendAs(value, b.Append)
	case *array.Int32Builder:
		return appendAs(value, b.Append)
	case *array.Int64Builder:
		// goToArrowType maps int to an int64 column, so accept it here too.
		if v, ok := value.(int); ok {
			value = int64(v)
		}
		return appendAs(value, b.Append)
	case *array.Uint8Builder:
		return appendAs(value, b.Append)
	case *array.Uint16Builder:
		return appendAs(value, b.Append)
	case *array.Uint32Builder:
		return appendAs(value, b.Append)
	case *array.Uint64Builder:
		// goToArrowType maps uint to a uint64 column, so accept it here too.
		if v, ok := value.(uint); ok {
			value = uint64(v)
		}
		return appendAs(value, b.Append)
	case *array.Float32Builder:
		return appendAs(value, b.Append)
	case *array.Float64Builder:
		return appendAs(value, b.Append)
	case *array.StringBuilder:
		return appendAs(value, b.Append)
	case *array.BooleanBuilder:
		return appendAs(value, b.Append)
	}
	return false
}

// appendAs passes value to add when value has dynamic type T and reports
// whether it did so.
func appendAs[T any](value any, add func(T)) bool {
	v, ok := value.(T)
	if ok {
		add(v)
	}
	return ok
}
// createSchema derives an Arrow schema from a batch of metrics.
//
// Every distinct field key becomes a column typed after the first value seen
// for it, and every distinct tag key not already present becomes a string
// column. Columns appear in the order their keys are first encountered, so
// the same input always yields the same column order. When TimestampFieldName
// is set, an int64 column of that name is appended last.
//
// An error is returned when a field value has a type that cannot be mapped
// to an Arrow type.
func (p *Parquet) createSchema(metrics []telegraf.Metric) (*arrow.Schema, error) {
	seen := make(map[string]struct{})
	fields := make([]arrow.Field, 0)

	for _, metric := range metrics {
		for _, field := range metric.FieldList() {
			if _, ok := seen[field.Key]; ok {
				continue
			}
			arrowType, err := goToArrowType(field.Value)
			if err != nil {
				// %v, because the offending value can be of any type.
				return nil, fmt.Errorf("error converting '%s=%v' field to arrow type: %w", field.Key, field.Value, err)
			}
			seen[field.Key] = struct{}{}
			fields = append(fields, arrow.Field{
				Name: field.Key,
				Type: arrowType,
			})
		}

		for _, tag := range metric.TagList() {
			if _, ok := seen[tag.Key]; ok {
				continue
			}
			seen[tag.Key] = struct{}{}
			fields = append(fields, arrow.Field{
				Name: tag.Key,
				Type: arrow.BinaryTypes.String,
			})
		}
	}

	if p.TimestampFieldName != "" {
		fields = append(fields, arrow.Field{
			Name: p.TimestampFieldName,
			Type: arrow.PrimitiveTypes.Int64,
		})
	}

	return arrow.NewSchema(fields, nil), nil
}
// createWriter creates the file filename and returns a parquet writer for it
// using the given schema.
//
// If filename already exists it is first renamed to
// "<Directory>/<name>-<date>-<unix seconds>.parquet", based on the current
// time, so its contents are kept. The file is closed again when the parquet
// writer cannot be created, so no file handle is leaked on failure.
func (p *Parquet) createWriter(name, filename string, schema *arrow.Schema) (*pqarrow.FileWriter, error) {
	if _, err := os.Stat(filename); err == nil {
		now := time.Now()
		rotatedFilename := fmt.Sprintf("%s/%s-%s-%s.parquet", p.Directory, name, now.Format("2006-01-02"), strconv.FormatInt(now.Unix(), 10))
		if err := os.Rename(filename, rotatedFilename); err != nil {
			return nil, fmt.Errorf("failed to rename file %q: %w", filename, err)
		}
	}

	file, err := os.Create(filename)
	if err != nil {
		return nil, fmt.Errorf("failed to create file %q: %w", filename, err)
	}

	writer, err := pqarrow.NewFileWriter(schema, file, parquet.NewWriterProperties(), pqarrow.DefaultWriterProps())
	if err != nil {
		// Nobody else holds the file yet, so close it here and report a
		// close failure together with the original error.
		if closeErr := file.Close(); closeErr != nil {
			err = errors.Join(err, closeErr)
		}
		return nil, fmt.Errorf("failed to create parquet writer for file %q: %w", filename, err)
	}

	return writer, nil
}
// goToArrowType maps the dynamic Go type of value to the Arrow data type used
// to store it. Platform-sized int and uint are stored as 64-bit integers.
// An error naming the offending type is returned for anything else.
func goToArrowType(value interface{}) (arrow.DataType, error) {
	var dataType arrow.DataType

	switch value.(type) {
	// Non-numeric types.
	case bool:
		dataType = arrow.FixedWidthTypes.Boolean
	case string:
		dataType = arrow.BinaryTypes.String

	// Floating point numbers.
	case float32:
		dataType = arrow.PrimitiveTypes.Float32
	case float64:
		dataType = arrow.PrimitiveTypes.Float64

	// Signed integers.
	case int8:
		dataType = arrow.PrimitiveTypes.Int8
	case int16:
		dataType = arrow.PrimitiveTypes.Int16
	case int32:
		dataType = arrow.PrimitiveTypes.Int32
	case int, int64:
		dataType = arrow.PrimitiveTypes.Int64

	// Unsigned integers.
	case uint8:
		dataType = arrow.PrimitiveTypes.Uint8
	case uint16:
		dataType = arrow.PrimitiveTypes.Uint16
	case uint32:
		dataType = arrow.PrimitiveTypes.Uint32
	case uint, uint64:
		dataType = arrow.PrimitiveTypes.Uint64

	default:
		return nil, fmt.Errorf("unsupported type: %T", value)
	}

	return dataType, nil
}
func init() {
outputs.Add("parquet", func() telegraf.Output {
return &Parquet{
TimestampFieldName: defaultTimestampFieldName,
}
})
}