Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
e393c3af3f
commit
4978089aab
4963 changed files with 677545 additions and 0 deletions
337
plugins/outputs/parquet/parquet.go
Normal file
337
plugins/outputs/parquet/parquet.go
Normal file
|
@ -0,0 +1,337 @@
|
|||
//go:generate ../../../tools/readme_config_includer/generator
|
||||
package parquet
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/apache/arrow-go/v18/arrow"
|
||||
"github.com/apache/arrow-go/v18/arrow/array"
|
||||
"github.com/apache/arrow-go/v18/arrow/memory"
|
||||
"github.com/apache/arrow-go/v18/parquet"
|
||||
"github.com/apache/arrow-go/v18/parquet/pqarrow"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/config"
|
||||
"github.com/influxdata/telegraf/plugins/outputs"
|
||||
)
|
||||
|
||||
//go:embed sample.conf
|
||||
var sampleConfig string
|
||||
|
||||
// defaultTimestampFieldName is the timestamp column name given to plugin
// instances created by the factory registered in init.
var defaultTimestampFieldName = "timestamp"
|
||||
|
||||
type metricGroup struct {
|
||||
filename string
|
||||
builder *array.RecordBuilder
|
||||
schema *arrow.Schema
|
||||
writer *pqarrow.FileWriter
|
||||
}
|
||||
|
||||
type Parquet struct {
|
||||
Directory string `toml:"directory"`
|
||||
RotationInterval config.Duration `toml:"rotation_interval"`
|
||||
TimestampFieldName string `toml:"timestamp_field_name"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
|
||||
metricGroups map[string]*metricGroup
|
||||
}
|
||||
|
||||
func (*Parquet) SampleConfig() string {
|
||||
return sampleConfig
|
||||
}
|
||||
|
||||
func (p *Parquet) Init() error {
|
||||
if p.Directory == "" {
|
||||
p.Directory = "."
|
||||
}
|
||||
|
||||
stat, err := os.Stat(p.Directory)
|
||||
if os.IsNotExist(err) {
|
||||
if err := os.MkdirAll(p.Directory, 0750); err != nil {
|
||||
return fmt.Errorf("failed to create directory %q: %w", p.Directory, err)
|
||||
}
|
||||
} else if !stat.IsDir() {
|
||||
return fmt.Errorf("provided directory %q is not a directory", p.Directory)
|
||||
}
|
||||
|
||||
p.metricGroups = make(map[string]*metricGroup)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (*Parquet) Connect() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Parquet) Close() error {
|
||||
var errorOccurred bool
|
||||
|
||||
for _, metrics := range p.metricGroups {
|
||||
if err := metrics.writer.Close(); err != nil {
|
||||
p.Log.Errorf("failed to close file %q: %v", metrics.filename, err)
|
||||
errorOccurred = true
|
||||
}
|
||||
}
|
||||
|
||||
if errorOccurred {
|
||||
return errors.New("failed closing one or more parquet files")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Parquet) Write(metrics []telegraf.Metric) error {
|
||||
groupedMetrics := make(map[string][]telegraf.Metric)
|
||||
for _, metric := range metrics {
|
||||
groupedMetrics[metric.Name()] = append(groupedMetrics[metric.Name()], metric)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
for name, metrics := range groupedMetrics {
|
||||
if _, ok := p.metricGroups[name]; !ok {
|
||||
filename := fmt.Sprintf("%s/%s-%s-%s.parquet", p.Directory, name, now.Format("2006-01-02"), strconv.FormatInt(now.Unix(), 10))
|
||||
schema, err := p.createSchema(metrics)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create schema for file %q: %w", name, err)
|
||||
}
|
||||
writer, err := p.createWriter(name, filename, schema)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create writer for file %q: %w", name, err)
|
||||
}
|
||||
p.metricGroups[name] = &metricGroup{
|
||||
builder: array.NewRecordBuilder(memory.DefaultAllocator, schema),
|
||||
filename: filename,
|
||||
schema: schema,
|
||||
writer: writer,
|
||||
}
|
||||
}
|
||||
|
||||
if p.RotationInterval != 0 {
|
||||
if err := p.rotateIfNeeded(name); err != nil {
|
||||
return fmt.Errorf("failed to rotate file %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
}
|
||||
|
||||
record, err := p.createRecord(metrics, p.metricGroups[name].builder, p.metricGroups[name].schema)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create record for file %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
if err = p.metricGroups[name].writer.WriteBuffered(record); err != nil {
|
||||
return fmt.Errorf("failed to write to file %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
record.Release()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Parquet) rotateIfNeeded(name string) error {
|
||||
fileInfo, err := os.Stat(p.metricGroups[name].filename)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to stat file %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
|
||||
expireTime := fileInfo.ModTime().Add(time.Duration(p.RotationInterval))
|
||||
if time.Now().Before(expireTime) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := p.metricGroups[name].writer.Close(); err != nil {
|
||||
return fmt.Errorf("failed to close file for rotation %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
|
||||
writer, err := p.createWriter(name, p.metricGroups[name].filename, p.metricGroups[name].schema)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create new writer for file %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
p.metricGroups[name].writer = writer
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Parquet) createRecord(metrics []telegraf.Metric, builder *array.RecordBuilder, schema *arrow.Schema) (arrow.Record, error) {
|
||||
for index, col := range schema.Fields() {
|
||||
for _, m := range metrics {
|
||||
if p.TimestampFieldName != "" && col.Name == p.TimestampFieldName {
|
||||
builder.Field(index).(*array.Int64Builder).Append(m.Time().UnixNano())
|
||||
continue
|
||||
}
|
||||
|
||||
// Try to get the value from a field first, then from a tag.
|
||||
var value any
|
||||
var ok bool
|
||||
value, ok = m.GetField(col.Name)
|
||||
if !ok {
|
||||
value, ok = m.GetTag(col.Name)
|
||||
}
|
||||
|
||||
// if neither field nor tag exists, append a null value
|
||||
if !ok {
|
||||
switch col.Type {
|
||||
case arrow.PrimitiveTypes.Int8:
|
||||
builder.Field(index).(*array.Int8Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Int16:
|
||||
builder.Field(index).(*array.Int16Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Int32:
|
||||
builder.Field(index).(*array.Int32Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Int64:
|
||||
builder.Field(index).(*array.Int64Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Uint8:
|
||||
builder.Field(index).(*array.Uint8Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Uint16:
|
||||
builder.Field(index).(*array.Uint16Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Uint32:
|
||||
builder.Field(index).(*array.Uint32Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Uint64:
|
||||
builder.Field(index).(*array.Uint64Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Float32:
|
||||
builder.Field(index).(*array.Float32Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Float64:
|
||||
builder.Field(index).(*array.Float64Builder).AppendNull()
|
||||
case arrow.BinaryTypes.String:
|
||||
builder.Field(index).(*array.StringBuilder).AppendNull()
|
||||
case arrow.FixedWidthTypes.Boolean:
|
||||
builder.Field(index).(*array.BooleanBuilder).AppendNull()
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type: %T", value)
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
switch col.Type {
|
||||
case arrow.PrimitiveTypes.Int8:
|
||||
builder.Field(index).(*array.Int8Builder).Append(value.(int8))
|
||||
case arrow.PrimitiveTypes.Int16:
|
||||
builder.Field(index).(*array.Int16Builder).Append(value.(int16))
|
||||
case arrow.PrimitiveTypes.Int32:
|
||||
builder.Field(index).(*array.Int32Builder).Append(value.(int32))
|
||||
case arrow.PrimitiveTypes.Int64:
|
||||
builder.Field(index).(*array.Int64Builder).Append(value.(int64))
|
||||
case arrow.PrimitiveTypes.Uint8:
|
||||
builder.Field(index).(*array.Uint8Builder).Append(value.(uint8))
|
||||
case arrow.PrimitiveTypes.Uint16:
|
||||
builder.Field(index).(*array.Uint16Builder).Append(value.(uint16))
|
||||
case arrow.PrimitiveTypes.Uint32:
|
||||
builder.Field(index).(*array.Uint32Builder).Append(value.(uint32))
|
||||
case arrow.PrimitiveTypes.Uint64:
|
||||
builder.Field(index).(*array.Uint64Builder).Append(value.(uint64))
|
||||
case arrow.PrimitiveTypes.Float32:
|
||||
builder.Field(index).(*array.Float32Builder).Append(value.(float32))
|
||||
case arrow.PrimitiveTypes.Float64:
|
||||
builder.Field(index).(*array.Float64Builder).Append(value.(float64))
|
||||
case arrow.BinaryTypes.String:
|
||||
builder.Field(index).(*array.StringBuilder).Append(value.(string))
|
||||
case arrow.FixedWidthTypes.Boolean:
|
||||
builder.Field(index).(*array.BooleanBuilder).Append(value.(bool))
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type: %T", value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
record := builder.NewRecord()
|
||||
return record, nil
|
||||
}
|
||||
|
||||
func (p *Parquet) createSchema(metrics []telegraf.Metric) (*arrow.Schema, error) {
|
||||
rawFields := make(map[string]arrow.DataType, 0)
|
||||
for _, metric := range metrics {
|
||||
for _, field := range metric.FieldList() {
|
||||
if _, ok := rawFields[field.Key]; !ok {
|
||||
arrowType, err := goToArrowType(field.Value)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error converting '%s=%s' field to arrow type: %w", field.Key, field.Value, err)
|
||||
}
|
||||
rawFields[field.Key] = arrowType
|
||||
}
|
||||
}
|
||||
for _, tag := range metric.TagList() {
|
||||
if _, ok := rawFields[tag.Key]; !ok {
|
||||
rawFields[tag.Key] = arrow.BinaryTypes.String
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fields := make([]arrow.Field, 0)
|
||||
for key, value := range rawFields {
|
||||
fields = append(fields, arrow.Field{
|
||||
Name: key,
|
||||
Type: value,
|
||||
})
|
||||
}
|
||||
|
||||
if p.TimestampFieldName != "" {
|
||||
fields = append(fields, arrow.Field{
|
||||
Name: p.TimestampFieldName,
|
||||
Type: arrow.PrimitiveTypes.Int64,
|
||||
})
|
||||
}
|
||||
|
||||
return arrow.NewSchema(fields, nil), nil
|
||||
}
|
||||
|
||||
func (p *Parquet) createWriter(name, filename string, schema *arrow.Schema) (*pqarrow.FileWriter, error) {
|
||||
if _, err := os.Stat(filename); err == nil {
|
||||
now := time.Now()
|
||||
rotatedFilename := fmt.Sprintf("%s/%s-%s-%s.parquet", p.Directory, name, now.Format("2006-01-02"), strconv.FormatInt(now.Unix(), 10))
|
||||
if err := os.Rename(filename, rotatedFilename); err != nil {
|
||||
return nil, fmt.Errorf("failed to rename file %q: %w", filename, err)
|
||||
}
|
||||
}
|
||||
file, err := os.Create(filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create file %q: %w", filename, err)
|
||||
}
|
||||
|
||||
writer, err := pqarrow.NewFileWriter(schema, file, parquet.NewWriterProperties(), pqarrow.DefaultWriterProps())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create parquet writer for file %q: %w", filename, err)
|
||||
}
|
||||
|
||||
return writer, nil
|
||||
}
|
||||
|
||||
func goToArrowType(value interface{}) (arrow.DataType, error) {
|
||||
switch value.(type) {
|
||||
case int8:
|
||||
return arrow.PrimitiveTypes.Int8, nil
|
||||
case int16:
|
||||
return arrow.PrimitiveTypes.Int16, nil
|
||||
case int32:
|
||||
return arrow.PrimitiveTypes.Int32, nil
|
||||
case int64, int:
|
||||
return arrow.PrimitiveTypes.Int64, nil
|
||||
case uint8:
|
||||
return arrow.PrimitiveTypes.Uint8, nil
|
||||
case uint16:
|
||||
return arrow.PrimitiveTypes.Uint16, nil
|
||||
case uint32:
|
||||
return arrow.PrimitiveTypes.Uint32, nil
|
||||
case uint64, uint:
|
||||
return arrow.PrimitiveTypes.Uint64, nil
|
||||
case float32:
|
||||
return arrow.PrimitiveTypes.Float32, nil
|
||||
case float64:
|
||||
return arrow.PrimitiveTypes.Float64, nil
|
||||
case string:
|
||||
return arrow.BinaryTypes.String, nil
|
||||
case bool:
|
||||
return arrow.FixedWidthTypes.Boolean, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type: %T", value)
|
||||
}
|
||||
}
|
||||
|
||||
func init() {
|
||||
outputs.Add("parquet", func() telegraf.Output {
|
||||
return &Parquet{
|
||||
TimestampFieldName: defaultTimestampFieldName,
|
||||
}
|
||||
})
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue