Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
e393c3af3f
commit
4978089aab
4963 changed files with 677545 additions and 0 deletions
122
plugins/outputs/parquet/README.md
Normal file
122
plugins/outputs/parquet/README.md
Normal file
|
@ -0,0 +1,122 @@
|
|||
# Parquet Output Plugin
|
||||
|
||||
This plugin writes metrics to [parquet][parquet] files. By default, metrics are
|
||||
grouped by metric name and written all to the same file.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> If a metric schema does not match the schema in the file it will be dropped.
|
||||
|
||||
To learn more about the parquet format, check out the [parquet docs][docs] as
|
||||
well as a blog post on [querying parquet][querying].
|
||||
|
||||
⭐ Telegraf v1.32.0
|
||||
🏷️ datastore
|
||||
💻 all
|
||||
|
||||
[parquet]: https://parquet.apache.org
|
||||
[docs]: https://parquet.apache.org/docs/
|
||||
[querying]: https://www.influxdata.com/blog/querying-parquet-millisecond-latency/
|
||||
|
||||
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
|
||||
|
||||
In addition to the plugin-specific configuration settings, plugins support
|
||||
additional global and plugin configuration settings. These settings are used to
|
||||
modify metrics, tags, and fields, or create aliases and configure ordering, etc.
|
||||
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||
|
||||
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
|
||||
|
||||
## Configuration
|
||||
|
||||
```toml @sample.conf
|
||||
# A plugin that writes metrics to parquet files
|
||||
[[outputs.parquet]]
|
||||
## Directory to write parquet files in. If a file already exists the output
|
||||
## will attempt to continue using the existing file.
|
||||
# directory = "."
|
||||
|
||||
## Files are rotated after the time interval specified. When set to 0 no time
|
||||
## based rotation is performed.
|
||||
# rotation_interval = "0h"
|
||||
|
||||
## Timestamp field name
|
||||
## Field name to use to store the timestamp. If set to an empty string, then
|
||||
## the timestamp is omitted.
|
||||
# timestamp_field_name = "timestamp"
|
||||
```
|
||||
|
||||
## Building Parquet Files
|
||||
|
||||
### Schema
|
||||
|
||||
Parquet files require a schema when writing files. To generate a schema,
|
||||
Telegraf will go through all grouped metrics and generate an Apache Arrow schema
|
||||
based on the union of all fields and tags. If a field and tag have the same name
|
||||
then the field takes precedence.
|
||||
|
||||
The consequence of schema generation is that the very first flush interval in which a
|
||||
metric is seen takes much longer due to the additional looping through the
|
||||
metrics to generate the schema. Subsequent flush intervals are significantly
|
||||
faster.
|
||||
|
||||
When writing to a file, the schema is used to look for each value and if it is
|
||||
not present a null value is added. The result is that if additional fields are
|
||||
present after the first metric flush those fields are omitted.
|
||||
|
||||
### Write
|
||||
|
||||
The plugin makes use of the buffered writer. This may buffer some metrics into
|
||||
memory before writing it to disk. This method is used as it can more compactly
|
||||
write multiple flushes of metrics into a single Parquet row group.
|
||||
|
||||
Additionally, the Parquet format requires a proper footer, so close must be
|
||||
called on the file to ensure it is properly formatted.
|
||||
|
||||
### Close
|
||||
|
||||
Parquet files must close properly or the file will not be readable. The parquet
|
||||
format requires a footer at the end of the file and if that footer is not
|
||||
present then the file cannot be read correctly.
|
||||
|
||||
If Telegraf were to crash while writing parquet files there is the possibility
|
||||
of this occurring.
|
||||
|
||||
## File Rotation
|
||||
|
||||
If a file with the same target name exists at start, the existing file is
|
||||
rotated to avoid over-writing it or conflicting schema.
|
||||
|
||||
File rotation is available via a time based interval that a user can optionally
|
||||
set. Due to the usage of a buffered writer, a size based rotation is not
|
||||
possible as the file may not actually get data at each interval.
|
||||
|
||||
## Explore Parquet Files
|
||||
|
||||
If a user wishes to explore a schema or data in a Parquet file quickly, then
|
||||
consider the options below:
|
||||
|
||||
### CLI
|
||||
|
||||
The Arrow repo contains a Go CLI tool to read and parse Parquet files:
|
||||
|
||||
```s
|
||||
go install github.com/apache/arrow-go/v18/parquet/cmd/parquet_reader@latest
|
||||
parquet_reader <file>
|
||||
```
|
||||
|
||||
### Python
|
||||
|
||||
Users can also use the [pyarrow][] library to quickly open and explore Parquet
|
||||
files:
|
||||
|
||||
```python
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
table = pq.read_table('example.parquet')
|
||||
```
|
||||
|
||||
Once created, a user can look at the various [pyarrow.Table][] functions to further
|
||||
explore the data.
|
||||
|
||||
[pyarrow]: https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html
|
||||
[pyarrow.Table]: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table
|
337
plugins/outputs/parquet/parquet.go
Normal file
337
plugins/outputs/parquet/parquet.go
Normal file
|
@ -0,0 +1,337 @@
|
|||
//go:generate ../../../tools/readme_config_includer/generator
|
||||
package parquet
|
||||
|
||||
import (
	_ "embed"
	"errors"
	"fmt"
	"os"
	"sort"
	"strconv"
	"time"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/memory"
	"github.com/apache/arrow-go/v18/parquet"
	"github.com/apache/arrow-go/v18/parquet/pqarrow"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/config"
	"github.com/influxdata/telegraf/plugins/outputs"
)
|
||||
|
||||
//go:embed sample.conf
|
||||
var sampleConfig string
|
||||
|
||||
var defaultTimestampFieldName = "timestamp"
|
||||
|
||||
// metricGroup holds the per-metric-name output state: the target file,
// the Arrow record builder reused across flushes, the schema derived on
// the first flush, and the parquet writer appending to the file.
type metricGroup struct {
	filename string
	builder  *array.RecordBuilder
	schema   *arrow.Schema
	writer   *pqarrow.FileWriter
}
|
||||
|
||||
// Parquet is the output plugin configuration. Metrics are grouped by
// metric name and each group is appended to its own parquet file.
type Parquet struct {
	// Directory the parquet files are written to; defaults to "." in Init.
	Directory string `toml:"directory"`
	// RotationInterval enables time-based file rotation when non-zero.
	RotationInterval config.Duration `toml:"rotation_interval"`
	// TimestampFieldName is the column name used to store the metric
	// timestamp (Unix nanoseconds); an empty string omits the column.
	TimestampFieldName string `toml:"timestamp_field_name"`
	Log                telegraf.Logger `toml:"-"`

	// metricGroups maps metric name to its open file/writer state.
	metricGroups map[string]*metricGroup
}
|
||||
|
||||
// SampleConfig returns the embedded sample configuration for the plugin.
func (*Parquet) SampleConfig() string {
	return sampleConfig
}
|
||||
|
||||
func (p *Parquet) Init() error {
|
||||
if p.Directory == "" {
|
||||
p.Directory = "."
|
||||
}
|
||||
|
||||
stat, err := os.Stat(p.Directory)
|
||||
if os.IsNotExist(err) {
|
||||
if err := os.MkdirAll(p.Directory, 0750); err != nil {
|
||||
return fmt.Errorf("failed to create directory %q: %w", p.Directory, err)
|
||||
}
|
||||
} else if !stat.IsDir() {
|
||||
return fmt.Errorf("provided directory %q is not a directory", p.Directory)
|
||||
}
|
||||
|
||||
p.metricGroups = make(map[string]*metricGroup)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Connect is a no-op; files are opened lazily on the first write of each
// metric name.
func (*Parquet) Connect() error {
	return nil
}
|
||||
|
||||
func (p *Parquet) Close() error {
|
||||
var errorOccurred bool
|
||||
|
||||
for _, metrics := range p.metricGroups {
|
||||
if err := metrics.writer.Close(); err != nil {
|
||||
p.Log.Errorf("failed to close file %q: %v", metrics.filename, err)
|
||||
errorOccurred = true
|
||||
}
|
||||
}
|
||||
|
||||
if errorOccurred {
|
||||
return errors.New("failed closing one or more parquet files")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Parquet) Write(metrics []telegraf.Metric) error {
|
||||
groupedMetrics := make(map[string][]telegraf.Metric)
|
||||
for _, metric := range metrics {
|
||||
groupedMetrics[metric.Name()] = append(groupedMetrics[metric.Name()], metric)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
for name, metrics := range groupedMetrics {
|
||||
if _, ok := p.metricGroups[name]; !ok {
|
||||
filename := fmt.Sprintf("%s/%s-%s-%s.parquet", p.Directory, name, now.Format("2006-01-02"), strconv.FormatInt(now.Unix(), 10))
|
||||
schema, err := p.createSchema(metrics)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create schema for file %q: %w", name, err)
|
||||
}
|
||||
writer, err := p.createWriter(name, filename, schema)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create writer for file %q: %w", name, err)
|
||||
}
|
||||
p.metricGroups[name] = &metricGroup{
|
||||
builder: array.NewRecordBuilder(memory.DefaultAllocator, schema),
|
||||
filename: filename,
|
||||
schema: schema,
|
||||
writer: writer,
|
||||
}
|
||||
}
|
||||
|
||||
if p.RotationInterval != 0 {
|
||||
if err := p.rotateIfNeeded(name); err != nil {
|
||||
return fmt.Errorf("failed to rotate file %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
}
|
||||
|
||||
record, err := p.createRecord(metrics, p.metricGroups[name].builder, p.metricGroups[name].schema)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create record for file %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
if err = p.metricGroups[name].writer.WriteBuffered(record); err != nil {
|
||||
return fmt.Errorf("failed to write to file %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
record.Release()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Parquet) rotateIfNeeded(name string) error {
|
||||
fileInfo, err := os.Stat(p.metricGroups[name].filename)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to stat file %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
|
||||
expireTime := fileInfo.ModTime().Add(time.Duration(p.RotationInterval))
|
||||
if time.Now().Before(expireTime) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := p.metricGroups[name].writer.Close(); err != nil {
|
||||
return fmt.Errorf("failed to close file for rotation %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
|
||||
writer, err := p.createWriter(name, p.metricGroups[name].filename, p.metricGroups[name].schema)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create new writer for file %q: %w", p.metricGroups[name].filename, err)
|
||||
}
|
||||
p.metricGroups[name].writer = writer
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Parquet) createRecord(metrics []telegraf.Metric, builder *array.RecordBuilder, schema *arrow.Schema) (arrow.Record, error) {
|
||||
for index, col := range schema.Fields() {
|
||||
for _, m := range metrics {
|
||||
if p.TimestampFieldName != "" && col.Name == p.TimestampFieldName {
|
||||
builder.Field(index).(*array.Int64Builder).Append(m.Time().UnixNano())
|
||||
continue
|
||||
}
|
||||
|
||||
// Try to get the value from a field first, then from a tag.
|
||||
var value any
|
||||
var ok bool
|
||||
value, ok = m.GetField(col.Name)
|
||||
if !ok {
|
||||
value, ok = m.GetTag(col.Name)
|
||||
}
|
||||
|
||||
// if neither field nor tag exists, append a null value
|
||||
if !ok {
|
||||
switch col.Type {
|
||||
case arrow.PrimitiveTypes.Int8:
|
||||
builder.Field(index).(*array.Int8Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Int16:
|
||||
builder.Field(index).(*array.Int16Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Int32:
|
||||
builder.Field(index).(*array.Int32Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Int64:
|
||||
builder.Field(index).(*array.Int64Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Uint8:
|
||||
builder.Field(index).(*array.Uint8Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Uint16:
|
||||
builder.Field(index).(*array.Uint16Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Uint32:
|
||||
builder.Field(index).(*array.Uint32Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Uint64:
|
||||
builder.Field(index).(*array.Uint64Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Float32:
|
||||
builder.Field(index).(*array.Float32Builder).AppendNull()
|
||||
case arrow.PrimitiveTypes.Float64:
|
||||
builder.Field(index).(*array.Float64Builder).AppendNull()
|
||||
case arrow.BinaryTypes.String:
|
||||
builder.Field(index).(*array.StringBuilder).AppendNull()
|
||||
case arrow.FixedWidthTypes.Boolean:
|
||||
builder.Field(index).(*array.BooleanBuilder).AppendNull()
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type: %T", value)
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
switch col.Type {
|
||||
case arrow.PrimitiveTypes.Int8:
|
||||
builder.Field(index).(*array.Int8Builder).Append(value.(int8))
|
||||
case arrow.PrimitiveTypes.Int16:
|
||||
builder.Field(index).(*array.Int16Builder).Append(value.(int16))
|
||||
case arrow.PrimitiveTypes.Int32:
|
||||
builder.Field(index).(*array.Int32Builder).Append(value.(int32))
|
||||
case arrow.PrimitiveTypes.Int64:
|
||||
builder.Field(index).(*array.Int64Builder).Append(value.(int64))
|
||||
case arrow.PrimitiveTypes.Uint8:
|
||||
builder.Field(index).(*array.Uint8Builder).Append(value.(uint8))
|
||||
case arrow.PrimitiveTypes.Uint16:
|
||||
builder.Field(index).(*array.Uint16Builder).Append(value.(uint16))
|
||||
case arrow.PrimitiveTypes.Uint32:
|
||||
builder.Field(index).(*array.Uint32Builder).Append(value.(uint32))
|
||||
case arrow.PrimitiveTypes.Uint64:
|
||||
builder.Field(index).(*array.Uint64Builder).Append(value.(uint64))
|
||||
case arrow.PrimitiveTypes.Float32:
|
||||
builder.Field(index).(*array.Float32Builder).Append(value.(float32))
|
||||
case arrow.PrimitiveTypes.Float64:
|
||||
builder.Field(index).(*array.Float64Builder).Append(value.(float64))
|
||||
case arrow.BinaryTypes.String:
|
||||
builder.Field(index).(*array.StringBuilder).Append(value.(string))
|
||||
case arrow.FixedWidthTypes.Boolean:
|
||||
builder.Field(index).(*array.BooleanBuilder).Append(value.(bool))
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type: %T", value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
record := builder.NewRecord()
|
||||
return record, nil
|
||||
}
|
||||
|
||||
func (p *Parquet) createSchema(metrics []telegraf.Metric) (*arrow.Schema, error) {
|
||||
rawFields := make(map[string]arrow.DataType, 0)
|
||||
for _, metric := range metrics {
|
||||
for _, field := range metric.FieldList() {
|
||||
if _, ok := rawFields[field.Key]; !ok {
|
||||
arrowType, err := goToArrowType(field.Value)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error converting '%s=%s' field to arrow type: %w", field.Key, field.Value, err)
|
||||
}
|
||||
rawFields[field.Key] = arrowType
|
||||
}
|
||||
}
|
||||
for _, tag := range metric.TagList() {
|
||||
if _, ok := rawFields[tag.Key]; !ok {
|
||||
rawFields[tag.Key] = arrow.BinaryTypes.String
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fields := make([]arrow.Field, 0)
|
||||
for key, value := range rawFields {
|
||||
fields = append(fields, arrow.Field{
|
||||
Name: key,
|
||||
Type: value,
|
||||
})
|
||||
}
|
||||
|
||||
if p.TimestampFieldName != "" {
|
||||
fields = append(fields, arrow.Field{
|
||||
Name: p.TimestampFieldName,
|
||||
Type: arrow.PrimitiveTypes.Int64,
|
||||
})
|
||||
}
|
||||
|
||||
return arrow.NewSchema(fields, nil), nil
|
||||
}
|
||||
|
||||
func (p *Parquet) createWriter(name, filename string, schema *arrow.Schema) (*pqarrow.FileWriter, error) {
|
||||
if _, err := os.Stat(filename); err == nil {
|
||||
now := time.Now()
|
||||
rotatedFilename := fmt.Sprintf("%s/%s-%s-%s.parquet", p.Directory, name, now.Format("2006-01-02"), strconv.FormatInt(now.Unix(), 10))
|
||||
if err := os.Rename(filename, rotatedFilename); err != nil {
|
||||
return nil, fmt.Errorf("failed to rename file %q: %w", filename, err)
|
||||
}
|
||||
}
|
||||
file, err := os.Create(filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create file %q: %w", filename, err)
|
||||
}
|
||||
|
||||
writer, err := pqarrow.NewFileWriter(schema, file, parquet.NewWriterProperties(), pqarrow.DefaultWriterProps())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create parquet writer for file %q: %w", filename, err)
|
||||
}
|
||||
|
||||
return writer, nil
|
||||
}
|
||||
|
||||
func goToArrowType(value interface{}) (arrow.DataType, error) {
|
||||
switch value.(type) {
|
||||
case int8:
|
||||
return arrow.PrimitiveTypes.Int8, nil
|
||||
case int16:
|
||||
return arrow.PrimitiveTypes.Int16, nil
|
||||
case int32:
|
||||
return arrow.PrimitiveTypes.Int32, nil
|
||||
case int64, int:
|
||||
return arrow.PrimitiveTypes.Int64, nil
|
||||
case uint8:
|
||||
return arrow.PrimitiveTypes.Uint8, nil
|
||||
case uint16:
|
||||
return arrow.PrimitiveTypes.Uint16, nil
|
||||
case uint32:
|
||||
return arrow.PrimitiveTypes.Uint32, nil
|
||||
case uint64, uint:
|
||||
return arrow.PrimitiveTypes.Uint64, nil
|
||||
case float32:
|
||||
return arrow.PrimitiveTypes.Float32, nil
|
||||
case float64:
|
||||
return arrow.PrimitiveTypes.Float64, nil
|
||||
case string:
|
||||
return arrow.BinaryTypes.String, nil
|
||||
case bool:
|
||||
return arrow.FixedWidthTypes.Boolean, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type: %T", value)
|
||||
}
|
||||
}
|
||||
|
||||
// init registers the plugin under the "parquet" output name with the
// default timestamp column name.
func init() {
	outputs.Add("parquet", func() telegraf.Output {
		return &Parquet{
			TimestampFieldName: defaultTimestampFieldName,
		}
	})
}
|
247
plugins/outputs/parquet/parquet_test.go
Normal file
247
plugins/outputs/parquet/parquet_test.go
Normal file
|
@ -0,0 +1,247 @@
|
|||
package parquet
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/apache/arrow-go/v18/parquet/file"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/config"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
// TestCases writes table-driven metric sets through the plugin and checks
// the resulting parquet file's row and column counts (columns include the
// timestamp column added by default).
func TestCases(t *testing.T) {
	type testcase struct {
		name       string
		metrics    []telegraf.Metric
		numRows    int
		numColumns int
	}

	var testcases = []testcase{
		{
			name: "basic single metric",
			metrics: []telegraf.Metric{
				testutil.MustMetric(
					"test",
					map[string]string{},
					map[string]interface{}{
						"value": 1.0,
					},
					time.Now(),
				),
			},
			numRows:    1,
			numColumns: 2,
		},
		{
			name: "mix of tags and fields",
			metrics: []telegraf.Metric{
				testutil.MustMetric(
					"test",
					map[string]string{
						"tag": "tag",
					},
					map[string]interface{}{
						"value": 1.0,
					},
					time.Now(),
				),
				testutil.MustMetric(
					"test",
					map[string]string{
						"tag": "tag2",
					},
					map[string]interface{}{
						"value": 2.0,
					},
					time.Now(),
				),
			},
			numRows:    2,
			numColumns: 3,
		},
		{
			// Metrics with disjoint field/tag names: the schema is the
			// union, so missing cells are filled with nulls.
			name: "null values",
			metrics: []telegraf.Metric{
				testutil.MustMetric(
					"test",
					map[string]string{
						"host": "tag",
					},
					map[string]interface{}{
						"value_old": 1.0,
					},
					time.Now(),
				),
				testutil.MustMetric(
					"test",
					map[string]string{
						"tag": "tag2",
					},
					map[string]interface{}{
						"value_new": 2.0,
					},
					time.Now(),
				),
			},
			numRows:    2,
			numColumns: 5,
		},
		{
			// Exercises every Go type supported by goToArrowType.
			name: "data types",
			metrics: []telegraf.Metric{
				testutil.MustMetric(
					"test",
					map[string]string{},
					map[string]interface{}{
						"int":     int(0),
						"int8":    int8(1),
						"int16":   int16(2),
						"int32":   int32(3),
						"int64":   int64(4),
						"uint":    uint(5),
						"uint8":   uint8(6),
						"uint16":  uint16(7),
						"uint32":  uint32(8),
						"uint64":  uint64(9),
						"float32": float32(10.0),
						"float64": float64(11.0),
						"string":  "string",
						"bool":    true,
					},
					time.Now(),
				),
			},
			numRows:    1,
			numColumns: 15,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			testDir := t.TempDir()
			plugin := &Parquet{
				Directory:          testDir,
				TimestampFieldName: defaultTimestampFieldName,
			}
			require.NoError(t, plugin.Init())
			require.NoError(t, plugin.Connect())
			require.NoError(t, plugin.Write(tc.metrics))
			require.NoError(t, plugin.Close())

			// Read metrics from parquet file
			files, err := os.ReadDir(testDir)
			require.NoError(t, err)
			require.Len(t, files, 1)
			reader, err := file.OpenParquetFile(filepath.Join(testDir, files[0].Name()), false)
			require.NoError(t, err)
			defer reader.Close()

			metadata := reader.MetaData()
			require.Equal(t, tc.numRows, int(metadata.NumRows))
			require.Equal(t, tc.numColumns, metadata.Schema.NumColumns())
		})
	}
}
|
||||
|
||||
// TestRotation writes repeatedly with a one-second rotation interval and
// waits until a second file appears, proving time-based rotation happens.
func TestRotation(t *testing.T) {
	metrics := []telegraf.Metric{
		testutil.MustMetric(
			"test",
			map[string]string{},
			map[string]interface{}{
				"value": 1.0,
			},
			time.Now(),
		),
	}

	testDir := t.TempDir()
	plugin := &Parquet{
		Directory:          testDir,
		RotationInterval:   config.Duration(1 * time.Second),
		TimestampFieldName: defaultTimestampFieldName,
	}

	require.NoError(t, plugin.Init())
	require.NoError(t, plugin.Connect())
	require.Eventually(t, func() bool {
		require.NoError(t, plugin.Write(metrics))
		files, err := os.ReadDir(testDir)
		require.NoError(t, err)
		// Rotation renames the old file and opens a new one: two files.
		return len(files) == 2
	}, 5*time.Second, time.Second)
	require.NoError(t, plugin.Close())
}
|
||||
|
||||
// TestOmitTimestamp verifies that an empty TimestampFieldName produces a
// file with only the metric's own column (no timestamp column).
func TestOmitTimestamp(t *testing.T) {
	metrics := []telegraf.Metric{
		testutil.MustMetric(
			"test",
			map[string]string{},
			map[string]interface{}{
				"value": 1.0,
			},
			time.Now(),
		),
	}

	testDir := t.TempDir()
	plugin := &Parquet{
		Directory: testDir,
	}
	require.NoError(t, plugin.Init())
	require.NoError(t, plugin.Connect())
	require.NoError(t, plugin.Write(metrics))
	require.NoError(t, plugin.Close())

	files, err := os.ReadDir(testDir)
	require.NoError(t, err)
	require.Len(t, files, 1)
	reader, err := file.OpenParquetFile(filepath.Join(testDir, files[0].Name()), false)
	require.NoError(t, err)
	defer reader.Close()

	metadata := reader.MetaData()
	require.Equal(t, 1, int(metadata.NumRows))
	require.Equal(t, 1, metadata.Schema.NumColumns())
}
|
||||
|
||||
// TestTimestampDifferentName verifies that a custom TimestampFieldName
// still yields the extra timestamp column (field column + "time").
func TestTimestampDifferentName(t *testing.T) {
	metrics := []telegraf.Metric{
		testutil.MustMetric(
			"test",
			map[string]string{},
			map[string]interface{}{
				"value": 1.0,
			},
			time.Now(),
		),
	}

	testDir := t.TempDir()
	plugin := &Parquet{
		Directory:          testDir,
		TimestampFieldName: "time",
	}
	require.NoError(t, plugin.Init())
	require.NoError(t, plugin.Connect())
	require.NoError(t, plugin.Write(metrics))
	require.NoError(t, plugin.Close())

	files, err := os.ReadDir(testDir)
	require.NoError(t, err)
	require.Len(t, files, 1)
	reader, err := file.OpenParquetFile(filepath.Join(testDir, files[0].Name()), false)
	require.NoError(t, err)
	defer reader.Close()

	metadata := reader.MetaData()
	require.Equal(t, 1, int(metadata.NumRows))
	require.Equal(t, 2, metadata.Schema.NumColumns())
}
|
14
plugins/outputs/parquet/sample.conf
Normal file
14
plugins/outputs/parquet/sample.conf
Normal file
|
@ -0,0 +1,14 @@
|
|||
# A plugin that writes metrics to parquet files
|
||||
[[outputs.parquet]]
|
||||
## Directory to write parquet files in. If a file already exists the output
|
||||
## will attempt to continue using the existing file.
|
||||
# directory = "."
|
||||
|
||||
## Files are rotated after the time interval specified. When set to 0 no time
|
||||
## based rotation is performed.
|
||||
# rotation_interval = "0h"
|
||||
|
||||
## Timestamp field name
|
||||
## Field name to use to store the timestamp. If set to an empty string, then
|
||||
## the timestamp is omitted.
|
||||
# timestamp_field_name = "timestamp"
|
Loading…
Add table
Add a link
Reference in a new issue