Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent e393c3af3f
commit 4978089aab

4963 changed files with 677545 additions and 0 deletions
plugins/parsers/parquet/README.md (new file, 51 lines)

# Parquet Parser Plugin

The Parquet parser parses Parquet files read by an input plugin, such as `inputs.file`, into Telegraf metrics.

## Configuration

```toml
[[inputs.file]]
  files = ["example"]

  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
  data_format = "parquet"

  ## Tag columns is an array of columns that should be added as tags.
  # tag_columns = []

  ## Measurement column is the column to use as the measurement name.
  # measurement_column = ""

  ## Timestamp column is the column containing the time that should be used to
  ## create the metric. If not set, the time of parsing is used.
  # timestamp_column = ""

  ## Timestamp format is the time layout that should be used to interpret the
  ## timestamp_column. The time must be `unix`, `unix_ms`, `unix_us`, `unix_ns`,
  ## or a time in the "reference time". To define a different format, arrange
  ## the values from the "reference time" in the example to match the format
  ## you will be using. For more information on the "reference time", visit
  ## https://golang.org/pkg/time/#Time.Format
  ## ex: timestamp_format = "Mon Jan 2 15:04:05 -0700 MST 2006"
  ##     timestamp_format = "2006-01-02T15:04:05Z07:00"
  ##     timestamp_format = "01/02/2006 15:04:05"
  ##     timestamp_format = "unix"
  ##     timestamp_format = "unix_ms"
  # timestamp_format = ""

  ## Timezone allows you to provide an override for timestamps that
  ## do not already include an offset, e.g. 04/06/2016 12:41:45
  ##
  ## Default: "" which renders UTC
  ## Options are as follows:
  ##   1. Local              -- interpret based on machine localtime
  ##   2. "America/New_York" -- Unix TZ values like those found in
  ##      https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
  ##   3. UTC                -- or blank/unspecified, will return timestamp in UTC
  # timestamp_timezone = ""
```
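As a concrete illustration (taken from the `dense` test case added later in this commit), a file with columns `tag`, `float_field`, `str_field`, and `timestamp`, parsed with `measurement_column = "str_field"`, `tag_columns = ["tag"]`, and `timestamp_column = "timestamp"`, yields one metric per row in line protocol:

```text
a,tag=row1 float_field=64 1710683695000000000
b,tag=row1 float_field=65 1710683695000000000
```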
plugins/parsers/parquet/columns.go (new file, 126 lines)

```go
package parquet

import (
	"reflect"

	"github.com/apache/arrow-go/v18/parquet"
	"github.com/apache/arrow-go/v18/parquet/file"
)

func newColumnParser(reader file.ColumnChunkReader) *columnParser {
	batchSize := 128

	// Allocate a value buffer matching the column's physical type.
	var valueBuffer interface{}
	switch reader.(type) {
	case *file.BooleanColumnChunkReader:
		valueBuffer = make([]bool, batchSize)
	case *file.Int32ColumnChunkReader:
		valueBuffer = make([]int32, batchSize)
	case *file.Int64ColumnChunkReader:
		valueBuffer = make([]int64, batchSize)
	case *file.Float32ColumnChunkReader:
		valueBuffer = make([]float32, batchSize)
	case *file.Float64ColumnChunkReader:
		valueBuffer = make([]float64, batchSize)
	case *file.ByteArrayColumnChunkReader:
		valueBuffer = make([]parquet.ByteArray, batchSize)
	case *file.FixedLenByteArrayColumnChunkReader:
		valueBuffer = make([]parquet.FixedLenByteArray, batchSize)
	}

	return &columnParser{
		name:        reader.Descriptor().Name(),
		reader:      reader,
		batchSize:   int64(batchSize),
		defLevels:   make([]int16, batchSize),
		repLevels:   make([]int16, batchSize),
		valueBuffer: valueBuffer,
	}
}

type columnParser struct {
	name      string
	reader    file.ColumnChunkReader
	batchSize int64

	valueOffset    int
	valuesBuffered int

	levelOffset    int64
	levelsBuffered int64
	defLevels      []int16
	repLevels      []int16

	valueBuffer interface{}
}

func (c *columnParser) readNextBatch() error {
	var err error

	switch reader := c.reader.(type) {
	case *file.BooleanColumnChunkReader:
		values := c.valueBuffer.([]bool)
		c.levelsBuffered, c.valuesBuffered, err = reader.ReadBatch(c.batchSize, values, c.defLevels, c.repLevels)
	case *file.Int32ColumnChunkReader:
		values := c.valueBuffer.([]int32)
		c.levelsBuffered, c.valuesBuffered, err = reader.ReadBatch(c.batchSize, values, c.defLevels, c.repLevels)
	case *file.Int64ColumnChunkReader:
		values := c.valueBuffer.([]int64)
		c.levelsBuffered, c.valuesBuffered, err = reader.ReadBatch(c.batchSize, values, c.defLevels, c.repLevels)
	case *file.Float32ColumnChunkReader:
		values := c.valueBuffer.([]float32)
		c.levelsBuffered, c.valuesBuffered, err = reader.ReadBatch(c.batchSize, values, c.defLevels, c.repLevels)
	case *file.Float64ColumnChunkReader:
		values := c.valueBuffer.([]float64)
		c.levelsBuffered, c.valuesBuffered, err = reader.ReadBatch(c.batchSize, values, c.defLevels, c.repLevels)
	case *file.ByteArrayColumnChunkReader:
		values := c.valueBuffer.([]parquet.ByteArray)
		c.levelsBuffered, c.valuesBuffered, err = reader.ReadBatch(c.batchSize, values, c.defLevels, c.repLevels)
	case *file.FixedLenByteArrayColumnChunkReader:
		values := c.valueBuffer.([]parquet.FixedLenByteArray)
		c.levelsBuffered, c.valuesBuffered, err = reader.ReadBatch(c.batchSize, values, c.defLevels, c.repLevels)
	}

	c.valueOffset = 0
	c.levelOffset = 0

	return err
}

func (c *columnParser) HasNext() bool {
	return c.levelOffset < c.levelsBuffered || c.reader.HasNext()
}

func (c *columnParser) Next() (interface{}, bool) {
	if c.levelOffset == c.levelsBuffered {
		if !c.HasNext() {
			return nil, false
		}
		if err := c.readNextBatch(); err != nil {
			return nil, false
		}
		if c.levelsBuffered == 0 {
			return nil, false
		}
	}

	defLevel := c.defLevels[int(c.levelOffset)]
	c.levelOffset++

	// A definition level below the column maximum marks a null cell.
	if defLevel < c.reader.Descriptor().MaxDefinitionLevel() {
		return nil, true
	}

	vb := reflect.ValueOf(c.valueBuffer)
	val := vb.Index(c.valueOffset).Interface()
	c.valueOffset++

	// Convert byte arrays to strings
	switch v := val.(type) {
	case parquet.ByteArray:
		val = string(v)
	case parquet.FixedLenByteArray:
		val = string(v)
	}

	return val, true
}
```
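To make the iterator's contract concrete, here is a minimal same-package sketch of how one column chunk would be drained; `drainColumn` is a hypothetical helper (not part of the commit), and the real consumer is `Parse` in `parser.go` below. `ok == false` ends iteration (read error or exhaustion), while a nil value with `ok == true` marks a null cell:

```go
package parquet

import (
	"fmt"

	"github.com/apache/arrow-go/v18/parquet/file"
)

// drainColumn prints every cell of a single column chunk.
func drainColumn(rowGroup *file.RowGroupReader, colIndex int) error {
	col, err := rowGroup.Column(colIndex)
	if err != nil {
		return err
	}
	parser := newColumnParser(col)
	for parser.HasNext() {
		val, ok := parser.Next()
		if !ok {
			break // read error or no more buffered values
		}
		if val == nil {
			fmt.Printf("%s: <null>\n", parser.name)
			continue
		}
		fmt.Printf("%s: %v\n", parser.name, val)
	}
	return nil
}
```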
plugins/parsers/parquet/parser.go (new file, 149 lines)

```go
package parquet

import (
	"bytes"
	"errors"
	"fmt"
	"slices"
	"time"

	"github.com/apache/arrow-go/v18/parquet/file"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/internal"
	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/plugins/parsers"
)

type Parser struct {
	MeasurementColumn string   `toml:"measurement_column"`
	TagColumns        []string `toml:"tag_columns"`
	TimestampColumn   string   `toml:"timestamp_column"`
	TimestampFormat   string   `toml:"timestamp_format"`
	TimestampTimezone string   `toml:"timestamp_timezone"`

	defaultTags map[string]string
	location    *time.Location
	metricName  string
}

func (p *Parser) Init() error {
	if p.TimestampFormat == "" {
		p.TimestampFormat = "unix"
	}
	if p.TimestampTimezone == "" {
		p.location = time.UTC
	} else {
		loc, err := time.LoadLocation(p.TimestampTimezone)
		if err != nil {
			return fmt.Errorf("invalid location %s: %w", p.TimestampTimezone, err)
		}
		p.location = loc
	}

	return nil
}

func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
	reader := bytes.NewReader(buf)
	parquetReader, err := file.NewParquetReader(reader)
	if err != nil {
		return nil, fmt.Errorf("unable to create parquet reader: %w", err)
	}
	metadata := parquetReader.MetaData()

	now := time.Now()
	metrics := make([]telegraf.Metric, 0, metadata.NumRows)
	for i := 0; i < parquetReader.NumRowGroups(); i++ {
		// Create one column iterator per column of this row group.
		rowGroup := parquetReader.RowGroup(i)
		scanners := make([]*columnParser, metadata.Schema.NumColumns())
		for colIndex := range metadata.Schema.NumColumns() {
			col, err := rowGroup.Column(colIndex)
			if err != nil {
				return nil, fmt.Errorf("unable to fetch column %d: %w", colIndex, err)
			}

			scanners[colIndex] = newColumnParser(col)
		}

		// Columns are drained one after another; rowIndex wraps back to
		// zero at each column boundary so values line up with the metric
		// of their row.
		rowIndex := 0
		rowGroupMetrics := make([]telegraf.Metric, rowGroup.NumRows())
		for _, s := range scanners {
			for s.HasNext() {
				if rowIndex%int(rowGroup.NumRows()) == 0 {
					rowIndex = 0
				}

				val, ok := s.Next()
				if !ok || val == nil {
					rowIndex++
					continue
				}

				if rowGroupMetrics[rowIndex] == nil {
					rowGroupMetrics[rowIndex] = metric.New(p.metricName, p.defaultTags, nil, now)
				}

				if p.MeasurementColumn != "" && s.name == p.MeasurementColumn {
					valStr, err := internal.ToString(val)
					if err != nil {
						return nil, fmt.Errorf("could not convert value to string: %w", err)
					}
					rowGroupMetrics[rowIndex].SetName(valStr)
				} else if p.TagColumns != nil && slices.Contains(p.TagColumns, s.name) {
					valStr, err := internal.ToString(val)
					if err != nil {
						return nil, fmt.Errorf("could not convert value to string: %w", err)
					}
					rowGroupMetrics[rowIndex].AddTag(s.name, valStr)
				} else if p.TimestampColumn != "" && s.name == p.TimestampColumn {
					valStr, err := internal.ToString(val)
					if err != nil {
						return nil, fmt.Errorf("could not convert value to string: %w", err)
					}
					timestamp, err := internal.ParseTimestamp(p.TimestampFormat, valStr, p.location)
					if err != nil {
						return nil, fmt.Errorf("could not parse %q with format %q: %w", valStr, p.TimestampFormat, err)
					}
					rowGroupMetrics[rowIndex].SetTime(timestamp)
				} else {
					rowGroupMetrics[rowIndex].AddField(s.name, val)
				}

				rowIndex++
			}
		}

		metrics = append(metrics, rowGroupMetrics...)
	}

	return metrics, nil
}

func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
	metrics, err := p.Parse([]byte(line))
	if err != nil {
		return nil, err
	}

	if len(metrics) < 1 {
		return nil, nil
	}
	if len(metrics) > 1 {
		return nil, errors.New("line contains multiple metrics")
	}

	return metrics[0], nil
}

func (p *Parser) SetDefaultTags(tags map[string]string) {
	p.defaultTags = tags
}

func init() {
	parsers.Add("parquet",
		func(defaultMetricName string) telegraf.Parser {
			return &Parser{metricName: defaultMetricName}
		},
	)
}
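```
For orientation, a minimal sketch of driving this parser directly, outside Telegraf's plugin registry. The file name and column names are assumptions mirroring the test cases below; in normal operation the registry's `init()` factory supplies the default metric name, so a directly constructed `Parser` names metrics via `measurement_column` or leaves the name empty:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/influxdata/telegraf/plugins/parsers/parquet"
)

func main() {
	// Configure the parser the same way the TOML options would.
	parser := &parquet.Parser{
		TagColumns:      []string{"tag"},
		TimestampColumn: "timestamp",
		TimestampFormat: "unix",
	}
	if err := parser.Init(); err != nil {
		log.Fatal(err)
	}

	// Parse a whole Parquet file from memory ("input.parquet" is hypothetical).
	buf, err := os.ReadFile("input.parquet")
	if err != nil {
		log.Fatal(err)
	}
	metrics, err := parser.Parse(buf)
	if err != nil {
		log.Fatal(err)
	}
	for _, m := range metrics {
		fmt.Println(m.Name(), m.Tags(), m.Fields(), m.Time())
	}
}
```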
plugins/parsers/parquet/parser_test.go (new file, 76 lines)

```go
package parquet

import (
	"os"
	"path/filepath"
	"testing"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	"github.com/stretchr/testify/require"

	"github.com/influxdata/telegraf/config"
	"github.com/influxdata/telegraf/testutil"
	test "github.com/influxdata/telegraf/testutil/plugin_input"
)

func TestCases(t *testing.T) {
	folders, err := os.ReadDir("testcases")
	require.NoError(t, err)
	require.NotEmpty(t, folders)

	for _, f := range folders {
		testcasePath := filepath.Join("testcases", f.Name())
		configFilename := filepath.Join(testcasePath, "telegraf.conf")
		t.Run(f.Name(), func(t *testing.T) {
			// Configure the plugin
			cfg := config.NewConfig()
			require.NoError(t, cfg.LoadConfig(configFilename))
			require.Len(t, cfg.Inputs, 1)

			// Tune the test-plugin
			plugin := cfg.Inputs[0].Input.(*test.Plugin)
			plugin.Path = testcasePath
			require.NoError(t, plugin.Init())

			// Gather the metrics and check for potential errors
			var acc testutil.Accumulator
			err := plugin.Gather(&acc)
			switch len(plugin.ExpectedErrors) {
			case 0:
				require.NoError(t, err)
			case 1:
				require.ErrorContains(t, err, plugin.ExpectedErrors[0])
			default:
				require.Contains(t, plugin.ExpectedErrors, err.Error())
			}

			// Determine checking options
			options := []cmp.Option{
				cmpopts.EquateApprox(0, 1e-6),
				testutil.SortMetrics(),
			}
			if plugin.ShouldIgnoreTimestamp {
				options = append(options, testutil.IgnoreTime())
			}

			// Process expected metrics and compare with resulting metrics
			actual := acc.GetTelegrafMetrics()
			testutil.RequireMetricsEqual(t, plugin.Expected, actual, options...)
		})
	}
}

func BenchmarkParsing(b *testing.B) {
	plugin := &Parser{}

	benchmarkData, err := os.ReadFile("testcases/benchmark/input.parquet")
	require.NoError(b, err)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		//nolint:errcheck // Benchmarking so skip the error check to avoid the unnecessary operations
		plugin.Parse(benchmarkData)
	}
}
```
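To exercise these cases locally, the standard Go toolchain invocations apply (paths assume the repository root):

```sh
go test ./plugins/parsers/parquet/ -run TestCases -v
go test ./plugins/parsers/parquet/ -bench BenchmarkParsing -benchmem
```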
plugins/parsers/parquet/testcases/benchmark/expected.out (new file, 1 line)

```text
test value=42i 1710683608143228692
```
plugins/parsers/parquet/testcases/benchmark/generate.py (new file, 11 lines)

```python
#!/usr/bin/env python
import pandas
import pyarrow
import pyarrow.parquet

df = pandas.DataFrame({
    'value': [42],
    'timestamp': ["1710683608143228692"]
})

pyarrow.parquet.write_table(pyarrow.Table.from_pandas(df), "input.parquet")
```
plugins/parsers/parquet/testcases/benchmark/input.parquet (new binary file, not shown)

plugins/parsers/parquet/testcases/benchmark/telegraf.conf (new file, 6 lines)

```toml
[[inputs.test]]
  files = ["input.parquet"]
  data_format = "parquet"

  timestamp_column = "timestamp"
  timestamp_format = "unix_ns"
```
plugins/parsers/parquet/testcases/datatypes/expected.out (new file, 2 lines)

```text
row1,byteArray=Short,fixedLengthByteArray=STRING boolean=true,float32=1,float64=64,int32=-2147483648i,int64=-9223372036854775808i 1710697199000000000
row2,byteArray=Much\ longer\ string\ here...,fixedLengthByteArray=FOOBAR boolean=false,float32=1.1234568357467651,float64=65.1234567891212,int32=2147483647i,int64=9223372036854775807i 551812924000000000
```
plugins/parsers/parquet/testcases/datatypes/generate.py (new file, 33 lines)

```python
#!/usr/bin/env python
import pandas
import pyarrow
import pyarrow.parquet

df = pandas.DataFrame({
    'boolean': [True, False],
    'string': ["row1", "row2"],
    'int32': [-2147483648, 2147483647],
    'int64': [-9223372036854775808, 9223372036854775807],
    'float32': [1.000000001, 1.123456789],
    'float64': [64.00000000000000001, 65.12345678912121212],
    'byteArray': ["Short", "Much longer string here..."],
    'fixedLengthByteArray': ["STRING", "FOOBAR"],
    'timestamp': [
        "Sun, 17 Mar 2024 10:39:59 MST",
        "Sat, 27 Jun 1987 10:22:04 MST",
    ]
})

schema = pyarrow.schema([
    pyarrow.field('boolean', pyarrow.bool_()),
    pyarrow.field('string', pyarrow.string()),
    pyarrow.field('int32', pyarrow.int32()),
    pyarrow.field('int64', pyarrow.int64()),
    pyarrow.field('float32', pyarrow.float32()),
    pyarrow.field('float64', pyarrow.float64()),
    pyarrow.field('byteArray', pyarrow.binary()),
    pyarrow.field('fixedLengthByteArray', pyarrow.binary(6)),
    pyarrow.field('timestamp', pyarrow.binary())
])

pyarrow.parquet.write_table(pyarrow.Table.from_pandas(df, schema), "input.parquet")
```
plugins/parsers/parquet/testcases/datatypes/input.parquet (new binary file, not shown)

plugins/parsers/parquet/testcases/datatypes/telegraf.conf (new file, 9 lines)

```toml
[[inputs.test]]
  files = ["input.parquet"]
  data_format = "parquet"

  measurement_column = "string"
  tag_columns = ["byteArray", "fixedLengthByteArray"]
  timestamp_column = "timestamp"
  timestamp_format = "Mon, 02 Jan 2006 15:04:05 MST"
  timestamp_timezone = "MST"
```
plugins/parsers/parquet/testcases/dense/expected.out (new file, 7 lines)

```text
a,tag=row1 float_field=64 1710683695000000000
b,tag=row1 float_field=65 1710683695000000000
c,tag=row1 float_field=66 1710683695000000000
d,tag=row1 float_field=67 1710683695000000000
e,tag=row1 float_field=68 1710683695000000000
f,tag=row1 float_field=69 1710683695000000000
g,tag=row1 float_field=70 1710683695000000000
```
plugins/parsers/parquet/testcases/dense/generate.py (new file, 16 lines)

```python
#!/usr/bin/env python
import pandas
import pyarrow
import pyarrow.parquet

df = pandas.DataFrame({
    'tag': ["row1", "row1", "row1", "row1", "row1", "row1", "row1"],
    'float_field': [64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0],
    'str_field': ["a", "b", "c", "d", "e", "f", "g"],
    'timestamp': [
        1710683695, 1710683695, 1710683695, 1710683695, 1710683695,
        1710683695, 1710683695,
    ]
})

pyarrow.parquet.write_table(pyarrow.Table.from_pandas(df), "input.parquet")
```
plugins/parsers/parquet/testcases/dense/input.parquet (new binary file, not shown)
plugins/parsers/parquet/testcases/dense/telegraf.conf (new file, 8 lines)

```toml
[[inputs.test]]
  files = ["input.parquet"]
  data_format = "parquet"

  measurement_column = "str_field"
  tag_columns = ["tag"]
  timestamp_column = "timestamp"
  timestamp_format = "unix"
```
plugins/parsers/parquet/testcases/empty/expected.out (new file, empty)
plugins/parsers/parquet/testcases/empty/generate.py (new file, 6 lines)

```python
#!/usr/bin/env python
import pandas
import pyarrow
import pyarrow.parquet

pyarrow.parquet.write_table(pyarrow.Table.from_pandas(pandas.DataFrame()), "input.parquet")
```
plugins/parsers/parquet/testcases/empty/input.parquet (new binary file, not shown)
plugins/parsers/parquet/testcases/empty/telegraf.conf (new file, 3 lines)

```toml
[[inputs.test]]
  files = ["input.parquet"]
  data_format = "parquet"
```
plugins/parsers/parquet/testcases/multitable/expected.out (new file, 21 lines)

```text
test,tag=row1 float_field=64 1710683608143228692
test,tag=row1 float_field=65 1710683608143228692
test,tag=row1 float_field=66 1710683608143228692
test,tag=row1 float_field=67 1710683608143228692
test,tag=row1 float_field=68 1710683608143228692
test,tag=row1 float_field=69 1710683608143228692
test,tag=row1 float_field=70 1710683608143228692
test,tag=row1 float_field=64 1710683608143228693
test,tag=row1 float_field=65 1710683608143228693
test,tag=row1 float_field=66 1710683608143228693
test,tag=row1 float_field=67 1710683608143228693
test,tag=row1 float_field=68 1710683608143228693
test,tag=row1 float_field=69 1710683608143228693
test,tag=row1 float_field=70 1710683608143228693
test,tag=row1 float_field=64 1710683608143228694
test,tag=row1 float_field=65 1710683608143228694
test,tag=row1 float_field=66 1710683608143228694
test,tag=row1 float_field=67 1710683608143228694
test,tag=row1 float_field=68 1710683608143228694
test,tag=row1 float_field=69 1710683608143228694
test,tag=row1 float_field=70 1710683608143228694
```
plugins/parsers/parquet/testcases/multitable/generate.py (new file, 39 lines)

```python
#!/usr/bin/env python
import pandas
import pyarrow
import pyarrow.parquet

df1 = pandas.DataFrame({
    'tag': ["row1", "row1", "row1", "row1", "row1", "row1", "row1"],
    'float_field': [64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0],
    'timestamp': [
        "1710683608143228692", "1710683608143228692", "1710683608143228692",
        "1710683608143228692", "1710683608143228692", "1710683608143228692",
        "1710683608143228692",
    ]
})

df2 = pandas.DataFrame({
    'tag': ["row1", "row1", "row1", "row1", "row1", "row1", "row1"],
    'float_field': [64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0],
    'timestamp': [
        "1710683608143228693", "1710683608143228693", "1710683608143228693",
        "1710683608143228693", "1710683608143228693", "1710683608143228693",
        "1710683608143228693",
    ]
})

df3 = pandas.DataFrame({
    'tag': ["row1", "row1", "row1", "row1", "row1", "row1", "row1"],
    'float_field': [64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0],
    'timestamp': [
        "1710683608143228694", "1710683608143228694", "1710683608143228694",
        "1710683608143228694", "1710683608143228694", "1710683608143228694",
        "1710683608143228694",
    ]
})

# Each write_table call produces its own row group in the output file.
with pyarrow.parquet.ParquetWriter('input.parquet', pyarrow.Table.from_pandas(df1).schema) as writer:
    writer.write_table(pyarrow.Table.from_pandas(df1))
    writer.write_table(pyarrow.Table.from_pandas(df2))
    writer.write_table(pyarrow.Table.from_pandas(df3))
```
plugins/parsers/parquet/testcases/multitable/input.parquet (new binary file, not shown)

plugins/parsers/parquet/testcases/multitable/telegraf.conf (new file, 8 lines)

```toml
[[inputs.test]]
  files = ["input.parquet"]
  data_format = "parquet"

  measurement_column = "str_field"
  tag_columns = ["tag"]
  timestamp_column = "timestamp"
  timestamp_format = "unix_ns"
```
plugins/parsers/parquet/testcases/sparse/expected.out (new file, 6 lines)

```text
test,tag=row1 float_field=64 1709313032000000000
test,tag=row2 float_field=65 1709399432000000000
test,tag=row3 int_field=65 1709485832000000000
test,tag=row4 uint_field=5 1709572232000000000
test,tag=row5 bool_field=true 1709658632000000000
test,str_field=blargh,tag=multi_field bool_field=false 1709831432000000000
```
plugins/parsers/parquet/testcases/sparse/generate.py (new file, 20 lines)

```python
#!/usr/bin/env python
import pandas
import pyarrow
import pyarrow.parquet

df = pandas.DataFrame({
    'tag': ["row1", "row2", "row3", "row4", "row5", "row6", "multi_field"],
    'float_field': [64.0, 65.0, None, None, None, None, None],
    'int_field': [None, None, 65, None, None, None, None],
    'uint_field': [None, None, None, 5, None, None, None],
    'bool_field': [None, None, None, None, True, None, False],
    'str_field': [None, None, None, None, None, "blargh", "blargh"],
    'timestamp': [
        "2024-03-01T17:10:32", "2024-03-02T17:10:32", "2024-03-03T17:10:32",
        "2024-03-04T17:10:32", "2024-03-05T17:10:32", "2024-03-06T17:10:32",
        "2024-03-07T17:10:32",
    ]
})

pyarrow.parquet.write_table(pyarrow.Table.from_pandas(df), "input.parquet")
```
plugins/parsers/parquet/testcases/sparse/input.parquet (new binary file, not shown)
plugins/parsers/parquet/testcases/sparse/telegraf.conf (new file, 7 lines)

```toml
[[inputs.test]]
  files = ["input.parquet"]
  data_format = "parquet"

  tag_columns = ["tag", "str_field"]
  timestamp_column = "timestamp"
  timestamp_format = "2006-01-02T15:04:05"
```
plugins/parsers/parquet/testcases/timestamp/expected.out (new file, 3 lines)

```text
test value=1.1 1710511506000000000
test value=2.2 1710597906000000000
test value=3.3 1710684306000000000
```
plugins/parsers/parquet/testcases/timestamp/generate.py (new file, 14 lines)

```python
#!/usr/bin/env python
import pandas
import pyarrow
import pyarrow.parquet

df = pandas.DataFrame({
    'value': [1.1, 2.2, 3.3],
    'timestamp': [
        "2024-03-15T14:05:06+00:00", "2024-03-16T14:05:06+00:00",
        "2024-03-17T14:05:06+00:00",
    ]
})

pyarrow.parquet.write_table(pyarrow.Table.from_pandas(df), "input.parquet")
```
plugins/parsers/parquet/testcases/timestamp/input.parquet (new binary file, not shown)

plugins/parsers/parquet/testcases/timestamp/telegraf.conf (new file, 7 lines)

```toml
[[inputs.test]]
  files = ["input.parquet"]
  data_format = "parquet"

  tag_columns = ["tag", "str_field"]
  timestamp_column = "timestamp"
  timestamp_format = "2006-01-02T15:04:05Z07:00"
```