Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel (GPG key ID: FBB4F0E80A80222F)
4963 changed files with 677545 additions and 0 deletions

@@ -0,0 +1,106 @@
# Directory Monitor Input Plugin
This plugin monitors a single directory (optionally traversing
sub-directories) and processes each file placed in it. The plugin gathers all
files in the directory at the configured interval and parses the ones that
haven't been picked up yet.
> [!NOTE]
> Files should not be in use by another process, or the plugin may fail.
> Furthermore, files should not be written _live_ to the monitored directory.
> If you absolutely must write files directly, they must be guaranteed to
> finish being written before the `directory_duration_threshold` elapses.
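If a producer must write files itself, the usual safe pattern is to stage the
file elsewhere on the same filesystem and then move it into the monitored
directory with an atomic rename, so the plugin never sees a partial file. A
minimal sketch, assuming illustrative paths and a hypothetical `dropFile`
helper (`os.Rename` is only atomic within a single filesystem):

```go
package main

import (
	"log"
	"os"
	"path/filepath"
)

// dropFile writes data to stagingDir and then atomically renames it into
// monitoredDir. stagingDir must live on the same filesystem as monitoredDir,
// since os.Rename is only atomic within one filesystem.
func dropFile(stagingDir, monitoredDir, name string, data []byte) error {
	staged := filepath.Join(stagingDir, name)
	if err := os.WriteFile(staged, data, 0640); err != nil {
		return err
	}
	// The rename makes the file appear in the monitored directory
	// fully written, so the plugin cannot read it half-finished.
	return os.Rename(staged, filepath.Join(monitoredDir, name))
}

func main() {
	// Example paths; point these at your own directories.
	err := dropFile("/var/spool/telegraf/staging", "/var/spool/telegraf/incoming",
		"example.csv", []byte("thing,color\nsky,blue\n"))
	if err != nil {
		log.Fatal(err)
	}
}
```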
⭐ Telegraf v1.18.0
🏷️ system
💻 all
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
In addition to the plugin-specific configuration settings, plugins support
additional global and plugin configuration settings. These settings are used to
modify metrics, tags, and fields, create aliases, and configure ordering, etc.
See [CONFIGURATION.md][CONFIGURATION.md] for more details.
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
## Configuration
```toml @sample.conf
# Ingests files in a directory and then moves them to a target directory.
[[inputs.directory_monitor]]
## The directory to monitor and read files from (including sub-directories if "recursive" is true).
directory = ""
#
## The directory to move finished files to (maintaining directory hierarchy from source).
finished_directory = ""
#
## Setting recursive to true will make the plugin recursively walk the directory and process all sub-directories.
# recursive = false
#
## The directory to move files to upon file error.
## If not provided, erroring files will stay in the monitored directory.
# error_directory = ""
#
## The amount of time a file is allowed to sit in the directory before it is picked up.
## This can generally be low, but if large files are written to the directory slowly,
## set it higher so the plugin waits until each file is fully copied before reading it.
# directory_duration_threshold = "50ms"
#
## An allow-list of file name patterns to monitor, if necessary; only matching files are ingested. Supports regex. If left blank, all files are ingested.
# files_to_monitor = ["^.*\\.csv"]
#
## A list of files to ignore, if necessary. Supports regex.
# files_to_ignore = [".DS_Store"]
#
## Maximum number of metrics to buffer that have not yet been written by the
## output. For best throughput set this to the size of the output's metric_buffer_limit.
## Warning: setting this number higher than the output's metric_buffer_limit can cause dropped metrics.
# max_buffered_metrics = 10000
#
## The maximum number of file paths to queue up for processing at once, before waiting until queued files are processed to find more files.
## Lowering this value will result in *slightly* less memory use, with a potential sacrifice in speed, if absolutely necessary.
# file_queue_size = 100000
#
## Name of a tag to hold the name of the file the data was parsed from. Leave empty
## to disable. Be cautious when file name variation is high, as this can increase the
## cardinality significantly. Read more about cardinality here:
## https://docs.influxdata.com/influxdb/cloud/reference/glossary/#series-cardinality
# file_tag = ""
#
## Specify if the file can be read completely at once or if it needs to be read line by line (default).
## Possible values: "line-by-line", "at-once"
# parse_method = "line-by-line"
#
## The data format to be read from the files.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"
```
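For instance, a minimal working configuration only needs the two required
directories and a parser; the paths below are illustrative:

```toml
[[inputs.directory_monitor]]
  ## Example paths; point these at your own directories.
  directory = "/var/spool/telegraf/incoming"
  finished_directory = "/var/spool/telegraf/finished"

  ## Parse each file as CSV with a single header row.
  data_format = "csv"
  csv_header_row_count = 1
```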
## Metrics
The format of metrics produced by this plugin depends on the content and data
format of the file.
When the [internal][] input is enabled:
- internal_directory_monitor
- fields:
- files_processed - How many files have been processed (counter)
- files_dropped - How many files have been dropped (counter)
- internal_directory_monitor
- tags:
- directory - The monitored directory
- fields:
- files_processed_per_dir - How many files have been processed (counter)
- files_dropped_per_dir - How many files have been dropped (counter)
    - files_queue_per_dir - How many files are waiting to be processed (gauge)
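As a rough illustration (the directory tag and values below are made up), the
internal metrics look along these lines in line protocol:

```text
internal_directory_monitor files_processed=42i,files_dropped=1i
internal_directory_monitor,directory=/var/spool/telegraf/incoming files_processed_per_dir=42i,files_dropped_per_dir=1i,files_queue_per_dir=3i
```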
## Example Output
The metrics produced by this plugin depend on the content and data format of
the file.
[internal]: /plugins/inputs/internal

@@ -0,0 +1,479 @@
//go:generate ../../../tools/readme_config_includer/generator
package directory_monitor
import (
"bufio"
"compress/gzip"
"context"
_ "embed"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"strings"
"sync"
"time"
"github.com/djherbis/times"
"golang.org/x/sync/semaphore"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/internal/choice"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers"
"github.com/influxdata/telegraf/selfstat"
)
//go:embed sample.conf
var sampleConfig string
var once sync.Once
const (
defaultMaxBufferedMetrics = 10000
defaultDirectoryDurationThreshold = config.Duration(0 * time.Millisecond)
defaultFileQueueSize = 100000
defaultParseMethod = "line-by-line"
)
type DirectoryMonitor struct {
Directory string `toml:"directory"`
FinishedDirectory string `toml:"finished_directory"`
Recursive bool `toml:"recursive"`
ErrorDirectory string `toml:"error_directory"`
FileTag string `toml:"file_tag"`
FilesToMonitor []string `toml:"files_to_monitor"`
FilesToIgnore []string `toml:"files_to_ignore"`
MaxBufferedMetrics int `toml:"max_buffered_metrics"`
DirectoryDurationThreshold config.Duration `toml:"directory_duration_threshold"`
Log telegraf.Logger `toml:"-"`
FileQueueSize int `toml:"file_queue_size"`
ParseMethod string `toml:"parse_method"`
filesInUse sync.Map
cancel context.CancelFunc
context context.Context
parserFunc telegraf.ParserFunc
filesProcessed selfstat.Stat
filesProcessedDir selfstat.Stat
filesDropped selfstat.Stat
filesDroppedDir selfstat.Stat
filesQueuedDir selfstat.Stat
waitGroup *sync.WaitGroup
acc telegraf.TrackingAccumulator
sem *semaphore.Weighted
fileRegexesToMatch []*regexp.Regexp
fileRegexesToIgnore []*regexp.Regexp
filesToProcess chan string
}
func (*DirectoryMonitor) SampleConfig() string {
return sampleConfig
}
func (monitor *DirectoryMonitor) SetParserFunc(fn telegraf.ParserFunc) {
monitor.parserFunc = fn
}
func (monitor *DirectoryMonitor) Init() error {
if monitor.Directory == "" || monitor.FinishedDirectory == "" {
return errors.New("missing one of the following required config options: directory, finished_directory")
}
if monitor.FileQueueSize <= 0 {
return errors.New("file queue size needs to be more than 0")
}
// Create the finished directory if it does not exist, for convenience.
if _, err := os.Stat(monitor.FinishedDirectory); os.IsNotExist(err) {
err = os.Mkdir(monitor.FinishedDirectory, 0750)
if err != nil {
return err
}
}
tags := map[string]string{
"directory": monitor.Directory,
}
monitor.filesDropped = selfstat.Register("directory_monitor", "files_dropped", make(map[string]string))
monitor.filesDroppedDir = selfstat.Register("directory_monitor", "files_dropped_per_dir", tags)
monitor.filesProcessed = selfstat.Register("directory_monitor", "files_processed", make(map[string]string))
monitor.filesProcessedDir = selfstat.Register("directory_monitor", "files_processed_per_dir", tags)
monitor.filesQueuedDir = selfstat.Register("directory_monitor", "files_queue_per_dir", tags)
// If an error directory is configured but does not exist yet, create it ourselves.
if monitor.ErrorDirectory != "" {
if _, err := os.Stat(monitor.ErrorDirectory); os.IsNotExist(err) {
err := os.Mkdir(monitor.ErrorDirectory, 0750)
if err != nil {
return err
}
}
}
monitor.waitGroup = &sync.WaitGroup{}
monitor.sem = semaphore.NewWeighted(int64(monitor.MaxBufferedMetrics))
monitor.context, monitor.cancel = context.WithCancel(context.Background())
monitor.filesToProcess = make(chan string, monitor.FileQueueSize)
// Establish file matching / exclusion regexes.
for _, matcher := range monitor.FilesToMonitor {
regex, err := regexp.Compile(matcher)
if err != nil {
return err
}
monitor.fileRegexesToMatch = append(monitor.fileRegexesToMatch, regex)
}
for _, matcher := range monitor.FilesToIgnore {
regex, err := regexp.Compile(matcher)
if err != nil {
return err
}
monitor.fileRegexesToIgnore = append(monitor.fileRegexesToIgnore, regex)
}
if err := choice.Check(monitor.ParseMethod, []string{"line-by-line", "at-once"}); err != nil {
return fmt.Errorf("config option parse_method: %w", err)
}
return nil
}
func (monitor *DirectoryMonitor) Start(acc telegraf.Accumulator) error {
// Use tracking to determine when more metrics can be added without overflowing the outputs.
monitor.acc = acc.WithTracking(monitor.MaxBufferedMetrics)
go func() {
for range monitor.acc.Delivered() {
monitor.sem.Release(1)
}
}()
// Monitor the files channel and read the paths it receives.
monitor.waitGroup.Add(1)
go func() {
monitor.monitor()
monitor.waitGroup.Done()
}()
return nil
}
func (monitor *DirectoryMonitor) Gather(_ telegraf.Accumulator) error {
processFile := func(path string) error {
// We've been cancelled via Stop().
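// io.EOF doubles as a sentinel error to abort the directory walk early;
// the callers below translate it back into a clean (nil) return.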
if monitor.context.Err() != nil {
return io.EOF
}
stat, err := times.Stat(path)
if err != nil {
return nil //nolint:nilerr // don't stop traversing if there is an error
}
timeThresholdExceeded := time.Since(stat.AccessTime()) >= time.Duration(monitor.DirectoryDurationThreshold)
// If the file has been sitting long enough, queue it for processing.
if timeThresholdExceeded {
monitor.processFile(path)
}
return nil
}
if monitor.Recursive {
err := filepath.Walk(monitor.Directory,
func(path string, _ os.FileInfo, err error) error {
if err != nil {
return err
}
return processFile(path)
})
// We've been cancelled via Stop().
if errors.Is(err, io.EOF) {
return nil
}
if err != nil {
return err
}
} else {
// Get all files sitting in the directory.
files, err := os.ReadDir(monitor.Directory)
if err != nil {
return fmt.Errorf("unable to monitor the targeted directory: %w", err)
}
for _, file := range files {
if file.IsDir() {
continue
}
path := monitor.Directory + "/" + file.Name()
err := processFile(path)
// We've been cancelled via Stop().
if errors.Is(err, io.EOF) {
return nil
}
}
}
return nil
}
func (monitor *DirectoryMonitor) Stop() {
// Before stopping, wrap up all file-reading routines.
monitor.cancel()
close(monitor.filesToProcess)
monitor.Log.Warnf("Exiting the Directory Monitor plugin. Waiting to quit until all current files are finished.")
monitor.waitGroup.Wait()
}
func (monitor *DirectoryMonitor) monitor() {
for filePath := range monitor.filesToProcess {
if monitor.context.Err() != nil {
return
}
// Prevent multiple goroutines from picking up the same file.
if _, exists := monitor.filesInUse.LoadOrStore(filePath, true); exists {
continue
}
monitor.read(filePath)
// We've finished reading the file and moved it away, delete it from files in use.
monitor.filesInUse.Delete(filePath)
// Keep track of how many files are still left to process
monitor.filesQueuedDir.Set(int64(len(monitor.filesToProcess)))
}
}
func (monitor *DirectoryMonitor) processFile(path string) {
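// Strip the monitored directory prefix so the match/ignore regexes run
// against the path relative to the monitored directory.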
basePath := strings.Replace(path, monitor.Directory, "", 1)
// File must be configured to be monitored, if any configuration...
if !monitor.isMonitoredFile(basePath) {
return
}
// ...and should not be configured to be ignored.
if monitor.isIgnoredFile(basePath) {
return
}
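// Queue the file for processing. If the queue is already full, skip it for
// now; it remains in the directory and is picked up on a later Gather pass.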
select {
case monitor.filesToProcess <- path:
default:
}
}
func (monitor *DirectoryMonitor) read(filePath string) {
// Open, read, and parse the contents of the file.
err := monitor.ingestFile(filePath)
var pathErr *os.PathError
if errors.As(err, &pathErr) {
return
}
// Handle a file read error. We don't halt execution but do document, log, and move the problematic file.
if err != nil {
monitor.Log.Errorf("Error while reading file: %q: %v", filePath, err)
monitor.filesDropped.Incr(1)
monitor.filesDroppedDir.Incr(1)
if monitor.ErrorDirectory != "" {
monitor.moveFile(filePath, monitor.ErrorDirectory)
}
return
}
// File is finished, move it to the 'finished' directory.
monitor.moveFile(filePath, monitor.FinishedDirectory)
monitor.filesProcessed.Incr(1)
monitor.filesProcessedDir.Incr(1)
}
func (monitor *DirectoryMonitor) ingestFile(filePath string) error {
file, err := os.Open(filePath)
if err != nil {
return err
}
defer file.Close()
parser, err := monitor.parserFunc()
if err != nil {
return fmt.Errorf("creating parser: %w", err)
}
// Handle gzipped files.
var reader io.Reader
if filepath.Ext(filePath) == ".gz" {
reader, err = gzip.NewReader(file)
if err != nil {
return err
}
} else {
reader = file
}
return monitor.parseFile(parser, reader, file.Name())
}
func (monitor *DirectoryMonitor) parseFile(parser telegraf.Parser, reader io.Reader, fileName string) error {
var splitter bufio.SplitFunc
// Decide on how to split the file
switch monitor.ParseMethod {
case "at-once":
return monitor.parseAtOnce(parser, reader, fileName)
case "line-by-line":
splitter = bufio.ScanLines
default:
return fmt.Errorf("unknown parse method %q", monitor.ParseMethod)
}
scanner := bufio.NewScanner(reader)
scanner.Split(splitter)
for scanner.Scan() {
metrics, err := monitor.parseMetrics(parser, scanner.Bytes(), fileName)
if err != nil {
return err
}
if err := monitor.sendMetrics(metrics); err != nil {
return err
}
}
return scanner.Err()
}
func (monitor *DirectoryMonitor) parseAtOnce(parser telegraf.Parser, reader io.Reader, fileName string) error {
bytes, err := io.ReadAll(reader)
if err != nil {
return err
}
metrics, err := monitor.parseMetrics(parser, bytes, fileName)
if err != nil {
return err
}
return monitor.sendMetrics(metrics)
}
func (monitor *DirectoryMonitor) parseMetrics(parser telegraf.Parser, line []byte, fileName string) (metrics []telegraf.Metric, err error) {
metrics, err = parser.Parse(line)
if err != nil {
if errors.Is(err, parsers.ErrEOF) {
return nil, nil
}
return nil, err
}
if len(metrics) == 0 {
once.Do(func() {
monitor.Log.Debug(internal.NoMetricsCreatedMsg)
})
}
if monitor.FileTag != "" {
for _, m := range metrics {
m.AddTag(monitor.FileTag, filepath.Base(fileName))
}
}
return metrics, err
}
func (monitor *DirectoryMonitor) sendMetrics(metrics []telegraf.Metric) error {
// Report the metrics for the file.
for _, m := range metrics {
// Block until metric can be written.
if err := monitor.sem.Acquire(monitor.context, 1); err != nil {
return err
}
monitor.acc.AddTrackingMetricGroup([]telegraf.Metric{m})
}
return nil
}
func (monitor *DirectoryMonitor) moveFile(srcPath, dstBaseDir string) {
// Appends any subdirectories in the srcPath to the dstBaseDir and
// creates those subdirectories.
basePath := strings.Replace(srcPath, monitor.Directory, "", 1)
dstPath := filepath.Join(dstBaseDir, basePath)
err := os.MkdirAll(filepath.Dir(dstPath), 0750)
if err != nil {
monitor.Log.Errorf("Error creating directory hierarchy for %q: %v", srcPath, err)
}
// Return early on failure so we never delete a source file that was not copied.
inputFile, err := os.Open(srcPath)
if err != nil {
monitor.Log.Errorf("Could not open input file: %s", err)
return
}
outputFile, err := os.Create(dstPath)
if err != nil {
monitor.Log.Errorf("Could not open output file: %s", err)
inputFile.Close()
return
}
defer outputFile.Close()
_, err = io.Copy(outputFile, inputFile)
if err != nil {
monitor.Log.Errorf("Writing to output file failed: %s", err)
}
// We need to close the file for remove on Windows as we otherwise
// will run into a "being used by another process" error
// (see https://github.com/influxdata/telegraf/issues/12287)
if err := inputFile.Close(); err != nil {
monitor.Log.Errorf("Could not close input file: %s", err)
}
if err := os.Remove(srcPath); err != nil {
monitor.Log.Errorf("Failed removing original file: %s", err)
}
}
func (monitor *DirectoryMonitor) isMonitoredFile(fileName string) bool {
if len(monitor.fileRegexesToMatch) == 0 {
return true
}
// Only monitor matching files.
for _, regex := range monitor.fileRegexesToMatch {
if regex.MatchString(fileName) {
return true
}
}
return false
}
func (monitor *DirectoryMonitor) isIgnoredFile(fileName string) bool {
// Skip files that are set to be ignored.
for _, regex := range monitor.fileRegexesToIgnore {
if regex.MatchString(fileName) {
return true
}
}
return false
}
func init() {
inputs.Add("directory_monitor", func() telegraf.Input {
return &DirectoryMonitor{
MaxBufferedMetrics: defaultMaxBufferedMetrics,
DirectoryDurationThreshold: defaultDirectoryDurationThreshold,
FileQueueSize: defaultFileQueueSize,
ParseMethod: defaultParseMethod,
}
})
}

@@ -0,0 +1,682 @@
package directory_monitor
import (
"bytes"
"compress/gzip"
"os"
"path/filepath"
"runtime"
"testing"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/parsers/csv"
"github.com/influxdata/telegraf/plugins/parsers/json"
"github.com/influxdata/telegraf/testutil"
)
func TestCreator(t *testing.T) {
creator, found := inputs.Inputs["directory_monitor"]
require.True(t, found)
expected := &DirectoryMonitor{
MaxBufferedMetrics: defaultMaxBufferedMetrics,
DirectoryDurationThreshold: defaultDirectoryDurationThreshold,
FileQueueSize: defaultFileQueueSize,
ParseMethod: defaultParseMethod,
}
require.Equal(t, expected, creator())
}
func TestCSVGZImport(t *testing.T) {
acc := testutil.Accumulator{}
testCsvFile := "test.csv"
testCsvGzFile := "test.csv.gz"
// Establish process directory and finished directory.
finishedDirectory := t.TempDir()
processDirectory := t.TempDir()
// Init plugin.
r := DirectoryMonitor{
Directory: processDirectory,
FinishedDirectory: finishedDirectory,
MaxBufferedMetrics: defaultMaxBufferedMetrics,
FileQueueSize: defaultFileQueueSize,
ParseMethod: defaultParseMethod,
}
err := r.Init()
require.NoError(t, err)
r.SetParserFunc(func() (telegraf.Parser, error) {
parser := csv.Parser{
HeaderRowCount: 1,
}
err := parser.Init()
return &parser, err
})
r.Log = testutil.Logger{}
// Write csv file to process into the 'process' directory.
f, err := os.Create(filepath.Join(processDirectory, testCsvFile))
require.NoError(t, err)
_, err = f.WriteString("thing,color\nsky,blue\ngrass,green\nclifford,red\n")
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
// Write csv.gz file to process into the 'process' directory.
var b bytes.Buffer
w := gzip.NewWriter(&b)
_, err = w.Write([]byte("thing,color\nsky,blue\ngrass,green\nclifford,red\n"))
require.NoError(t, err)
err = w.Close()
require.NoError(t, err)
err = os.WriteFile(filepath.Join(processDirectory, testCsvGzFile), b.Bytes(), 0640)
require.NoError(t, err)
// Start plugin before adding file.
err = r.Start(&acc)
require.NoError(t, err)
err = r.Gather(&acc)
require.NoError(t, err)
acc.Wait(6)
r.Stop()
// Verify that we read both files once.
require.Len(t, acc.Metrics, 6)
// Files should have been moved to the finished directory, as configured.
_, err = os.Stat(filepath.Join(finishedDirectory, testCsvFile))
require.NoError(t, err)
_, err = os.Stat(filepath.Join(finishedDirectory, testCsvGzFile))
require.NoError(t, err)
}
func TestCSVGZImportWithHeader(t *testing.T) {
acc := testutil.Accumulator{}
testCsvFile := "test.csv"
testCsvGzFile := "test.csv.gz"
// Establish process directory and finished directory.
finishedDirectory := t.TempDir()
processDirectory := t.TempDir()
// Init plugin.
r := DirectoryMonitor{
Directory: processDirectory,
FinishedDirectory: finishedDirectory,
MaxBufferedMetrics: defaultMaxBufferedMetrics,
FileQueueSize: defaultFileQueueSize,
ParseMethod: defaultParseMethod,
}
err := r.Init()
require.NoError(t, err)
r.SetParserFunc(func() (telegraf.Parser, error) {
parser := csv.Parser{
HeaderRowCount: 1,
SkipRows: 1,
}
err := parser.Init()
return &parser, err
})
r.Log = testutil.Logger{}
// Write csv file to process into the 'process' directory.
f, err := os.Create(filepath.Join(processDirectory, testCsvFile))
require.NoError(t, err)
_, err = f.WriteString("This is some garbage to be skipped\n")
require.NoError(t, err)
_, err = f.WriteString("thing,color\nsky,blue\ngrass,green\nclifford,red\n")
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
// Write csv.gz file to process into the 'process' directory.
var b bytes.Buffer
w := gzip.NewWriter(&b)
_, err = w.Write([]byte("This is some garbage to be skipped\n"))
require.NoError(t, err)
_, err = w.Write([]byte("thing,color\nsky,blue\ngrass,green\nclifford,red\n"))
require.NoError(t, err)
err = w.Close()
require.NoError(t, err)
err = os.WriteFile(filepath.Join(processDirectory, testCsvGzFile), b.Bytes(), 0640)
require.NoError(t, err)
// Start plugin before adding file.
err = r.Start(&acc)
require.NoError(t, err)
err = r.Gather(&acc)
require.NoError(t, err)
acc.Wait(6)
r.Stop()
// Verify that we read both files once.
require.Len(t, acc.Metrics, 6)
// Files should have been moved to the finished directory, as configured.
_, err = os.Stat(filepath.Join(finishedDirectory, testCsvFile))
require.NoError(t, err)
_, err = os.Stat(filepath.Join(finishedDirectory, testCsvGzFile))
require.NoError(t, err)
}
func TestMultipleJSONFileImports(t *testing.T) {
acc := testutil.Accumulator{}
testJSONFile := "test.json"
// Establish process directory and finished directory.
finishedDirectory := t.TempDir()
processDirectory := t.TempDir()
// Init plugin.
r := DirectoryMonitor{
Directory: processDirectory,
FinishedDirectory: finishedDirectory,
MaxBufferedMetrics: defaultMaxBufferedMetrics,
FileQueueSize: defaultFileQueueSize,
ParseMethod: defaultParseMethod,
}
err := r.Init()
require.NoError(t, err)
r.SetParserFunc(func() (telegraf.Parser, error) {
p := &json.Parser{NameKey: "Name"}
err := p.Init()
return p, err
})
// Drop a 5-line line-delimited JSON file into the 'process' directory.
f, err := os.Create(filepath.Join(processDirectory, testJSONFile))
require.NoError(t, err)
_, err = f.WriteString(
"{\"Name\": \"event1\",\"Speed\": 100.1,\"Length\": 20.1}\n{\"Name\": \"event2\",\"Speed\": 500,\"Length\": 1.4}\n" +
"{\"Name\": " + "\"event3\",\"Speed\": 200,\"Length\": 10.23}\n{\"Name\": \"event4\",\"Speed\": 80,\"Length\": 250}\n" +
"{\"Name\": \"event5\",\"Speed\": 120.77,\"Length\": 25.97}",
)
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
err = r.Start(&acc)
r.Log = testutil.Logger{}
require.NoError(t, err)
err = r.Gather(&acc)
require.NoError(t, err)
acc.Wait(5)
r.Stop()
// Verify that each JSON line was parsed into exactly one metric.
require.Len(t, acc.Metrics, 5)
}
func TestFileTag(t *testing.T) {
acc := testutil.Accumulator{}
testJSONFile := "test.json"
// Establish process directory and finished directory.
finishedDirectory := t.TempDir()
processDirectory := t.TempDir()
// Init plugin.
r := DirectoryMonitor{
Directory: processDirectory,
FinishedDirectory: finishedDirectory,
FileTag: "filename",
MaxBufferedMetrics: defaultMaxBufferedMetrics,
FileQueueSize: defaultFileQueueSize,
ParseMethod: defaultParseMethod,
}
err := r.Init()
require.NoError(t, err)
r.SetParserFunc(func() (telegraf.Parser, error) {
p := &json.Parser{NameKey: "Name"}
err := p.Init()
return p, err
})
// Drop a 1-line line-delimited JSON file into the 'process' directory.
f, err := os.Create(filepath.Join(processDirectory, testJSONFile))
require.NoError(t, err)
_, err = f.WriteString("{\"Name\": \"event1\",\"Speed\": 100.1,\"Length\": 20.1}")
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
err = r.Start(&acc)
r.Log = testutil.Logger{}
require.NoError(t, err)
err = r.Gather(&acc)
require.NoError(t, err)
acc.Wait(1)
r.Stop()
// Verify that the JSON line was parsed into exactly one metric.
require.Len(t, acc.Metrics, 1)
for _, m := range acc.Metrics {
for key, value := range m.Tags {
require.Equal(t, r.FileTag, key)
require.Equal(t, filepath.Base(testJSONFile), value)
}
}
}
func TestCSVNoSkipRows(t *testing.T) {
acc := testutil.Accumulator{}
testCsvFile := "test.csv"
// Establish process directory and finished directory.
finishedDirectory := t.TempDir()
processDirectory := t.TempDir()
// Init plugin.
r := DirectoryMonitor{
Directory: processDirectory,
FinishedDirectory: finishedDirectory,
MaxBufferedMetrics: defaultMaxBufferedMetrics,
FileQueueSize: defaultFileQueueSize,
ParseMethod: defaultParseMethod,
}
err := r.Init()
require.NoError(t, err)
r.SetParserFunc(func() (telegraf.Parser, error) {
parser := csv.Parser{
HeaderRowCount: 1,
SkipRows: 0,
TagColumns: []string{"line1"},
}
err := parser.Init()
return &parser, err
})
r.Log = testutil.Logger{}
testCSV := `line1,line2,line3
hello,80,test_name2`
expectedFields := map[string]interface{}{
"line2": int64(80),
"line3": "test_name2",
}
// Write csv file to process into the 'process' directory.
f, err := os.Create(filepath.Join(processDirectory, testCsvFile))
require.NoError(t, err)
_, err = f.WriteString(testCSV)
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
// Start plugin before adding file.
err = r.Start(&acc)
require.NoError(t, err)
err = r.Gather(&acc)
require.NoError(t, err)
acc.Wait(1)
r.Stop()
// Verify that we read the file once.
require.Len(t, acc.Metrics, 1)
// File should have been moved to the finished directory, as configured.
_, err = os.Stat(filepath.Join(finishedDirectory, testCsvFile))
require.NoError(t, err)
for _, m := range acc.Metrics {
for key, value := range m.Tags {
require.Equal(t, "line1", key)
require.Equal(t, "hello", value)
}
require.Equal(t, expectedFields, m.Fields)
}
}
func TestCSVSkipRows(t *testing.T) {
acc := testutil.Accumulator{}
testCsvFile := "test.csv"
// Establish process directory and finished directory.
finishedDirectory := t.TempDir()
processDirectory := t.TempDir()
// Init plugin.
r := DirectoryMonitor{
Directory: processDirectory,
FinishedDirectory: finishedDirectory,
MaxBufferedMetrics: defaultMaxBufferedMetrics,
FileQueueSize: defaultFileQueueSize,
ParseMethod: defaultParseMethod,
}
err := r.Init()
require.NoError(t, err)
r.SetParserFunc(func() (telegraf.Parser, error) {
parser := csv.Parser{
HeaderRowCount: 1,
SkipRows: 2,
TagColumns: []string{"line1"},
}
err := parser.Init()
return &parser, err
})
r.Log = testutil.Logger{}
testCSV := `garbage nonsense 1
garbage,nonsense,2
line1,line2,line3
hello,80,test_name2`
expectedFields := map[string]interface{}{
"line2": int64(80),
"line3": "test_name2",
}
// Write csv file to process into the 'process' directory.
f, err := os.Create(filepath.Join(processDirectory, testCsvFile))
require.NoError(t, err)
_, err = f.WriteString(testCSV)
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
// Start plugin before adding file.
err = r.Start(&acc)
require.NoError(t, err)
err = r.Gather(&acc)
require.NoError(t, err)
acc.Wait(1)
r.Stop()
// Verify that we read the file once.
require.Len(t, acc.Metrics, 1)
// File should have been moved to the finished directory, as configured.
_, err = os.Stat(filepath.Join(finishedDirectory, testCsvFile))
require.NoError(t, err)
for _, m := range acc.Metrics {
for key, value := range m.Tags {
require.Equal(t, "line1", key)
require.Equal(t, "hello", value)
}
require.Equal(t, expectedFields, m.Fields)
}
}
func TestCSVMultiHeader(t *testing.T) {
acc := testutil.Accumulator{}
testCsvFile := "test.csv"
// Establish process directory and finished directory.
finishedDirectory := t.TempDir()
processDirectory := t.TempDir()
// Init plugin.
r := DirectoryMonitor{
Directory: processDirectory,
FinishedDirectory: finishedDirectory,
MaxBufferedMetrics: defaultMaxBufferedMetrics,
FileQueueSize: defaultFileQueueSize,
ParseMethod: defaultParseMethod,
}
err := r.Init()
require.NoError(t, err)
r.SetParserFunc(func() (telegraf.Parser, error) {
parser := csv.Parser{
HeaderRowCount: 2,
TagColumns: []string{"line1"},
}
err := parser.Init()
return &parser, err
})
r.Log = testutil.Logger{}
testCSV := `line,line,line
1,2,3
hello,80,test_name2`
expectedFields := map[string]interface{}{
"line2": int64(80),
"line3": "test_name2",
}
// Write csv file to process into the 'process' directory.
f, err := os.Create(filepath.Join(processDirectory, testCsvFile))
require.NoError(t, err)
_, err = f.WriteString(testCSV)
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
// Start plugin before adding file.
err = r.Start(&acc)
require.NoError(t, err)
err = r.Gather(&acc)
require.NoError(t, err)
acc.Wait(1)
r.Stop()
// Verify that we read the file once.
require.Len(t, acc.Metrics, 1)
// File should have been moved to the finished directory, as configured.
_, err = os.Stat(filepath.Join(finishedDirectory, testCsvFile))
require.NoError(t, err)
for _, m := range acc.Metrics {
for key, value := range m.Tags {
require.Equal(t, "line1", key)
require.Equal(t, "hello", value)
}
require.Equal(t, expectedFields, m.Fields)
}
}
func TestParseCompleteFile(t *testing.T) {
acc := testutil.Accumulator{}
// Establish process directory and finished directory.
finishedDirectory := t.TempDir()
processDirectory := t.TempDir()
// Init plugin.
r := DirectoryMonitor{
Directory: processDirectory,
FinishedDirectory: finishedDirectory,
MaxBufferedMetrics: defaultMaxBufferedMetrics,
FileQueueSize: defaultFileQueueSize,
ParseMethod: "at-once",
}
err := r.Init()
require.NoError(t, err)
r.Log = testutil.Logger{}
r.SetParserFunc(func() (telegraf.Parser, error) {
parser := &json.Parser{
NameKey: "name",
TagKeys: []string{"tag1"},
}
err := parser.Init()
return parser, err
})
testJSON := `{
"name": "test1",
"value": 100.1,
"tag1": "value1"
}`
// Write json file to process into the 'process' directory.
f, err := os.CreateTemp(processDirectory, "test.json")
require.NoError(t, err)
_, err = f.WriteString(testJSON)
require.NoError(t, err)
f.Close()
err = r.Start(&acc)
require.NoError(t, err)
err = r.Gather(&acc)
require.NoError(t, err)
acc.Wait(1)
r.Stop()
require.NoError(t, acc.FirstError())
require.Len(t, acc.Metrics, 1)
testutil.RequireMetricEqual(t, testutil.TestMetric(100.1), acc.GetTelegrafMetrics()[0], testutil.IgnoreTime())
}
func TestParseSubdirectories(t *testing.T) {
acc := testutil.Accumulator{}
// Establish process directory and finished directory.
finishedDirectory := t.TempDir()
processDirectory := t.TempDir()
// Init plugin.
r := DirectoryMonitor{
Directory: processDirectory,
FinishedDirectory: finishedDirectory,
Recursive: true,
MaxBufferedMetrics: defaultMaxBufferedMetrics,
FileQueueSize: defaultFileQueueSize,
ParseMethod: "at-once",
}
err := r.Init()
require.NoError(t, err)
r.Log = testutil.Logger{}
r.SetParserFunc(func() (telegraf.Parser, error) {
parser := &json.Parser{
NameKey: "name",
TagKeys: []string{"tag1"},
}
err := parser.Init()
return parser, err
})
testJSON := `{
"name": "test1",
"value": 100.1,
"tag1": "value1"
}`
// Write json file to process into the 'process' directory.
testJSONFile := "test.json"
f, err := os.Create(filepath.Join(processDirectory, testJSONFile))
require.NoError(t, err)
_, err = f.WriteString(testJSON)
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
// Write json file to process into a subdirectory in the 'process' directory.
err = os.Mkdir(filepath.Join(processDirectory, "sub"), 0750)
require.NoError(t, err)
f, err = os.Create(filepath.Join(processDirectory, "sub", testJSONFile))
require.NoError(t, err)
_, err = f.WriteString(testJSON)
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
err = r.Start(&acc)
require.NoError(t, err)
err = r.Gather(&acc)
require.NoError(t, err)
acc.Wait(2)
r.Stop()
require.NoError(t, acc.FirstError())
require.Len(t, acc.Metrics, 2)
testutil.RequireMetricEqual(t, testutil.TestMetric(100.1), acc.GetTelegrafMetrics()[0], testutil.IgnoreTime())
// Files should have been moved to the finished directory (preserving the sub-directory), as configured.
_, err = os.Stat(filepath.Join(finishedDirectory, testJSONFile))
require.NoError(t, err)
_, err = os.Stat(filepath.Join(finishedDirectory, "sub", testJSONFile))
require.NoError(t, err)
}
func TestParseSubdirectoriesFilesIgnore(t *testing.T) {
acc := testutil.Accumulator{}
// Establish process directory and finished directory.
finishedDirectory := t.TempDir()
processDirectory := t.TempDir()
filesToIgnore := `sub/test.json`
if runtime.GOOS == "windows" {
filesToIgnore = `\\sub\\test.json`
}
// Init plugin.
r := DirectoryMonitor{
Directory: processDirectory,
FinishedDirectory: finishedDirectory,
Recursive: true,
MaxBufferedMetrics: defaultMaxBufferedMetrics,
FileQueueSize: defaultFileQueueSize,
ParseMethod: "at-once",
FilesToIgnore: []string{filesToIgnore},
}
err := r.Init()
require.NoError(t, err)
r.Log = testutil.Logger{}
r.SetParserFunc(func() (telegraf.Parser, error) {
parser := &json.Parser{
NameKey: "name",
TagKeys: []string{"tag1"},
}
err := parser.Init()
return parser, err
})
testJSON := `{
"name": "test1",
"value": 100.1,
"tag1": "value1"
}`
// Write json file to process into the 'process' directory.
testJSONFile := "test.json"
f, err := os.Create(filepath.Join(processDirectory, testJSONFile))
require.NoError(t, err)
_, err = f.WriteString(testJSON)
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
// Write json file to process into a subdirectory in the 'process' directory.
err = os.Mkdir(filepath.Join(processDirectory, "sub"), 0750)
require.NoError(t, err)
f, err = os.Create(filepath.Join(processDirectory, "sub", testJSONFile))
require.NoError(t, err)
_, err = f.WriteString(testJSON)
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
err = r.Start(&acc)
require.NoError(t, err)
err = r.Gather(&acc)
require.NoError(t, err)
acc.Wait(1)
r.Stop()
require.NoError(t, acc.FirstError())
require.Len(t, acc.Metrics, 1)
testutil.RequireMetricEqual(t, testutil.TestMetric(100.1), acc.GetTelegrafMetrics()[0], testutil.IgnoreTime())
// The non-ignored file should have been moved to the finished directory, as configured.
_, err = os.Stat(filepath.Join(finishedDirectory, testJSONFile))
require.NoError(t, err)
}

@@ -0,0 +1,50 @@
# Ingests files in a directory and then moves them to a target directory.
[[inputs.directory_monitor]]
## The directory to monitor and read files from (including sub-directories if "recursive" is true).
directory = ""
#
## The directory to move finished files to (maintaining directory hierarchy from source).
finished_directory = ""
#
## Setting recursive to true will make the plugin recursively walk the directory and process all sub-directories.
# recursive = false
#
## The directory to move files to upon file error.
## If not provided, erroring files will stay in the monitored directory.
# error_directory = ""
#
## The amount of time a file is allowed to sit in the directory before it is picked up.
## This can generally be low, but if large files are written to the directory slowly,
## set it higher so the plugin waits until each file is fully copied before reading it.
# directory_duration_threshold = "50ms"
#
## An allow-list of file name patterns to monitor, if necessary; only matching files are ingested. Supports regex. If left blank, all files are ingested.
# files_to_monitor = ["^.*\\.csv"]
#
## A list of files to ignore, if necessary. Supports regex.
# files_to_ignore = [".DS_Store"]
#
## Maximum number of metrics to buffer that have not yet been written by the
## output. For best throughput set this to the size of the output's metric_buffer_limit.
## Warning: setting this number higher than the output's metric_buffer_limit can cause dropped metrics.
# max_buffered_metrics = 10000
#
## The maximum number of file paths to queue up for processing at once, before waiting until queued files are processed to find more files.
## Lowering this value will result in *slightly* less memory use, with a potential sacrifice in speed, if absolutely necessary.
# file_queue_size = 100000
#
## Name of a tag to hold the name of the file the data was parsed from. Leave empty
## to disable. Be cautious when file name variation is high, as this can increase the
## cardinality significantly. Read more about cardinality here:
## https://docs.influxdata.com/influxdb/cloud/reference/glossary/#series-cardinality
# file_tag = ""
#
## Specify if the file can be read completely at once or if it needs to be read line by line (default).
## Possible values: "line-by-line", "at-once"
# parse_method = "line-by-line"
#
## The data format to be read from the files.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "influx"