
Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
4963 changed files with 677545 additions and 0 deletions

plugins/outputs/bigquery/README.md

@@ -0,0 +1,126 @@
# Google BigQuery Output Plugin
This plugin writes metrics to the [Google Cloud BigQuery][big_query] service
and requires [authentication][authentication] with Google Cloud using either a
service account or user credentials.
> [!IMPORTANT]
> Be aware that this plugin accesses APIs that are [chargeable][pricing] and
> might incur costs.
[authentication]: https://cloud.google.com/bigquery/docs/authentication
[big_query]: https://cloud.google.com/bigquery
[pricing]: https://cloud.google.com/bigquery/pricing
⭐ Telegraf v1.18.0
🏷️ cloud, datastore
💻 all
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
In addition to the plugin-specific configuration settings, plugins support
additional global and plugin configuration settings. These settings are used to
modify metrics, tags, and fields, or to create aliases and configure ordering,
etc. See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
## Configuration
```toml @sample.conf
# Configuration for Google Cloud BigQuery to send entries
[[outputs.bigquery]]
  ## Credentials File
  credentials_file = "/path/to/service/account/key.json"

  ## Google Cloud Platform Project
  # project = ""

  ## The BigQuery dataset to write metrics to
  dataset = "telegraf"

  ## Timeout for BigQuery operations.
  # timeout = "5s"

  ## Character to replace hyphens in metric names with
  # replace_hyphen_to = "_"

  ## Write all metrics into a single compact table with the given name
  # compact_table = ""
```
Leaving `project` empty indicates that the plugin will try to retrieve the
project from the credentials file.

The `dataset` option is required and specifies the BigQuery dataset under
which the corresponding metric tables reside.
Each metric needs a corresponding table in BigQuery. The schema of that table
must:

* contain the field `timestamp`, which holds the timestamp of the Telegraf
  metric
* contain the metric's tags with the same names, with the column type set to
  string
* contain the metric's fields with the same names, with the column type
  matching the field type
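For instance, a hypothetical `cpu` metric carrying a `host` tag and a float
`usage_idle` field could be backed by a table schema like the following (a
sketch; the metric, tag, and field names are illustrative only):

```json
[
  {
    "mode": "REQUIRED",
    "name": "timestamp",
    "type": "TIMESTAMP"
  },
  {
    "name": "host",
    "type": "STRING"
  },
  {
    "name": "usage_idle",
    "type": "FLOAT"
  }
]
```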
## Compact table
When the compact table is enabled, all metrics are inserted into the given
table with the following schema:
```json
[
{
"mode": "REQUIRED",
"name": "timestamp",
"type": "TIMESTAMP"
},
{
"mode": "REQUIRED",
"name": "name",
"type": "STRING"
},
{
"mode": "REQUIRED",
"name": "tags",
"type": "JSON"
},
{
"mode": "REQUIRED",
"name": "fields",
"type": "JSON"
}
]
```
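As an illustration, a metric named `test1` with tag `tag1=value1` and field
`value=1` (the values used in this plugin's tests) ends up as a row like the
following, with the tags and fields stored as JSON-encoded strings:

```json
{
  "timestamp": "2009-11-10T23:00:00Z",
  "name": "test1",
  "tags": "{\"tag1\":\"value1\"}",
  "fields": "{\"value\":1}"
}
```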
## Restrictions
Avoid hyphens in BigQuery table names: the underlying SDK cannot handle
streaming inserts into tables with hyphens. For metrics with hyphens in their
names, please use the [Rename Processor Plugin][rename].

By default, hyphens in a metric name are replaced with underscores (`_`) when
deriving the table name. This can be altered using the `replace_hyphen_to`
configuration property.
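As a sketch, a rename processor entry for a hypothetical `some-metric`
measurement could look like this:

```toml
[[processors.rename]]
  [[processors.rename.replace]]
    measurement = "some-metric"
    dest = "some_metric"
```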
The available field data type options are:

* integer
* float or long
* string
* boolean
All field naming restrictions that apply to BigQuery also apply to the
measurements to be imported.

Tables in BigQuery have to be created beforehand; they are not created by the
plugin while persisting metrics.

Pay attention to the column `timestamp` since it is reserved upfront and cannot
change. If partitioning is required, make sure it is applied beforehand.
[rename]: ../../processors/rename/README.md

plugins/outputs/bigquery/bigquery.go

@@ -0,0 +1,323 @@
//go:generate ../../../tools/readme_config_includer/generator
package bigquery
import (
"context"
_ "embed"
"encoding/json"
"errors"
"fmt"
"math"
"reflect"
"strings"
"sync"
"time"
"cloud.google.com/go/bigquery"
"golang.org/x/oauth2/google"
"google.golang.org/api/option"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/outputs"
)
//go:embed sample.conf
var sampleConfig string
const timeStampFieldName = "timestamp"
var defaultTimeout = config.Duration(5 * time.Second)
type BigQuery struct {
CredentialsFile string `toml:"credentials_file"`
Project string `toml:"project"`
Dataset string `toml:"dataset"`
Timeout config.Duration `toml:"timeout"`
ReplaceHyphenTo string `toml:"replace_hyphen_to"`
CompactTable string `toml:"compact_table"`
Log telegraf.Logger `toml:"-"`
client *bigquery.Client
warnedOnHyphens map[string]bool
}
func (*BigQuery) SampleConfig() string {
return sampleConfig
}
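// Init validates the configuration and enables project auto-detection when no
// project is configured.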
func (b *BigQuery) Init() error {
if b.Project == "" {
b.Project = bigquery.DetectProjectID
}
if b.Dataset == "" {
return errors.New(`"dataset" is required`)
}
b.warnedOnHyphens = make(map[string]bool)
return nil
}
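// Connect creates the BigQuery client if none is set up yet and, in compact
// mode, verifies that the target table exists.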
func (b *BigQuery) Connect() error {
if b.client == nil {
if err := b.setUpDefaultClient(); err != nil {
return err
}
}
if b.CompactTable != "" {
ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, time.Duration(b.Timeout))
defer cancel()
// Check if the compact table exists
_, err := b.client.Dataset(b.Dataset).Table(b.CompactTable).Metadata(ctx)
if err != nil {
return fmt.Errorf("compact table: %w", err)
}
}
return nil
}
func (b *BigQuery) setUpDefaultClient() error {
var credentialsOption option.ClientOption
// https://cloud.google.com/go/docs/reference/cloud.google.com/go/0.94.1#hdr-Timeouts_and_Cancellation
// Do not attempt to add timeout to this context for the bigquery client.
ctx := context.Background()
if b.CredentialsFile != "" {
credentialsOption = option.WithCredentialsFile(b.CredentialsFile)
} else {
creds, err := google.FindDefaultCredentials(ctx, bigquery.Scope)
if err != nil {
return fmt.Errorf(
"unable to find Google Cloud Platform Application Default Credentials: %w. "+
"Either set ADC or provide CredentialsFile config", err)
}
credentialsOption = option.WithCredentials(creds)
}
client, err := bigquery.NewClient(ctx, b.Project,
credentialsOption,
option.WithUserAgent(internal.ProductToken()),
)
b.client = client
return err
}
// Write the metrics to Google Cloud BigQuery.
func (b *BigQuery) Write(metrics []telegraf.Metric) error {
if b.CompactTable != "" {
return b.writeCompact(metrics)
}
groupedMetrics := groupByMetricName(metrics)
var wg sync.WaitGroup
for k, v := range groupedMetrics {
wg.Add(1)
go func(k string, v []bigquery.ValueSaver) {
defer wg.Done()
b.insertToTable(k, v)
}(k, v)
}
wg.Wait()
return nil
}
func (b *BigQuery) writeCompact(metrics []telegraf.Metric) error {
ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, time.Duration(b.Timeout))
defer cancel()
// Always returns an instance, even if table doesn't exist (anymore).
inserter := b.client.Dataset(b.Dataset).Table(b.CompactTable).Inserter()
var compactValues []*bigquery.ValuesSaver
for _, m := range metrics {
valueSaver, err := b.newCompactValuesSaver(m)
if err != nil {
b.Log.Warnf("could not prepare metric as compact value: %v", err)
} else {
compactValues = append(compactValues, valueSaver)
}
}
return inserter.Put(ctx, compactValues)
}
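// groupByMetricName batches the metrics by name so that each batch can be
// written to its corresponding table in a single insert.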
func groupByMetricName(metrics []telegraf.Metric) map[string][]bigquery.ValueSaver {
groupedMetrics := make(map[string][]bigquery.ValueSaver)
for _, m := range metrics {
bqm := newValuesSaver(m)
groupedMetrics[m.Name()] = append(groupedMetrics[m.Name()], bqm)
}
return groupedMetrics
}
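// newValuesSaver builds the row for a metric: the timestamp first, followed by
// the tags (as strings) and the fields with their matching BigQuery types.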
func newValuesSaver(m telegraf.Metric) *bigquery.ValuesSaver {
s := make(bigquery.Schema, 0)
r := make([]bigquery.Value, 0)
timeSchema := timeStampFieldSchema()
s = append(s, timeSchema)
r = append(r, m.Time())
s, r = tagsSchemaAndValues(m, s, r)
s, r = valuesSchemaAndValues(m, s, r)
return &bigquery.ValuesSaver{
Schema: s.Relax(),
Row: r,
}
}
func (b *BigQuery) newCompactValuesSaver(m telegraf.Metric) (*bigquery.ValuesSaver, error) {
tags, err := json.Marshal(m.Tags())
if err != nil {
return nil, fmt.Errorf("serializing tags: %w", err)
}
rawFields := make(map[string]interface{}, len(m.FieldList()))
for _, field := range m.FieldList() {
if fv, ok := field.Value.(float64); ok {
// JSON does not support these special values
if math.IsNaN(fv) || math.IsInf(fv, 0) {
b.Log.Debugf("Ignoring unsupported field %s with value %v for metric %s", field.Key, field.Value, m.Name())
continue
}
}
rawFields[field.Key] = field.Value
}
fields, err := json.Marshal(rawFields)
if err != nil {
return nil, fmt.Errorf("serializing fields: %w", err)
}
return &bigquery.ValuesSaver{
Schema: bigquery.Schema{
timeStampFieldSchema(),
newStringFieldSchema("name"),
newJSONFieldSchema("tags"),
newJSONFieldSchema("fields"),
},
Row: []bigquery.Value{
m.Time(),
m.Name(),
string(tags),
string(fields),
},
}, nil
}
func timeStampFieldSchema() *bigquery.FieldSchema {
return &bigquery.FieldSchema{
Name: timeStampFieldName,
Type: bigquery.TimestampFieldType,
}
}
func newStringFieldSchema(name string) *bigquery.FieldSchema {
return &bigquery.FieldSchema{
Name: name,
Type: bigquery.StringFieldType,
}
}
func newJSONFieldSchema(name string) *bigquery.FieldSchema {
return &bigquery.FieldSchema{
Name: name,
Type: bigquery.JSONFieldType,
}
}
func tagsSchemaAndValues(m telegraf.Metric, s bigquery.Schema, r []bigquery.Value) ([]*bigquery.FieldSchema, []bigquery.Value) {
for _, t := range m.TagList() {
s = append(s, newStringFieldSchema(t.Key))
r = append(r, t.Value)
}
return s, r
}
func valuesSchemaAndValues(m telegraf.Metric, s bigquery.Schema, r []bigquery.Value) ([]*bigquery.FieldSchema, []bigquery.Value) {
for _, f := range m.FieldList() {
s = append(s, valuesSchema(f))
r = append(r, f.Value)
}
return s, r
}
func valuesSchema(f *telegraf.Field) *bigquery.FieldSchema {
return &bigquery.FieldSchema{
Name: f.Key,
Type: valueToBqType(f.Value),
}
}
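// valueToBqType maps a Go value to the corresponding BigQuery column type,
// falling back to STRING for all other kinds.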
func valueToBqType(v interface{}) bigquery.FieldType {
switch reflect.ValueOf(v).Kind() {
case reflect.Int, reflect.Int16, reflect.Int32, reflect.Int64:
return bigquery.IntegerFieldType
case reflect.Float32, reflect.Float64:
return bigquery.FloatFieldType
case reflect.Bool:
return bigquery.BooleanFieldType
default:
return bigquery.StringFieldType
}
}
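// insertToTable writes a batch of rows to the table derived from the metric
// name; insert errors are logged but not returned.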
func (b *BigQuery) insertToTable(metricName string, metrics []bigquery.ValueSaver) {
ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, time.Duration(b.Timeout))
defer cancel()
tableName := b.metricToTable(metricName)
table := b.client.Dataset(b.Dataset).Table(tableName)
inserter := table.Inserter()
if err := inserter.Put(ctx, metrics); err != nil {
b.Log.Errorf("inserting metric %q failed: %v", metricName, err)
}
}
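// metricToTable derives the table name from the metric name, replacing hyphens
// and warning once per affected metric.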
func (b *BigQuery) metricToTable(metricName string) string {
if !strings.Contains(metricName, "-") {
return metricName
}
dhm := strings.ReplaceAll(metricName, "-", b.ReplaceHyphenTo)
if warned := b.warnedOnHyphens[metricName]; !warned {
b.Log.Warnf("Metric %q contains hyphens, please consider using the rename processor plugin; falling back to %q", metricName, dhm)
b.warnedOnHyphens[metricName] = true
}
return dhm
}
// Close will terminate the session to the backend, returning error if an issue arises.
func (b *BigQuery) Close() error {
return b.client.Close()
}
func init() {
outputs.Add("bigquery", func() telegraf.Output {
return &BigQuery{
Timeout: defaultTimeout,
ReplaceHyphenTo: "_",
}
})
}

plugins/outputs/bigquery/bigquery_test.go

@@ -0,0 +1,304 @@
package bigquery
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"cloud.google.com/go/bigquery"
"github.com/stretchr/testify/require"
"google.golang.org/api/option"
"google.golang.org/api/option/internaloption"
"github.com/influxdata/telegraf/testutil"
)
const (
successfulResponse = `{"kind": "bigquery#tableDataInsertAllResponse"}`
)
var receivedBody map[string]json.RawMessage
type Row struct {
Tag1 string `json:"tag1"`
Timestamp string `json:"timestamp"`
Value float64 `json:"value"`
}
func TestInit(t *testing.T) {
tests := []struct {
name string
errorString string
plugin *BigQuery
}{
{
name: "dataset is not set",
errorString: `"dataset" is required`,
plugin: &BigQuery{},
},
{
name: "valid config",
plugin: &BigQuery{
Dataset: "test-dataset",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.errorString != "" {
require.EqualError(t, tt.plugin.Init(), tt.errorString)
} else {
require.NoError(t, tt.plugin.Init())
}
})
}
}
func TestMetricToTable(t *testing.T) {
tests := []struct {
name string
replaceHyphenTo string
metricName string
expectedTable string
}{
{
name: "no rename",
replaceHyphenTo: "_",
metricName: "test",
expectedTable: "test",
},
{
name: "default config",
replaceHyphenTo: "_",
metricName: "table-with-hyphens",
expectedTable: "table_with_hyphens",
},
{
name: "custom hyphens",
replaceHyphenTo: "*",
metricName: "table-with-hyphens",
expectedTable: "table*with*hyphens",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
b := &BigQuery{
Dataset: "test-dataset",
ReplaceHyphenTo: tt.replaceHyphenTo,
Log: testutil.Logger{},
}
require.NoError(t, b.Init())
require.Equal(t, tt.expectedTable, b.metricToTable(tt.metricName))
if tt.metricName != tt.expectedTable {
require.Contains(t, b.warnedOnHyphens, tt.metricName)
require.True(t, b.warnedOnHyphens[tt.metricName])
} else {
require.NotContains(t, b.warnedOnHyphens, tt.metricName)
}
})
}
}
func TestConnect(t *testing.T) {
srv := localBigQueryServer(t)
defer srv.Close()
tests := []struct {
name string
compactTable string
errorString string
}{
{name: "normal"},
{
name: "compact table existing",
compactTable: "test-metrics",
},
{
name: "compact table not existing",
compactTable: "foobar",
errorString: "compact table: googleapi: got HTTP response code 404",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
b := &BigQuery{
Project: "test-project",
Dataset: "test-dataset",
Timeout: defaultTimeout,
CompactTable: tt.compactTable,
}
require.NoError(t, b.Init())
require.NoError(t, b.setUpTestClient(srv.URL))
if tt.errorString != "" {
require.ErrorContains(t, b.Connect(), tt.errorString)
} else {
require.NoError(t, b.Connect())
}
})
}
}
func TestWrite(t *testing.T) {
srv := localBigQueryServer(t)
defer srv.Close()
b := &BigQuery{
Project: "test-project",
Dataset: "test-dataset",
Timeout: defaultTimeout,
}
mockMetrics := testutil.MockMetrics()
require.NoError(t, b.Init())
require.NoError(t, b.setUpTestClient(srv.URL))
require.NoError(t, b.Connect())
require.NoError(t, b.Write(mockMetrics))
var rows []map[string]json.RawMessage
require.NoError(t, json.Unmarshal(receivedBody["rows"], &rows))
var row Row
require.NoError(t, json.Unmarshal(rows[0]["json"], &row))
pt, err := time.Parse(time.RFC3339, row.Timestamp)
require.NoError(t, err)
require.Equal(t, mockMetrics[0].Tags()["tag1"], row.Tag1)
require.Equal(t, mockMetrics[0].Time(), pt)
require.InDelta(t, mockMetrics[0].Fields()["value"], row.Value, testutil.DefaultDelta)
}
func TestWriteCompact(t *testing.T) {
srv := localBigQueryServer(t)
defer srv.Close()
b := &BigQuery{
Project: "test-project",
Dataset: "test-dataset",
Timeout: defaultTimeout,
CompactTable: "test-metrics",
}
mockMetrics := testutil.MockMetrics()
require.NoError(t, b.Init())
require.NoError(t, b.setUpTestClient(srv.URL))
require.NoError(t, b.Connect())
require.NoError(t, b.Write(mockMetrics))
var rows []map[string]json.RawMessage
require.NoError(t, json.Unmarshal(receivedBody["rows"], &rows))
require.Len(t, rows, 1)
require.Contains(t, rows[0], "json")
var row interface{}
require.NoError(t, json.Unmarshal(rows[0]["json"], &row))
require.Equal(t, map[string]interface{}{
"timestamp": "2009-11-10T23:00:00Z",
"name": "test1",
"tags": `{"tag1":"value1"}`,
"fields": `{"value":1}`,
}, row)
require.NoError(t, b.Close())
}
func TestAutoDetect(t *testing.T) {
srv := localBigQueryServer(t)
defer srv.Close()
b := &BigQuery{
Dataset: "test-dataset",
Timeout: defaultTimeout,
CompactTable: "test-metrics",
}
credentialsJSON := []byte(`{"type": "service_account", "project_id": "test-project"}`)
require.NoError(t, b.Init())
require.NoError(t, b.setUpTestClientWithJSON(srv.URL, credentialsJSON))
require.NoError(t, b.Connect())
require.NoError(t, b.Close())
}
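// setUpTestClient points the plugin at the local test server without any
// authentication.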
func (b *BigQuery) setUpTestClient(endpointURL string) error {
noAuth := option.WithoutAuthentication()
endpoint := option.WithEndpoint(endpointURL)
ctx := context.Background()
c, err := bigquery.NewClient(ctx, b.Project, noAuth, endpoint)
if err != nil {
return err
}
b.client = c
return nil
}
func (b *BigQuery) setUpTestClientWithJSON(endpointURL string, credentialsJSON []byte) error {
noAuth := option.WithoutAuthentication()
endpoint := option.WithEndpoint(endpointURL)
credentials := option.WithCredentialsJSON(credentialsJSON)
skipValidate := internaloption.SkipDialSettingsValidation()
ctx := context.Background()
c, err := bigquery.NewClient(ctx, b.Project, credentials, noAuth, endpoint, skipValidate)
b.client = c
return err
}
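// localBigQueryServer mimics the BigQuery table-metadata and insertAll
// endpoints used by the plugin, recording the received request body.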
func localBigQueryServer(t *testing.T) *httptest.Server {
srv := httptest.NewServer(http.NotFoundHandler())
srv.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/projects/test-project/datasets/test-dataset/tables/test1/insertAll",
"/projects/test-project/datasets/test-dataset/tables/test-metrics/insertAll":
decoder := json.NewDecoder(r.Body)
if err := decoder.Decode(&receivedBody); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
w.WriteHeader(http.StatusOK)
if _, err := w.Write([]byte(successfulResponse)); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
case "/projects/test-project/datasets/test-dataset/tables/test-metrics":
w.WriteHeader(http.StatusOK)
if _, err := w.Write([]byte("{}")); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
default:
w.WriteHeader(http.StatusNotFound)
if _, err := w.Write([]byte(r.URL.String())); err != nil {
w.WriteHeader(http.StatusInternalServerError)
t.Error(err)
return
}
}
})
return srv
}

plugins/outputs/bigquery/sample.conf

@@ -0,0 +1,19 @@
# Configuration for Google Cloud BigQuery to send entries
[[outputs.bigquery]]
  ## Credentials File
  credentials_file = "/path/to/service/account/key.json"

  ## Google Cloud Platform Project
  # project = ""

  ## The BigQuery dataset to write metrics to
  dataset = "telegraf"

  ## Timeout for BigQuery operations.
  # timeout = "5s"

  ## Character to replace hyphens in metric names with
  # replace_hyphen_to = "_"

  ## Write all metrics into a single compact table with the given name
  # compact_table = ""