1
0
Fork 0
telegraf/plugins/inputs/jti_openconfig_telemetry/jti_openconfig_telemetry.go
Daniel Baumann 4978089aab
Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-05-24 07:26:29 +02:00

461 lines
14 KiB
Go

//go:generate ../../../tools/readme_config_includer/generator
package jti_openconfig_telemetry
import (
"context"
_ "embed"
"errors"
"fmt"
"net"
"regexp"
"strings"
"sync"
"time"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/keepalive"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
common_tls "github.com/influxdata/telegraf/plugins/common/tls"
"github.com/influxdata/telegraf/plugins/inputs"
authentication "github.com/influxdata/telegraf/plugins/inputs/jti_openconfig_telemetry/auth"
telemetry "github.com/influxdata/telegraf/plugins/inputs/jti_openconfig_telemetry/oc"
)
// sampleConfig holds the contents of sample.conf, embedded at build time. It is
// the value returned by SampleConfig.
//
//go:embed sample.conf
var sampleConfig string
var (
// keyPathRegex matches a single path element that carries a predicate, for
// example "/event[id=2]". Capture group 1 is the element name (the text
// between the leading "/" and the "["); capture group 2 is the predicate
// body between the brackets, which has to contain an "=" and must not
// contain a "[".
keyPathRegex = regexp.MustCompile(`/([^/]*)\[([A-Za-z0-9\-/]*=[^\[]*)]`)
)
// OpenConfigTelemetry is the plugin state: the user-facing settings (fields
// with a toml tag) plus the runtime bookkeeping filled in by Start.
type OpenConfigTelemetry struct {
// Servers lists the gRPC endpoints to connect to; each entry is split with
// net.SplitHostPort, so it has to be in "host:port" form.
Servers []string `toml:"servers"`
// Sensors lists the subscriptions. Each entry is a whitespace-separated
// string: an optional reporting rate (a duration such as "1000ms"), an
// optional measurement name (a word not starting with "/"), then the
// sensor paths.
Sensors []string `toml:"sensors"`
// Username, Password and ClientID are attached as outgoing gRPC metadata
// when Username is non-empty. A login check is performed only when all
// three are set.
Username string `toml:"username"`
Password string `toml:"password"`
ClientID string `toml:"client_id"`
// TimestampSource must be "", "collection" or "data". With "data" the
// metric time is taken from the message's _timestamp field; otherwise the
// time of reception is used.
TimestampSource string `toml:"timestamp_source"`
// SampleFrequency is the reporting rate for sensors that do not specify
// their own; it is converted to milliseconds for the subscription.
SampleFrequency config.Duration `toml:"sample_frequency"`
// StrAsTags makes string values be emitted as tags instead of fields.
StrAsTags bool `toml:"str_as_tags"`
// RetryDelay is the wait before a failed subscription is retried. When it
// is not positive, the collector for that sensor stops instead of retrying.
RetryDelay config.Duration `toml:"retry_delay"`
// EnableTLS selects TLS transport credentials built from ClientConfig;
// when false the connection uses insecure credentials.
EnableTLS bool `toml:"enable_tls"`
// KeepAlivePeriod is the gRPC keep-alive interval; the keep-alive timeout
// is twice this value. Keep-alive is not configured when it is not positive.
KeepAlivePeriod config.Duration `toml:"keep_alive_period"`
common_tls.ClientConfig
Log telegraf.Logger `toml:"-"`
// sensorsConfig is the parsed form of Sensors, rebuilt by splitSensorConfig.
sensorsConfig []sensorConfig
// grpcClientConns holds the sessions opened by Start so Stop can close them.
grpcClientConns []grpcConnection
// wg tracks the collector goroutines; it is assigned by Start.
wg *sync.WaitGroup
}
// sensorConfig is one parsed entry of the sensors setting: the measurement
// name to report under and the list of paths to subscribe to.
type sensorConfig struct {
// measurementName is the metric name used when adding the received data
measurementName string
// pathList is sent as the PathList of the subscription request
pathList []*telemetry.Path
}
// grpcConnection pairs the gRPC client connection to one server with the
// cancel function of the context used for the RPCs on that connection.
type grpcConnection struct {
connection *grpc.ClientConn
cancel context.CancelFunc
}
// close shuts down the session to one server. The context is cancelled first,
// so RPCs running on it end because of the cancellation, and then the
// connection itself is closed. Fields that were never set are skipped, which
// makes the call safe on a partially initialised grpcConnection.
func (g *grpcConnection) close() {
	if g.cancel != nil {
		g.cancel()
	}
	if g.connection != nil {
		// Best effort: there is nothing useful to do with a close error
		// while shutting down.
		_ = g.connection.Close()
	}
}
// SampleConfig returns the sample configuration embedded from sample.conf.
func (*OpenConfigTelemetry) SampleConfig() string {
return sampleConfig
}
// Init validates the plugin settings. The only setting checked is
// timestamp_source, which has to be empty, "collection" or "data"; any other
// value is rejected with an error.
func (m *OpenConfigTelemetry) Init() error {
	source := m.TimestampSource
	if source == "" || source == "collection" || source == "data" {
		return nil
	}
	return fmt.Errorf("unknown option for timestamp_source: %q", source)
}
// Start parses the sensor configuration, opens one gRPC session per configured
// server and launches the background collectors that push metrics into acc.
//
// A server whose address cannot be parsed, whose client cannot be created or
// which fails authentication is logged and skipped; it does not make Start
// fail. An error is returned only when no valid sensor is configured or the
// TLS configuration cannot be built.
func (m *OpenConfigTelemetry) Start(acc telegraf.Accumulator) error {
	// Build sensors config
	if m.splitSensorConfig() == 0 {
		return errors.New("no valid sensor configuration available")
	}

	// Parse TLS config
	var creds credentials.TransportCredentials
	if m.EnableTLS {
		tlscfg, err := m.ClientConfig.TLSConfig()
		if err != nil {
			return err
		}
		creds = credentials.NewTLS(tlscfg)
	} else {
		creds = insecure.NewCredentials()
	}

	// Setup the basic connection options
	options := []grpc.DialOption{
		grpc.WithTransportCredentials(creds),
	}

	// Add keep-alive settings
	if m.KeepAlivePeriod > 0 {
		params := keepalive.ClientParameters{
			Time:    time.Duration(m.KeepAlivePeriod),
			Timeout: 2 * time.Duration(m.KeepAlivePeriod),
		}
		options = append(options, grpc.WithKeepaliveParams(params))
	}

	// Connect to given list of servers and start collecting data
	var wg sync.WaitGroup
	m.wg = &wg
	for _, server := range m.Servers {
		// Extract device address and port
		grpcServer, grpcPort, err := net.SplitHostPort(server)
		if err != nil {
			m.Log.Errorf("Invalid server address: %s", err.Error())
			continue
		}

		grpcClientConn, err := grpc.NewClient(server, options...)
		if err != nil {
			// Without a client there is nothing to authenticate against,
			// subscribe on or close later, so skip this server entirely.
			m.Log.Errorf("Failed to connect to %s: %s", server, err.Error())
			continue
		}
		m.Log.Debugf("Opened a new gRPC session to %s on port %s", grpcServer, grpcPort)

		// The context is created only once the session exists, so every
		// cancel function ends up in grpcClientConns and is called by Stop.
		ctx, cancel := context.WithCancel(context.Background())
		if len(m.Username) > 0 {
			ctx = metadata.AppendToOutgoingContext(
				ctx,
				"username", m.Username,
				"password", m.Password,
				"clientid", m.ClientID,
			)
		}

		// Add to the list of client connections
		m.grpcClientConns = append(m.grpcClientConns, grpcConnection{
			connection: grpcClientConn,
			cancel:     cancel,
		})

		if m.Username != "" && m.Password != "" && m.ClientID != "" {
			if err := m.authenticate(ctx, server, grpcClientConn); err != nil {
				m.Log.Errorf("Error authenticating to %s: %v", grpcServer, err)
				continue
			}
		}

		// Subscribe and gather telemetry data
		m.collectData(ctx, grpcServer, grpcClientConn, acc)
	}

	return nil
}
// Gather does nothing and always returns nil: metrics are added to the
// accumulator by the collector goroutines launched from Start.
func (*OpenConfigTelemetry) Gather(telegraf.Accumulator) error {
return nil
}
// Stop closes every gRPC session opened by Start and then blocks until all
// collector goroutines tracked by the wait group have finished. It is safe to
// call when Start never ran or returned early, and a repeated call is a no-op.
func (m *OpenConfigTelemetry) Stop() {
	for i := range m.grpcClientConns {
		m.grpcClientConns[i].close()
	}
	// Forget the closed sessions so a later Stop (or a Start/Stop cycle) does
	// not close them a second time.
	m.grpcClientConns = nil

	// wg is only assigned by Start; without this guard Wait would be called
	// on a nil pointer.
	if m.wg != nil {
		m.wg.Wait()
	}
}
// spitTagsNPath takes an XML path with predicates and returns the path without
// predicates together with the tags extracted from them. Given
// /events/event[id=2]/attributes[key='message']/value it returns
// /events/event/attributes/value as path and
// { /events/event/@id=2, /events/event/attributes/@key=message } as tags.
//
// A predicate may hold several keys joined by " and "; each becomes its own
// tag. Single quotes are removed from tag values. The value is everything after
// the first "=", so values containing "=" are kept whole. A predicate part
// without "=" yields no tag.
func spitTagsNPath(xmlpath string) (string, map[string]string) {
	subs := keyPathRegex.FindAllStringSubmatch(xmlpath, -1)
	tags := make(map[string]string)

	for _, sub := range subs {
		element := "/" + strings.TrimSpace(sub[1])

		// The tag key is built from the path in front of this predicate.
		// Earlier predicates have already been stripped from xmlpath, so the
		// prefix is predicate-free.
		before, _, _ := strings.Cut(xmlpath, sub[0])
		tagKey := before + element + "/@"

		// If we have multiple keys in the given path like
		// /events/event[id=2 and type=3]/, we must emit multiple tags
		for _, kv := range strings.Split(sub[2], " and ") {
			name, value, found := strings.Cut(kv, "=")
			if !found {
				// Not a key=value pair; there is nothing to emit for it
				continue
			}
			tags[tagKey+strings.TrimSpace(name)] = strings.ReplaceAll(value, "'", "")
		}

		xmlpath = strings.Replace(xmlpath, sub[0], element, 1)
	}

	return xmlpath, tags
}
// extractData turns one OpenConfig response into a list of data groups.
//
// Every key/value entry of the response is handled in order. An entry whose
// key is "__prefix__" only sets the prefix that is prepended to the keys of the
// entries after it. For any other entry the (prefixed) key is split into a
// predicate-free path and tags; the value is stored under that path, either as
// a field or, for strings when StrAsTags is set, as a tag. The tags are
// completed with device, system_id and path, and the value plus the message
// header fields are inserted into the collection under those tags.
func (m *OpenConfigTelemetry) extractData(r *telemetry.OpenConfigData, grpcServer string) []dataGroup {
	groups := make([]dataGroup, 0, 5*len(r.Kv))

	// No prefix until the response provides one
	var prefix string
	for _, entry := range r.Kv {
		if entry.Key == "__prefix__" {
			prefix = entry.GetStrValue()
			continue
		}

		fields := make(map[string]interface{})

		// Use the prefix, if there is one, when deriving path and tags
		path, tags := spitTagsNPath(prefix + entry.Key)
		tags["device"] = grpcServer

		switch entry.Value.(type) {
		case *telemetry.KeyValue_StrValue:
			// With StrAsTags all string values become tags
			if m.StrAsTags {
				tags[path] = entry.GetStrValue()
			} else {
				fields[path] = entry.GetStrValue()
			}
		case *telemetry.KeyValue_DoubleValue:
			fields[path] = entry.GetDoubleValue()
		case *telemetry.KeyValue_IntValue:
			fields[path] = entry.GetIntValue()
		case *telemetry.KeyValue_UintValue:
			fields[path] = entry.GetUintValue()
		case *telemetry.KeyValue_SintValue:
			fields[path] = entry.GetSintValue()
		case *telemetry.KeyValue_BoolValue:
			fields[path] = entry.GetBoolValue()
		case *telemetry.KeyValue_BytesValue:
			fields[path] = entry.GetBytesValue()
		}

		// Tags taken from the message itself; set last so they win over
		// anything derived from the key
		tags["system_id"] = r.SystemId
		tags["path"] = r.Path

		// The derived value goes in first, then each message header field,
		// one insert call per map and in this order
		inserts := []map[string]interface{}{
			fields,
			{"_sequence": r.SequenceNumber},
			{"_timestamp": r.Timestamp},
			{"_component_id": r.ComponentId},
			{"_subcomponent_id": r.SubComponentId},
		}
		for _, data := range inserts {
			groups = collectionByKeys(groups).insert(tags, data)
		}
	}

	return groups
}
// splitSensorConfig parses the Sensors setting into m.sensorsConfig and returns
// the number of valid entries.
//
// Each entry is a whitespace-separated list. It may start with a custom
// reporting rate, followed by a measurement name, followed by the sensor
// paths, like "1000ms interfaces /interfaces" where 1000ms is the reporting
// rate and interfaces is the measurement name. If no rate is given, the global
// one from sample_frequency is used. If the word after the optional rate does
// not start with "/", it is the measurement name and is excluded from the
// paths to subscribe to; otherwise it is the first path and also serves as
// the measurement name. Entries without any sensor path, including empty
// ones, are logged and skipped.
func (m *OpenConfigTelemetry) splitSensorConfig() int {
	// The global rate is the same for every entry, so compute it once
	defaultRate := uint32(time.Duration(m.SampleFrequency) / time.Millisecond)

	m.sensorsConfig = make([]sensorConfig, 0, len(m.Sensors))
	for _, sensor := range m.Sensors {
		spathSplit := strings.Fields(sensor)
		if len(spathSplit) == 0 {
			// Empty or whitespace-only entry; indexing it below would panic
			m.Log.Error("No sensors are specified")
			continue
		}

		// A leading word that parses as a duration is the custom reporting rate
		reportingRate := defaultRate
		if duration, err := time.ParseDuration(spathSplit[0]); err == nil {
			reportingRate = uint32(duration / time.Millisecond)
			spathSplit = spathSplit[1:]
		}
		if len(spathSplit) == 0 {
			m.Log.Error("No sensors are specified")
			continue
		}

		// Word after custom reporting rate is treated as measurement name
		measurementName := spathSplit[0]

		// If that word doesn't start with /, it is only the measurement name.
		// Else it is a sensor path as well
		if !strings.HasPrefix(measurementName, "/") {
			spathSplit = spathSplit[1:]
		}
		if len(spathSplit) == 0 {
			m.Log.Error("No valid sensors are specified")
			continue
		}

		// Iterate over our sensors and create pathlist to subscribe
		pathlist := make([]*telemetry.Path, 0, len(spathSplit))
		for _, path := range spathSplit {
			pathlist = append(pathlist, &telemetry.Path{
				Path:            path,
				SampleFrequency: reportingRate,
			})
		}

		m.sensorsConfig = append(m.sensorsConfig, sensorConfig{
			measurementName: measurementName,
			pathList:        pathlist,
		})
	}

	return len(m.sensorsConfig)
}
// collectData subscribes to every configured sensor on the given server and
// feeds the received telemetry into acc. One goroutine is started per sensor;
// each is tracked by m.wg and keeps running until ctx is cancelled or the
// subscription fails in a way that is not retried.
//
// Subscription failures are handled as follows:
//   - Unauthenticated: if username, password and client ID are set, a new
//     login is attempted and, on success, the subscription is retried after
//     one second. Otherwise the regular retry below applies.
//   - Unavailable: the regular retry below applies.
//   - anything else: the error is reported and the collector stops.
//
// The regular retry waits for RetryDelay and subscribes again; when RetryDelay
// is not positive the collector stops instead. A failure while reading from an
// established stream is reported and followed by a new subscription after one
// second. All waits end early when ctx is cancelled, and failures caused by the
// cancellation are not reported as errors.
func (m *OpenConfigTelemetry) collectData(
	ctx context.Context,
	grpcServer string,
	grpcClientConn *grpc.ClientConn,
	acc telegraf.Accumulator,
) {
	// pause waits for d or until ctx is cancelled and reports whether the
	// full delay elapsed.
	pause := func(ctx context.Context, d time.Duration) bool {
		timer := time.NewTimer(d)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return false
		case <-timer.C:
			return true
		}
	}

	c := telemetry.NewOpenConfigTelemetryClient(grpcClientConn)
	for _, sensor := range m.sensorsConfig {
		m.wg.Add(1)
		go func(ctx context.Context, sensor sensorConfig) {
			defer m.wg.Done()

			for {
				stream, err := c.TelemetrySubscribe(
					ctx,
					&telemetry.SubscriptionRequest{PathList: sensor.pathList},
				)
				if err != nil {
					// A cancelled context means we are shutting down
					if ctx.Err() != nil {
						return
					}

					rpcStatus, _ := status.FromError(err)
					if rpcStatus.Code() == codes.Unauthenticated {
						if m.Username != "" && m.Password != "" && m.ClientID != "" {
							err := m.authenticate(ctx, grpcServer, grpcClientConn)
							if err == nil {
								if !pause(ctx, 1*time.Second) {
									return
								}
								continue
							}
							acc.AddError(fmt.Errorf("could not re-authenticate: %w", err))
						}
					} else if rpcStatus.Code() != codes.Unavailable {
						// Only an unavailable service may come back later;
						// every other failure is final
						acc.AddError(fmt.Errorf("could not subscribe to %s on %q: %w", sensor.measurementName, grpcServer, err))
						return
					}

					// Retry with delay. Without a delay we give up
					if time.Duration(m.RetryDelay) > 0 {
						m.Log.Debugf("Retrying %s from %s with timeout %v", sensor.measurementName, grpcServer, time.Duration(m.RetryDelay))
						if !pause(ctx, time.Duration(m.RetryDelay)) {
							return
						}
						continue
					}
					return
				}
				m.Log.Debugf("Successfully subscribed to %s on %s", sensor.measurementName, grpcServer)

				for {
					r, err := stream.Recv()
					if err != nil {
						// A cancelled context means we are shutting down
						if ctx.Err() != nil {
							return
						}
						// If we encounter error in the stream, break so we can
						// retry the connection
						acc.AddError(fmt.Errorf("failed to read from %s from %s: %w", sensor.measurementName, grpcServer, err))
						if !pause(ctx, 1*time.Second) {
							return
						}
						break
					}

					m.Log.Debugf("Received from %s on %s: %v", sensor.measurementName, grpcServer, r)

					// Fallback tags for groups that carry none of their own
					tags := make(map[string]string)
					tags["device"] = grpcServer

					dgroups := m.extractData(r, grpcServer)

					// Print final data collection
					m.Log.Debugf("Available collection for %s on %s: %v", sensor.measurementName, grpcServer, dgroups)

					timestamp := time.Now()
					// Iterate through data groups and add them
					for _, group := range dgroups {
						if m.TimestampSource == "data" {
							// OpenConfig timestamp is in milliseconds since epoch
							ts, ok := group.data["_timestamp"].(uint64)
							if ok {
								timestamp = time.UnixMilli(int64(ts))
							} else {
								m.Log.Warnf("Invalid type %T for _timestamp %v", group.data["_timestamp"], group.data["_timestamp"])
							}
						}

						if len(group.tags) == 0 {
							acc.AddFields(sensor.measurementName, group.data, tags, timestamp)
						} else {
							acc.AddFields(sensor.measurementName, group.data, group.tags, timestamp)
						}
					}
				}
			}
		}(ctx, sensor)
	}
}
// authenticate performs a login check on the given connection using the
// configured username, password and client ID. It returns an error when the
// login call itself fails or when the reply reports an unsuccessful result;
// server is only used to label those errors.
func (m *OpenConfigTelemetry) authenticate(ctx context.Context, server string, grpcClientConn *grpc.ClientConn) error {
	request := &authentication.LoginRequest{
		UserName: m.Username,
		Password: m.Password,
		ClientId: m.ClientID,
	}

	reply, err := authentication.NewLoginClient(grpcClientConn).LoginCheck(ctx, request)
	switch {
	case err != nil:
		return fmt.Errorf("could not initiate login check for %s: %w", server, err)
	case !reply.Result:
		// The call went through but the user was not accepted
		return fmt.Errorf("failed to authenticate the user for %s", server)
	}

	return nil
}
func init() {
inputs.Add("jti_openconfig_telemetry", func() telegraf.Input {
return &OpenConfigTelemetry{
RetryDelay: config.Duration(time.Second),
KeepAlivePeriod: config.Duration(10 * time.Second),
StrAsTags: false,
TimestampSource: "collection",
}
})
}