1
0
Fork 0

Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
4963 changed files with 677545 additions and 0 deletions

View file

@ -0,0 +1,110 @@
# Arista LANZ Consumer Input Plugin
This service plugin consumes messages from the
[Arista Networks Latency Analyzer (LANZ)][lanz] by receiving the datastream
on TCP (usually through port 50001) on the switch's management IP.
> [!NOTE]
> You will need to configure LANZ and enable streaming LANZ data, see the
> [documentation][config_lanz] for more details.
⭐ Telegraf v1.14.0
🏷️ network
💻 all
[lanz]: https://www.arista.com/en/um-eos/eos-latency-analyzer-lanz
[config_lanz]: https://www.arista.com/en/um-eos/eos-section-44-3-configuring-lanz
## Service Input <!-- @/docs/includes/service_input.md -->
This plugin is a service input. Normal plugins gather metrics determined by the
interval setting. Service plugins start a service to listen and wait for
metrics or events to occur. Service plugins have two key differences from
normal plugins:
1. The global or plugin specific `interval` setting may not apply
2. The CLI options of `--test`, `--test-wait`, and `--once` may not produce
output for this plugin
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
In addition to the plugin-specific configuration settings, plugins support
additional global and plugin configuration settings. These settings are used to
modify metrics, tags, and fields or create aliases and configure ordering, etc.
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
## Configuration
```toml @sample.conf
# Read metrics off Arista LANZ, via socket
[[inputs.lanz]]
## URL to Arista LANZ endpoint
servers = [
"tcp://switch1.int.example.com:50001",
"tcp://switch2.int.example.com:50001",
]
```
## Metrics
For more details on the metrics see the [protocol buffer definition][proto].
- lanz_congestion_record:
- tags:
- intf_name
- switch_id
- port_id
- entry_type
- traffic_class
- fabric_peer_intf_name
- source
- port
- fields:
- timestamp (integer)
- queue_size (integer)
- time_of_max_qlen (integer)
- tx_latency (integer)
- q_drop_count (integer)
- lanz_global_buffer_usage_record
- tags:
- entry_type
- source
- port
- fields:
- timestamp (integer)
- buffer_size (integer)
- duration (integer)
[proto]: https://github.com/aristanetworks/goarista/blob/master/lanz/proto/lanz.proto
## Sample Queries
Get the max tx_latency for the last hour for all interfaces on all switches.
```sql
SELECT max("tx_latency") AS "max_tx_latency" FROM "congestion_record" WHERE time > now() - 1h GROUP BY time(10s), "hostname", "intf_name"
```
Get the max queue_size for the last hour for all interfaces on all switches.
```sql
SELECT max("queue_size") AS "max_queue_size" FROM "congestion_record" WHERE time > now() - 1h GROUP BY time(10s), "hostname", "intf_name"
```
Get the max buffer_size over the last hour for all switches.
```sql
SELECT max("buffer_size") AS "max_buffer_size" FROM "global_buffer_usage_record" WHERE time > now() - 1h GROUP BY time(10s), "hostname"
```
## Example Output
```text
lanz_global_buffer_usage_record,entry_type=2,host=telegraf.int.example.com,port=50001,source=switch01.int.example.com timestamp=158334105824919i,buffer_size=505i,duration=0i 1583341058300643815
lanz_congestion_record,entry_type=2,host=telegraf.int.example.com,intf_name=Ethernet36,port=50001,port_id=61,source=switch01.int.example.com,switch_id=0,traffic_class=1 time_of_max_qlen=0i,tx_latency=564480i,q_drop_count=0i,timestamp=158334105824919i,queue_size=225i 1583341058300636045
lanz_global_buffer_usage_record,entry_type=2,host=telegraf.int.example.com,port=50001,source=switch01.int.example.com timestamp=158334105824919i,buffer_size=589i,duration=0i 1583341058300457464
lanz_congestion_record,entry_type=1,host=telegraf.int.example.com,intf_name=Ethernet36,port=50001,port_id=61,source=switch01.int.example.com,switch_id=0,traffic_class=1 q_drop_count=0i,timestamp=158334105824919i,queue_size=232i,time_of_max_qlen=0i,tx_latency=584640i 1583341058300450302
```

128
plugins/inputs/lanz/lanz.go Normal file
View file

@ -0,0 +1,128 @@
//go:generate ../../../tools/readme_config_includer/generator
package lanz
import (
_ "embed"
"net/url"
"strconv"
"sync"
"time"
"github.com/aristanetworks/goarista/lanz"
pb "github.com/aristanetworks/goarista/lanz/proto"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
//go:embed sample.conf
var sampleConfig string
// Lanz is the plugin state: the configured LANZ endpoints plus the running
// goarista clients and a WaitGroup tracking their receive goroutines.
type Lanz struct {
// Servers lists LANZ endpoint URLs, e.g. "tcp://switch:50001".
Servers []string `toml:"servers"`
// clients holds one running lanz.Client per configured server.
clients []lanz.Client
// wg tracks the per-server receive goroutines so Stop can wait for them.
wg sync.WaitGroup
}
// SampleConfig returns the example configuration embedded from sample.conf.
func (*Lanz) SampleConfig() string {
return sampleConfig
}
// Start creates a LANZ client for every configured server and launches two
// goroutines per server: one running the client (which closes the record
// channel on Stop) and one forwarding received records to the accumulator.
// It returns an error only if a configured server URL fails to parse.
func (l *Lanz) Start(acc telegraf.Accumulator) error {
	if len(l.Servers) == 0 {
		l.Servers = append(l.Servers, "tcp://127.0.0.1:50001")
	}

	for _, server := range l.Servers {
		deviceURL, err := url.Parse(server)
		if err != nil {
			return err
		}
		client := lanz.New(
			lanz.WithAddr(deviceURL.Host),
			lanz.WithBackoff(1*time.Second),
			lanz.WithTimeout(10*time.Second),
		)
		l.clients = append(l.clients, client)

		in := make(chan *pb.LanzRecord)
		go func() {
			client.Run(in)
		}()

		l.wg.Add(1)
		go func() {
			// Defer Done so it fires when receive returns; calling it
			// up front would let Stop's wg.Wait() return while records
			// are still being forwarded.
			defer l.wg.Done()
			receive(acc, in, deviceURL)
		}()
	}
	return nil
}
// Gather is a no-op: this is a service input, so metrics are pushed from the
// receive goroutines rather than collected on the agent interval.
func (*Lanz) Gather(telegraf.Accumulator) error {
return nil
}
// Stop shuts down every LANZ client and then waits for all receive
// goroutines to observe their closed channels and exit.
func (l *Lanz) Stop() {
	for i := range l.clients {
		l.clients[i].Stop()
	}
	l.wg.Wait()
}
// receive drains the record channel, converting each record into metrics,
// until the channel is closed by the client's Run loop on Stop.
func receive(acc telegraf.Accumulator, in <-chan *pb.LanzRecord, deviceURL *url.URL) {
	// A range loop is equivalent to the original single-case for-select
	// (exits when the channel is closed) and avoids the staticcheck S1000
	// suppression the old form needed.
	for msg := range in {
		msgToAccumulator(acc, msg, deviceURL)
	}
}
// msgToAccumulator converts one LANZ protobuf record into telegraf metrics.
// A record may carry a congestion record, a global buffer usage record, or
// both; each present section is emitted as its own measurement, tagged with
// the source hostname and port taken from the device URL.
func msgToAccumulator(acc telegraf.Accumulator, msg *pb.LanzRecord, deviceURL *url.URL) {
	source := deviceURL.Hostname()
	port := deviceURL.Port()

	if cr := msg.GetCongestionRecord(); cr != nil {
		fields := map[string]interface{}{
			"timestamp":        int64(cr.GetTimestamp()),
			"queue_size":       int64(cr.GetQueueSize()),
			"time_of_max_qlen": int64(cr.GetTimeOfMaxQLen()),
			"tx_latency":       int64(cr.GetTxLatency()),
			"q_drop_count":     int64(cr.GetQDropCount()),
		}
		tags := map[string]string{
			"intf_name":             cr.GetIntfName(),
			"switch_id":             strconv.FormatInt(int64(cr.GetSwitchId()), 10),
			"port_id":               strconv.FormatInt(int64(cr.GetPortId()), 10),
			"entry_type":            strconv.FormatInt(int64(cr.GetEntryType()), 10),
			"traffic_class":         strconv.FormatInt(int64(cr.GetTrafficClass()), 10),
			"fabric_peer_intf_name": cr.GetFabricPeerIntfName(),
			"source":                source,
			"port":                  port,
		}
		acc.AddFields("lanz_congestion_record", fields, tags)
	}

	if gbur := msg.GetGlobalBufferUsageRecord(); gbur != nil {
		fields := map[string]interface{}{
			"timestamp":   int64(gbur.GetTimestamp()),
			"buffer_size": int64(gbur.GetBufferSize()),
			"duration":    int64(gbur.GetDuration()),
		}
		tags := map[string]string{
			"entry_type": strconv.FormatInt(int64(gbur.GetEntryType()), 10),
			"source":     source,
			"port":       port,
		}
		acc.AddFields("lanz_global_buffer_usage_record", fields, tags)
	}
}
// init registers the plugin constructor under the name "lanz".
func init() {
inputs.Add("lanz", func() telegraf.Input {
return &Lanz{}
})
}

View file

@ -0,0 +1,136 @@
package lanz
import (
"net/url"
"strconv"
"testing"
pb "github.com/aristanetworks/goarista/lanz/proto"
"google.golang.org/protobuf/proto"
"github.com/influxdata/telegraf/testutil"
)
// Fixture: a congestion record whose numeric fields are all "1"-flavored
// values, expected to map to the vals1/tags1 metric in the test below.
var testProtoBufCongestionRecord1 = &pb.LanzRecord{
CongestionRecord: &pb.CongestionRecord{
Timestamp: proto.Uint64(100000000000000),
IntfName: proto.String("eth1"),
SwitchId: proto.Uint32(1),
PortId: proto.Uint32(1),
QueueSize: proto.Uint32(1),
EntryType: pb.CongestionRecord_EntryType.Enum(1),
TrafficClass: proto.Uint32(1),
TimeOfMaxQLen: proto.Uint64(100000000000000),
TxLatency: proto.Uint32(100),
QDropCount: proto.Uint32(1),
FabricPeerIntfName: proto.String("FabricPeerIntfName1"),
},
}
// Fixture: a second congestion record with distinct "2"-flavored values so
// the test can verify per-record field/tag mapping, not just presence.
var testProtoBufCongestionRecord2 = &pb.LanzRecord{
CongestionRecord: &pb.CongestionRecord{
Timestamp: proto.Uint64(200000000000000),
IntfName: proto.String("eth2"),
SwitchId: proto.Uint32(2),
PortId: proto.Uint32(2),
QueueSize: proto.Uint32(2),
EntryType: pb.CongestionRecord_EntryType.Enum(2),
TrafficClass: proto.Uint32(2),
TimeOfMaxQLen: proto.Uint64(200000000000000),
TxLatency: proto.Uint32(200),
QDropCount: proto.Uint32(2),
FabricPeerIntfName: proto.String("FabricPeerIntfName2"),
},
}
// Fixture: a global buffer usage record for the second measurement type.
var testProtoBufGlobalBufferUsageRecord = &pb.LanzRecord{
GlobalBufferUsageRecord: &pb.GlobalBufferUsageRecord{
EntryType: pb.GlobalBufferUsageRecord_EntryType.Enum(1),
Timestamp: proto.Uint64(100000000000000),
BufferSize: proto.Uint32(1),
Duration: proto.Uint32(10),
},
}
// TestLanzGeneratesMetrics feeds the three protobuf fixtures through
// msgToAccumulator and asserts the exact fields and tags produced for both
// measurement types, including the source/port tags derived from the URL.
func TestLanzGeneratesMetrics(t *testing.T) {
	l := &Lanz{Servers: []string{
		"tcp://switch01.int.example.com:50001",
		"tcp://switch02.int.example.com:50001",
	}}

	// Fail fast with context on parse errors; the original t.Fail() kept
	// running with an unusable URL and produced confusing follow-on failures.
	deviceURL1, err := url.Parse(l.Servers[0])
	if err != nil {
		t.Fatalf("parsing server URL %q: %v", l.Servers[0], err)
	}
	deviceURL2, err := url.Parse(l.Servers[1])
	if err != nil {
		t.Fatalf("parsing server URL %q: %v", l.Servers[1], err)
	}

	var acc testutil.Accumulator

	// First congestion record, from switch01.
	msgToAccumulator(&acc, testProtoBufCongestionRecord1, deviceURL1)
	acc.Wait(1)
	vals1 := map[string]interface{}{
		"timestamp":        int64(100000000000000),
		"queue_size":       int64(1),
		"time_of_max_qlen": int64(100000000000000),
		"tx_latency":       int64(100),
		"q_drop_count":     int64(1),
	}
	tags1 := map[string]string{
		"intf_name":             "eth1",
		"switch_id":             strconv.FormatInt(int64(1), 10),
		"port_id":               strconv.FormatInt(int64(1), 10),
		"entry_type":            strconv.FormatInt(int64(1), 10),
		"traffic_class":         strconv.FormatInt(int64(1), 10),
		"fabric_peer_intf_name": "FabricPeerIntfName1",
		"source":                "switch01.int.example.com",
		"port":                  "50001",
	}
	acc.AssertContainsFields(t, "lanz_congestion_record", vals1)
	acc.AssertContainsTaggedFields(t, "lanz_congestion_record", vals1, tags1)
	acc.ClearMetrics()

	// Second congestion record, from switch02, with distinct values.
	msgToAccumulator(&acc, testProtoBufCongestionRecord2, deviceURL2)
	acc.Wait(1)
	vals2 := map[string]interface{}{
		"timestamp":        int64(200000000000000),
		"queue_size":       int64(2),
		"time_of_max_qlen": int64(200000000000000),
		"tx_latency":       int64(200),
		"q_drop_count":     int64(2),
	}
	tags2 := map[string]string{
		"intf_name":             "eth2",
		"switch_id":             strconv.FormatInt(int64(2), 10),
		"port_id":               strconv.FormatInt(int64(2), 10),
		"entry_type":            strconv.FormatInt(int64(2), 10),
		"traffic_class":         strconv.FormatInt(int64(2), 10),
		"fabric_peer_intf_name": "FabricPeerIntfName2",
		"source":                "switch02.int.example.com",
		"port":                  "50001",
	}
	acc.AssertContainsFields(t, "lanz_congestion_record", vals2)
	acc.AssertContainsTaggedFields(t, "lanz_congestion_record", vals2, tags2)
	acc.ClearMetrics()

	// Global buffer usage record, from switch01.
	msgToAccumulator(&acc, testProtoBufGlobalBufferUsageRecord, deviceURL1)
	acc.Wait(1)
	gburVals1 := map[string]interface{}{
		"timestamp":   int64(100000000000000),
		"buffer_size": int64(1),
		"duration":    int64(10),
	}
	gburTags1 := map[string]string{
		"entry_type": strconv.FormatInt(int64(1), 10),
		"source":     "switch01.int.example.com",
		"port":       "50001",
	}
	acc.AssertContainsFields(t, "lanz_global_buffer_usage_record", gburVals1)
	acc.AssertContainsTaggedFields(t, "lanz_global_buffer_usage_record", gburVals1, gburTags1)
}

View file

@ -0,0 +1,7 @@
# Read metrics off Arista LANZ, via socket
[[inputs.lanz]]
## URL to Arista LANZ endpoint
servers = [
"tcp://switch1.int.example.com:50001",
"tcp://switch2.int.example.com:50001",
]