1
0
Fork 0

Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
4963 changed files with 677545 additions and 0 deletions

View file

@ -0,0 +1,110 @@
# Arista LANZ Consumer Input Plugin
This service plugin consumes messages from the
[Arista Networks Latency Analyzer (LANZ)][lanz] by receiving the datastream
on TCP (usually through port 50001) on the switch's management IP.
> [!NOTE]
> You will need to configure LANZ and enable streaming LANZ data, see the
> [documentation][config_lanz] for more details.
⭐ Telegraf v1.14.0
🏷️ network
💻 all
[lanz]: https://www.arista.com/en/um-eos/eos-latency-analyzer-lanz
[config_lanz]: https://www.arista.com/en/um-eos/eos-section-44-3-configuring-lanz
## Service Input <!-- @/docs/includes/service_input.md -->
This plugin is a service input. Normal plugins gather metrics determined by the
interval setting. Service plugins start a service to listen and wait for
metrics or events to occur. Service plugins have two key differences from
normal plugins:
1. The global or plugin specific `interval` setting may not apply
2. The CLI options of `--test`, `--test-wait`, and `--once` may not produce
output for this plugin
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
In addition to the plugin-specific configuration settings, plugins support
additional global and plugin configuration settings. These settings are used to
modify metrics, tags, and fields or create aliases and configure ordering, etc.
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
## Configuration
```toml @sample.conf
# Read metrics off Arista LANZ, via socket
[[inputs.lanz]]
## URL to Arista LANZ endpoint
servers = [
"tcp://switch1.int.example.com:50001",
"tcp://switch2.int.example.com:50001",
]
```
## Metrics
For more details on the metrics see the [protocol buffer definition][proto].
- lanz_congestion_record:
- tags:
- intf_name
- switch_id
- port_id
- entry_type
- traffic_class
- fabric_peer_intf_name
- source
- port
- fields:
- timestamp (integer)
- queue_size (integer)
- time_of_max_qlen (integer)
- tx_latency (integer)
- q_drop_count (integer)
- lanz_global_buffer_usage_record
- tags:
- entry_type
- source
- port
- fields:
- timestamp (integer)
- buffer_size (integer)
- duration (integer)
[proto]: https://github.com/aristanetworks/goarista/blob/master/lanz/proto/lanz.proto
## Sample Queries
Get the max tx_latency for the last hour for all interfaces on all switches.
```sql
SELECT max("tx_latency") AS "max_tx_latency" FROM "congestion_record" WHERE time > now() - 1h GROUP BY time(10s), "hostname", "intf_name"
```
Get the max queue_size for the last hour for all interfaces on all switches.
```sql
SELECT max("queue_size") AS "max_queue_size" FROM "congestion_record" WHERE time > now() - 1h GROUP BY time(10s), "hostname", "intf_name"
```
Get the max buffer_size over the last hour for all switches.
```sql
SELECT max("buffer_size") AS "max_buffer_size" FROM "global_buffer_usage_record" WHERE time > now() - 1h GROUP BY time(10s), "hostname"
```
## Example Output
```text
lanz_global_buffer_usage_record,entry_type=2,host=telegraf.int.example.com,port=50001,source=switch01.int.example.com timestamp=158334105824919i,buffer_size=505i,duration=0i 1583341058300643815
lanz_congestion_record,entry_type=2,host=telegraf.int.example.com,intf_name=Ethernet36,port=50001,port_id=61,source=switch01.int.example.com,switch_id=0,traffic_class=1 time_of_max_qlen=0i,tx_latency=564480i,q_drop_count=0i,timestamp=158334105824919i,queue_size=225i 1583341058300636045
lanz_global_buffer_usage_record,entry_type=2,host=telegraf.int.example.com,port=50001,source=switch01.int.example.com timestamp=158334105824919i,buffer_size=589i,duration=0i 1583341058300457464
lanz_congestion_record,entry_type=1,host=telegraf.int.example.com,intf_name=Ethernet36,port=50001,port_id=61,source=switch01.int.example.com,switch_id=0,traffic_class=1 q_drop_count=0i,timestamp=158334105824919i,queue_size=232i,time_of_max_qlen=0i,tx_latency=584640i 1583341058300450302
```

128
plugins/inputs/lanz/lanz.go Normal file
View file

@ -0,0 +1,128 @@
//go:generate ../../../tools/readme_config_includer/generator
package lanz
import (
_ "embed"
"net/url"
"strconv"
"sync"
"time"
"github.com/aristanetworks/goarista/lanz"
pb "github.com/aristanetworks/goarista/lanz/proto"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
//go:embed sample.conf
var sampleConfig string
// Lanz is the plugin state: the configured LANZ endpoints plus the running
// goarista clients and a WaitGroup tracking their receive goroutines.
type Lanz struct {
// Servers lists LANZ endpoint URLs, e.g. "tcp://switch:50001".
Servers []string `toml:"servers"`
// clients holds one running lanz.Client per configured server.
clients []lanz.Client
// wg tracks the per-server receive goroutines so Stop can wait for them.
wg sync.WaitGroup
}
// SampleConfig returns the example configuration embedded from sample.conf.
func (*Lanz) SampleConfig() string {
return sampleConfig
}
// Start creates a LANZ client for every configured server and launches two
// goroutines per server: one running the client (which closes the record
// channel on Stop) and one forwarding received records to the accumulator.
// It returns an error only if a configured server URL fails to parse.
func (l *Lanz) Start(acc telegraf.Accumulator) error {
	if len(l.Servers) == 0 {
		l.Servers = append(l.Servers, "tcp://127.0.0.1:50001")
	}

	for _, server := range l.Servers {
		deviceURL, err := url.Parse(server)
		if err != nil {
			return err
		}
		client := lanz.New(
			lanz.WithAddr(deviceURL.Host),
			lanz.WithBackoff(1*time.Second),
			lanz.WithTimeout(10*time.Second),
		)
		l.clients = append(l.clients, client)

		in := make(chan *pb.LanzRecord)
		go func() {
			client.Run(in)
		}()

		l.wg.Add(1)
		go func() {
			// Defer Done so it fires when receive returns; calling it
			// up front would let Stop's wg.Wait() return while records
			// are still being forwarded.
			defer l.wg.Done()
			receive(acc, in, deviceURL)
		}()
	}
	return nil
}
// Gather is a no-op: this is a service input, so metrics are pushed from the
// receive goroutines rather than collected on the agent interval.
func (*Lanz) Gather(telegraf.Accumulator) error {
return nil
}
// Stop shuts down every LANZ client and then waits for all receive
// goroutines to observe their closed channels and exit.
func (l *Lanz) Stop() {
	for i := range l.clients {
		l.clients[i].Stop()
	}
	l.wg.Wait()
}
// receive drains the record channel, converting each record into metrics,
// until the channel is closed by the client's Run loop on Stop.
func receive(acc telegraf.Accumulator, in <-chan *pb.LanzRecord, deviceURL *url.URL) {
	// A range loop is equivalent to the original single-case for-select
	// (exits when the channel is closed) and avoids the staticcheck S1000
	// suppression the old form needed.
	for msg := range in {
		msgToAccumulator(acc, msg, deviceURL)
	}
}
// msgToAccumulator converts one LANZ protobuf record into telegraf metrics.
// A record may carry a congestion record, a global buffer usage record, or
// both; each present section is emitted as its own measurement, tagged with
// the source hostname and port taken from the device URL.
func msgToAccumulator(acc telegraf.Accumulator, msg *pb.LanzRecord, deviceURL *url.URL) {
	source := deviceURL.Hostname()
	port := deviceURL.Port()

	if cr := msg.GetCongestionRecord(); cr != nil {
		fields := map[string]interface{}{
			"timestamp":        int64(cr.GetTimestamp()),
			"queue_size":       int64(cr.GetQueueSize()),
			"time_of_max_qlen": int64(cr.GetTimeOfMaxQLen()),
			"tx_latency":       int64(cr.GetTxLatency()),
			"q_drop_count":     int64(cr.GetQDropCount()),
		}
		tags := map[string]string{
			"intf_name":             cr.GetIntfName(),
			"switch_id":             strconv.FormatInt(int64(cr.GetSwitchId()), 10),
			"port_id":               strconv.FormatInt(int64(cr.GetPortId()), 10),
			"entry_type":            strconv.FormatInt(int64(cr.GetEntryType()), 10),
			"traffic_class":         strconv.FormatInt(int64(cr.GetTrafficClass()), 10),
			"fabric_peer_intf_name": cr.GetFabricPeerIntfName(),
			"source":                source,
			"port":                  port,
		}
		acc.AddFields("lanz_congestion_record", fields, tags)
	}

	if gbur := msg.GetGlobalBufferUsageRecord(); gbur != nil {
		fields := map[string]interface{}{
			"timestamp":   int64(gbur.GetTimestamp()),
			"buffer_size": int64(gbur.GetBufferSize()),
			"duration":    int64(gbur.GetDuration()),
		}
		tags := map[string]string{
			"entry_type": strconv.FormatInt(int64(gbur.GetEntryType()), 10),
			"source":     source,
			"port":       port,
		}
		acc.AddFields("lanz_global_buffer_usage_record", fields, tags)
	}
}
// init registers the plugin constructor under the name "lanz".
func init() {
inputs.Add("lanz", func() telegraf.Input {
return &Lanz{}
})
}

View file

@ -0,0 +1,136 @@
package lanz
import (
"net/url"
"strconv"
"testing"
pb "github.com/aristanetworks/goarista/lanz/proto"
"google.golang.org/protobuf/proto"
"github.com/influxdata/telegraf/testutil"
)
// Fixture: a congestion record whose numeric fields are all "1"-flavored
// values, expected to map to the vals1/tags1 metric in the test below.
var testProtoBufCongestionRecord1 = &pb.LanzRecord{
CongestionRecord: &pb.CongestionRecord{
Timestamp: proto.Uint64(100000000000000),
IntfName: proto.String("eth1"),
SwitchId: proto.Uint32(1),
PortId: proto.Uint32(1),
QueueSize: proto.Uint32(1),
EntryType: pb.CongestionRecord_EntryType.Enum(1),
TrafficClass: proto.Uint32(1),
TimeOfMaxQLen: proto.Uint64(100000000000000),
TxLatency: proto.Uint32(100),
QDropCount: proto.Uint32(1),
FabricPeerIntfName: proto.String("FabricPeerIntfName1"),
},
}
// Fixture: a second congestion record with distinct "2"-flavored values so
// the test can verify per-record field/tag mapping, not just presence.
var testProtoBufCongestionRecord2 = &pb.LanzRecord{
CongestionRecord: &pb.CongestionRecord{
Timestamp: proto.Uint64(200000000000000),
IntfName: proto.String("eth2"),
SwitchId: proto.Uint32(2),
PortId: proto.Uint32(2),
QueueSize: proto.Uint32(2),
EntryType: pb.CongestionRecord_EntryType.Enum(2),
TrafficClass: proto.Uint32(2),
TimeOfMaxQLen: proto.Uint64(200000000000000),
TxLatency: proto.Uint32(200),
QDropCount: proto.Uint32(2),
FabricPeerIntfName: proto.String("FabricPeerIntfName2"),
},
}
// Fixture: a global buffer usage record for the second measurement type.
var testProtoBufGlobalBufferUsageRecord = &pb.LanzRecord{
GlobalBufferUsageRecord: &pb.GlobalBufferUsageRecord{
EntryType: pb.GlobalBufferUsageRecord_EntryType.Enum(1),
Timestamp: proto.Uint64(100000000000000),
BufferSize: proto.Uint32(1),
Duration: proto.Uint32(10),
},
}
// TestLanzGeneratesMetrics feeds the three protobuf fixtures through
// msgToAccumulator and asserts the exact fields and tags produced for both
// measurement types, including the source/port tags derived from the URL.
func TestLanzGeneratesMetrics(t *testing.T) {
	l := &Lanz{Servers: []string{
		"tcp://switch01.int.example.com:50001",
		"tcp://switch02.int.example.com:50001",
	}}

	// Fail fast with context on parse errors; the original t.Fail() kept
	// running with an unusable URL and produced confusing follow-on failures.
	deviceURL1, err := url.Parse(l.Servers[0])
	if err != nil {
		t.Fatalf("parsing server URL %q: %v", l.Servers[0], err)
	}
	deviceURL2, err := url.Parse(l.Servers[1])
	if err != nil {
		t.Fatalf("parsing server URL %q: %v", l.Servers[1], err)
	}

	var acc testutil.Accumulator

	// First congestion record, from switch01.
	msgToAccumulator(&acc, testProtoBufCongestionRecord1, deviceURL1)
	acc.Wait(1)
	vals1 := map[string]interface{}{
		"timestamp":        int64(100000000000000),
		"queue_size":       int64(1),
		"time_of_max_qlen": int64(100000000000000),
		"tx_latency":       int64(100),
		"q_drop_count":     int64(1),
	}
	tags1 := map[string]string{
		"intf_name":             "eth1",
		"switch_id":             strconv.FormatInt(int64(1), 10),
		"port_id":               strconv.FormatInt(int64(1), 10),
		"entry_type":            strconv.FormatInt(int64(1), 10),
		"traffic_class":         strconv.FormatInt(int64(1), 10),
		"fabric_peer_intf_name": "FabricPeerIntfName1",
		"source":                "switch01.int.example.com",
		"port":                  "50001",
	}
	acc.AssertContainsFields(t, "lanz_congestion_record", vals1)
	acc.AssertContainsTaggedFields(t, "lanz_congestion_record", vals1, tags1)
	acc.ClearMetrics()

	// Second congestion record, from switch02, with distinct values.
	msgToAccumulator(&acc, testProtoBufCongestionRecord2, deviceURL2)
	acc.Wait(1)
	vals2 := map[string]interface{}{
		"timestamp":        int64(200000000000000),
		"queue_size":       int64(2),
		"time_of_max_qlen": int64(200000000000000),
		"tx_latency":       int64(200),
		"q_drop_count":     int64(2),
	}
	tags2 := map[string]string{
		"intf_name":             "eth2",
		"switch_id":             strconv.FormatInt(int64(2), 10),
		"port_id":               strconv.FormatInt(int64(2), 10),
		"entry_type":            strconv.FormatInt(int64(2), 10),
		"traffic_class":         strconv.FormatInt(int64(2), 10),
		"fabric_peer_intf_name": "FabricPeerIntfName2",
		"source":                "switch02.int.example.com",
		"port":                  "50001",
	}
	acc.AssertContainsFields(t, "lanz_congestion_record", vals2)
	acc.AssertContainsTaggedFields(t, "lanz_congestion_record", vals2, tags2)
	acc.ClearMetrics()

	// Global buffer usage record, from switch01.
	msgToAccumulator(&acc, testProtoBufGlobalBufferUsageRecord, deviceURL1)
	acc.Wait(1)
	gburVals1 := map[string]interface{}{
		"timestamp":   int64(100000000000000),
		"buffer_size": int64(1),
		"duration":    int64(10),
	}
	gburTags1 := map[string]string{
		"entry_type": strconv.FormatInt(int64(1), 10),
		"source":     "switch01.int.example.com",
		"port":       "50001",
	}
	acc.AssertContainsFields(t, "lanz_global_buffer_usage_record", gburVals1)
	acc.AssertContainsTaggedFields(t, "lanz_global_buffer_usage_record", gburVals1, gburTags1)
}

View file

@ -0,0 +1,7 @@
# Read metrics off Arista LANZ, via socket
[[inputs.lanz]]
## URL to Arista LANZ endpoint
servers = [
"tcp://switch1.int.example.com:50001",
"tcp://switch2.int.example.com:50001",
]