Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent e393c3af3f · commit 4978089aab
4963 changed files with 677545 additions and 0 deletions
110  plugins/inputs/lanz/README.md  Normal file
@@ -0,0 +1,110 @@
# Arista LANZ Consumer Input Plugin

This service plugin consumes messages from
[Arista Networks’ Latency Analyzer (LANZ)][lanz] by receiving the datastream
over TCP (usually on port 50001) on the switch's management IP.

> [!NOTE]
> You will need to configure LANZ and enable streaming of LANZ data; see the
> [documentation][config_lanz] for more details.

⭐ Telegraf v1.14.0
🏷️ network
💻 all

[lanz]: https://www.arista.com/en/um-eos/eos-latency-analyzer-lanz
[config_lanz]: https://www.arista.com/en/um-eos/eos-section-44-3-configuring-lanz

## Service Input <!-- @/docs/includes/service_input.md -->

This plugin is a service input. Normal plugins gather metrics determined by the
interval setting. Service plugins start a service to listen and wait for
metrics or events to occur. Service plugins have two key differences from
normal plugins:

1. The global or plugin-specific `interval` setting may not apply
2. The CLI options of `--test`, `--test-wait`, and `--once` may not produce
   output for this plugin

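For reference, a service input satisfies Telegraf's `ServiceInput` interface in
addition to the regular input methods. The sketch below is paraphrased from the
`telegraf` package and is illustrative only; see the Telegraf source for the
authoritative definition.

```go
// Paraphrased sketch of Telegraf's service-input contract.
type ServiceInput interface {
	Input // provides SampleConfig() string and Gather(Accumulator) error

	// Start begins the service; the Accumulator may be retained and
	// written to until Stop returns.
	Start(acc Accumulator) error

	// Stop shuts the service down and releases its resources.
	Stop()
}
```
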
## Global configuration options <!-- @/docs/includes/plugin_config.md -->

In addition to the plugin-specific configuration settings, plugins support
additional global and plugin configuration settings. These settings are used to
modify metrics, tags, and fields, or to create aliases and configure ordering,
etc. See [CONFIGURATION.md][CONFIGURATION.md] for more details.

[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins

## Configuration

```toml @sample.conf
# Read metrics off Arista LANZ, via socket
[[inputs.lanz]]
  ## URL to Arista LANZ endpoint
  servers = [
    "tcp://switch1.int.example.com:50001",
    "tcp://switch2.int.example.com:50001",
  ]
```

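Each `servers` entry must be a URL containing a host and port: the plugin dials
the host portion and uses the hostname and port as the `source` and `port` tags
on every metric (see lanz.go below). A minimal, runnable sketch of that
parsing:

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	deviceURL, err := url.Parse("tcp://switch1.int.example.com:50001")
	if err != nil {
		panic(err)
	}
	// Host keeps "host:port"; Hostname() and Port() split them apart.
	fmt.Println(deviceURL.Host)       // switch1.int.example.com:50001 -> dial address
	fmt.Println(deviceURL.Hostname()) // switch1.int.example.com      -> "source" tag
	fmt.Println(deviceURL.Port())     // 50001                        -> "port" tag
}
```
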
## Metrics

For more details on the metrics see the [protocol buffer definition][proto].

- lanz_congestion_record
  - tags:
    - intf_name
    - switch_id
    - port_id
    - entry_type
    - traffic_class
    - fabric_peer_intf_name
    - source
    - port
  - fields:
    - timestamp (integer)
    - queue_size (integer)
    - time_of_max_qlen (integer)
    - tx_latency (integer)
    - q_drop_count (integer)

- lanz_global_buffer_usage_record
  - tags:
    - entry_type
    - source
    - port
  - fields:
    - timestamp (integer)
    - buffer_size (integer)
    - duration (integer)

[proto]: https://github.com/aristanetworks/goarista/blob/master/lanz/proto/lanz.proto

## Sample Queries

Get the max tx_latency for the last hour for all interfaces on all switches.

```sql
SELECT max("tx_latency") AS "max_tx_latency" FROM "lanz_congestion_record" WHERE time > now() - 1h GROUP BY time(10s), "source", "intf_name"
```

Get the max queue_size for the last hour for all interfaces on all switches.

```sql
SELECT max("queue_size") AS "max_queue_size" FROM "lanz_congestion_record" WHERE time > now() - 1h GROUP BY time(10s), "source", "intf_name"
```

Get the max buffer_size over the last hour for all switches.

```sql
SELECT max("buffer_size") AS "max_buffer_size" FROM "lanz_global_buffer_usage_record" WHERE time > now() - 1h GROUP BY time(10s), "source"
```

## Example Output

```text
lanz_global_buffer_usage_record,entry_type=2,host=telegraf.int.example.com,port=50001,source=switch01.int.example.com timestamp=158334105824919i,buffer_size=505i,duration=0i 1583341058300643815
lanz_congestion_record,entry_type=2,host=telegraf.int.example.com,intf_name=Ethernet36,port=50001,port_id=61,source=switch01.int.example.com,switch_id=0,traffic_class=1 time_of_max_qlen=0i,tx_latency=564480i,q_drop_count=0i,timestamp=158334105824919i,queue_size=225i 1583341058300636045
lanz_global_buffer_usage_record,entry_type=2,host=telegraf.int.example.com,port=50001,source=switch01.int.example.com timestamp=158334105824919i,buffer_size=589i,duration=0i 1583341058300457464
lanz_congestion_record,entry_type=1,host=telegraf.int.example.com,intf_name=Ethernet36,port=50001,port_id=61,source=switch01.int.example.com,switch_id=0,traffic_class=1 q_drop_count=0i,timestamp=158334105824919i,queue_size=232i,time_of_max_qlen=0i,tx_latency=584640i 1583341058300450302
```
128  plugins/inputs/lanz/lanz.go  Normal file
@@ -0,0 +1,128 @@
//go:generate ../../../tools/readme_config_includer/generator
package lanz

import (
	_ "embed"
	"net/url"
	"strconv"
	"sync"
	"time"

	"github.com/aristanetworks/goarista/lanz"
	pb "github.com/aristanetworks/goarista/lanz/proto"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/inputs"
)

//go:embed sample.conf
var sampleConfig string

type Lanz struct {
	Servers []string `toml:"servers"`
	clients []lanz.Client
	wg      sync.WaitGroup
}

func (*Lanz) SampleConfig() string {
	return sampleConfig
}

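// Start connects to each configured LANZ endpoint and launches goroutines
// that stream records into the accumulator until Stop is called.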
func (l *Lanz) Start(acc telegraf.Accumulator) error {
	if len(l.Servers) == 0 {
		l.Servers = append(l.Servers, "tcp://127.0.0.1:50001")
	}

	for _, server := range l.Servers {
		deviceURL, err := url.Parse(server)
		if err != nil {
			return err
		}
		client := lanz.New(
			lanz.WithAddr(deviceURL.Host),
			lanz.WithBackoff(1*time.Second),
			lanz.WithTimeout(10*time.Second),
		)
		l.clients = append(l.clients, client)

		in := make(chan *pb.LanzRecord)
		go func() {
			client.Run(in)
		}()
		l.wg.Add(1)
		go func() {
			// Done is deferred so Stop() actually waits for this goroutine
			// to finish draining the channel before returning.
			defer l.wg.Done()
			receive(acc, in, deviceURL)
		}()
	}
	return nil
}

func (*Lanz) Gather(telegraf.Accumulator) error {
	return nil
}

func (l *Lanz) Stop() {
	for _, client := range l.clients {
		client.Stop()
	}
	l.wg.Wait()
}

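// receive forwards records from in to the accumulator until the channel
// is closed by the client.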
func receive(acc telegraf.Accumulator, in <-chan *pb.LanzRecord, deviceURL *url.URL) {
	//nolint:staticcheck // for-select used on purpose
	for {
		select {
		case msg, ok := <-in:
			if !ok {
				return
			}
			msgToAccumulator(acc, msg, deviceURL)
		}
	}
}

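// msgToAccumulator converts a single LANZ protobuf record into the
// corresponding Telegraf metric, tagged with the device's hostname and port.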
func msgToAccumulator(acc telegraf.Accumulator, msg *pb.LanzRecord, deviceURL *url.URL) {
	cr := msg.GetCongestionRecord()
	if cr != nil {
		vals := map[string]interface{}{
			"timestamp":        int64(cr.GetTimestamp()),
			"queue_size":       int64(cr.GetQueueSize()),
			"time_of_max_qlen": int64(cr.GetTimeOfMaxQLen()),
			"tx_latency":       int64(cr.GetTxLatency()),
			"q_drop_count":     int64(cr.GetQDropCount()),
		}
		tags := map[string]string{
			"intf_name":             cr.GetIntfName(),
			"switch_id":             strconv.FormatInt(int64(cr.GetSwitchId()), 10),
			"port_id":               strconv.FormatInt(int64(cr.GetPortId()), 10),
			"entry_type":            strconv.FormatInt(int64(cr.GetEntryType()), 10),
			"traffic_class":         strconv.FormatInt(int64(cr.GetTrafficClass()), 10),
			"fabric_peer_intf_name": cr.GetFabricPeerIntfName(),
			"source":                deviceURL.Hostname(),
			"port":                  deviceURL.Port(),
		}
		acc.AddFields("lanz_congestion_record", vals, tags)
	}

	gbur := msg.GetGlobalBufferUsageRecord()
	if gbur != nil {
		vals := map[string]interface{}{
			"timestamp":   int64(gbur.GetTimestamp()),
			"buffer_size": int64(gbur.GetBufferSize()),
			"duration":    int64(gbur.GetDuration()),
		}
		tags := map[string]string{
			"entry_type": strconv.FormatInt(int64(gbur.GetEntryType()), 10),
			"source":     deviceURL.Hostname(),
			"port":       deviceURL.Port(),
		}
		acc.AddFields("lanz_global_buffer_usage_record", vals, tags)
	}
}

func init() {
	inputs.Add("lanz", func() telegraf.Input {
		return &Lanz{}
	})
}
136  plugins/inputs/lanz/lanz_test.go  Normal file
@@ -0,0 +1,136 @@
package lanz

import (
	"net/url"
	"strconv"
	"testing"

	pb "github.com/aristanetworks/goarista/lanz/proto"
	"google.golang.org/protobuf/proto"

	"github.com/influxdata/telegraf/testutil"
)

var testProtoBufCongestionRecord1 = &pb.LanzRecord{
	CongestionRecord: &pb.CongestionRecord{
		Timestamp:          proto.Uint64(100000000000000),
		IntfName:           proto.String("eth1"),
		SwitchId:           proto.Uint32(1),
		PortId:             proto.Uint32(1),
		QueueSize:          proto.Uint32(1),
		EntryType:          pb.CongestionRecord_EntryType.Enum(1),
		TrafficClass:       proto.Uint32(1),
		TimeOfMaxQLen:      proto.Uint64(100000000000000),
		TxLatency:          proto.Uint32(100),
		QDropCount:         proto.Uint32(1),
		FabricPeerIntfName: proto.String("FabricPeerIntfName1"),
	},
}

var testProtoBufCongestionRecord2 = &pb.LanzRecord{
	CongestionRecord: &pb.CongestionRecord{
		Timestamp:          proto.Uint64(200000000000000),
		IntfName:           proto.String("eth2"),
		SwitchId:           proto.Uint32(2),
		PortId:             proto.Uint32(2),
		QueueSize:          proto.Uint32(2),
		EntryType:          pb.CongestionRecord_EntryType.Enum(2),
		TrafficClass:       proto.Uint32(2),
		TimeOfMaxQLen:      proto.Uint64(200000000000000),
		TxLatency:          proto.Uint32(200),
		QDropCount:         proto.Uint32(2),
		FabricPeerIntfName: proto.String("FabricPeerIntfName2"),
	},
}

var testProtoBufGlobalBufferUsageRecord = &pb.LanzRecord{
	GlobalBufferUsageRecord: &pb.GlobalBufferUsageRecord{
		EntryType:  pb.GlobalBufferUsageRecord_EntryType.Enum(1),
		Timestamp:  proto.Uint64(100000000000000),
		BufferSize: proto.Uint32(1),
		Duration:   proto.Uint32(10),
	},
}

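// TestLanzGeneratesMetrics feeds canned protobuf records through
// msgToAccumulator and verifies the emitted fields and tags.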
func TestLanzGeneratesMetrics(t *testing.T) {
	l := &Lanz{Servers: []string{
		"tcp://switch01.int.example.com:50001",
		"tcp://switch02.int.example.com:50001",
	}}

	deviceURL1, err := url.Parse(l.Servers[0])
	if err != nil {
		t.Fatal(err)
	}
	deviceURL2, err := url.Parse(l.Servers[1])
	if err != nil {
		t.Fatal(err)
	}

	var acc testutil.Accumulator
	msgToAccumulator(&acc, testProtoBufCongestionRecord1, deviceURL1)
	acc.Wait(1)

	vals1 := map[string]interface{}{
		"timestamp":        int64(100000000000000),
		"queue_size":       int64(1),
		"time_of_max_qlen": int64(100000000000000),
		"tx_latency":       int64(100),
		"q_drop_count":     int64(1),
	}
	tags1 := map[string]string{
		"intf_name":             "eth1",
		"switch_id":             strconv.FormatInt(int64(1), 10),
		"port_id":               strconv.FormatInt(int64(1), 10),
		"entry_type":            strconv.FormatInt(int64(1), 10),
		"traffic_class":         strconv.FormatInt(int64(1), 10),
		"fabric_peer_intf_name": "FabricPeerIntfName1",
		"source":                "switch01.int.example.com",
		"port":                  "50001",
	}

	acc.AssertContainsFields(t, "lanz_congestion_record", vals1)
	acc.AssertContainsTaggedFields(t, "lanz_congestion_record", vals1, tags1)

	acc.ClearMetrics()
	msgToAccumulator(&acc, testProtoBufCongestionRecord2, deviceURL2)
	acc.Wait(1)

	vals2 := map[string]interface{}{
		"timestamp":        int64(200000000000000),
		"queue_size":       int64(2),
		"time_of_max_qlen": int64(200000000000000),
		"tx_latency":       int64(200),
		"q_drop_count":     int64(2),
	}
	tags2 := map[string]string{
		"intf_name":             "eth2",
		"switch_id":             strconv.FormatInt(int64(2), 10),
		"port_id":               strconv.FormatInt(int64(2), 10),
		"entry_type":            strconv.FormatInt(int64(2), 10),
		"traffic_class":         strconv.FormatInt(int64(2), 10),
		"fabric_peer_intf_name": "FabricPeerIntfName2",
		"source":                "switch02.int.example.com",
		"port":                  "50001",
	}

	acc.AssertContainsFields(t, "lanz_congestion_record", vals2)
	acc.AssertContainsTaggedFields(t, "lanz_congestion_record", vals2, tags2)

	acc.ClearMetrics()
	msgToAccumulator(&acc, testProtoBufGlobalBufferUsageRecord, deviceURL1)
	acc.Wait(1)

	gburVals1 := map[string]interface{}{
		"timestamp":   int64(100000000000000),
		"buffer_size": int64(1),
		"duration":    int64(10),
	}
	gburTags1 := map[string]string{
		"entry_type": strconv.FormatInt(int64(1), 10),
		"source":     "switch01.int.example.com",
		"port":       "50001",
	}

	acc.AssertContainsFields(t, "lanz_global_buffer_usage_record", gburVals1)
	acc.AssertContainsTaggedFields(t, "lanz_global_buffer_usage_record", gburVals1, gburTags1)
}
7  plugins/inputs/lanz/sample.conf  Normal file
@@ -0,0 +1,7 @@
# Read metrics off Arista LANZ, via socket
[[inputs.lanz]]
  ## URL to Arista LANZ endpoint
  servers = [
    "tcp://switch1.int.example.com:50001",
    "tcp://switch2.int.example.com:50001",
  ]