Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
e393c3af3f
commit
4978089aab
4963 changed files with 677545 additions and 0 deletions
112
plugins/inputs/infiniband/README.md
Normal file
112
plugins/inputs/infiniband/README.md
Normal file
|
@ -0,0 +1,112 @@
|
|||
# InfiniBand Input Plugin
|
||||
|
||||
This plugin gathers statistics for all InfiniBand devices and ports on the
|
||||
system. These are the counters that can be found in
|
||||
`/sys/class/infiniband/<dev>/ports/<port>/counters/`
|
||||
and RDMA counters can be found in
|
||||
`/sys/class/infiniband/<dev>/ports/<port>/hw_counters/`
|
||||
|
||||
⭐ Telegraf v1.14.0
|
||||
🏷️ network
|
||||
💻 linux
|
||||
|
||||
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
|
||||
|
||||
In addition to the plugin-specific configuration settings, plugins support
|
||||
additional global and plugin configuration settings. These settings are used to
|
||||
modify metrics, tags, and field or create aliases and configure ordering, etc.
|
||||
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||
|
||||
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
|
||||
|
||||
## Configuration
|
||||
|
||||
```toml @sample.conf
|
||||
# Gets counters from all InfiniBand cards and ports installed
|
||||
# This plugin ONLY supports Linux
|
||||
[[inputs.infiniband]]
|
||||
# no configuration
|
||||
|
||||
## Collect RDMA counters
|
||||
# gather_rdma = false
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
||||
Actual metrics depend on the InfiniBand devices; the plugin uses a simple
|
||||
mapping from counter -> counter value.
|
||||
|
||||
[Information about the counters][counters] collected is provided by Nvidia.
|
||||
|
||||
[counters]: https://enterprise-support.nvidia.com/s/article/understanding-mlx5-linux-counters-and-status-parameters
|
||||
|
||||
The following fields are emitted by the plugin when selecting `counters`:
|
||||
|
||||
- infiniband
|
||||
- tags:
|
||||
- device
|
||||
- port
|
||||
- fields:
|
||||
|
||||
### Infiniband Counters
|
||||
|
||||
- excessive_buffer_overrun_errors (integer)
|
||||
- link_downed (integer)
|
||||
- link_error_recovery (integer)
|
||||
- local_link_integrity_errors (integer)
|
||||
- multicast_rcv_packets (integer)
|
||||
- multicast_xmit_packets (integer)
|
||||
- port_rcv_constraint_errors (integer)
|
||||
- port_rcv_data (integer)
|
||||
- port_rcv_errors (integer)
|
||||
- port_rcv_packets (integer)
|
||||
- port_rcv_remote_physical_errors (integer)
|
||||
- port_rcv_switch_relay_errors (integer)
|
||||
- port_xmit_constraint_errors (integer)
|
||||
- port_xmit_data (integer)
|
||||
- port_xmit_discards (integer)
|
||||
- port_xmit_packets (integer)
|
||||
- port_xmit_wait (integer)
|
||||
- symbol_error (integer)
|
||||
- unicast_rcv_packets (integer)
|
||||
- unicast_xmit_packets (integer)
|
||||
- VL15_dropped (integer)
|
||||
|
||||
### Infiniband RDMA counters
|
||||
|
||||
- duplicate_request (integer)
|
||||
- implied_nak_seq_err (integer)
|
||||
- lifespan (integer)
|
||||
- local_ack_timeout_err (integer)
|
||||
- np_cnp_sent (integer)
|
||||
- np_ecn_marked_roce_packets (integer)
|
||||
- out_of_buffer (integer)
|
||||
- out_of_sequence (integer)
|
||||
- packet_seq_err (integer)
|
||||
- req_cqe_error (integer)
|
||||
- req_cqe_flush_error (integer)
|
||||
- req_remote_access_errors (integer)
|
||||
- req_remote_invalid_request (integer)
|
||||
- resp_cqe_error (integer)
|
||||
- resp_cqe_flush_error (integer)
|
||||
- resp_local_length_error (integer)
|
||||
- resp_remote_access_errors (integer)
|
||||
- rnr_nak_retry_err (integer)
|
||||
- roce_adp_retrans (integer)
|
||||
- roce_adp_retrans_to (integer)
|
||||
- roce_slow_restart (integer)
|
||||
- roce_slow_restart_cnps (integer)
|
||||
- roce_slow_restart_trans (integer)
|
||||
- rp_cnp_handled (integer)
|
||||
- rp_cnp_ignored (integer)
|
||||
- rx_atomic_requests (integer)
|
||||
- rx_icrc_encapsulated (integer)
|
||||
- rx_read_requests (integer)
|
||||
- rx_write_requests (integer)
|
||||
|
||||
## Example Output
|
||||
|
||||
```text
|
||||
infiniband,device=mlx5_bond_0,host=hop-r640-12,port=1 port_xmit_data=85378896588i,VL15_dropped=0i,port_rcv_packets=34914071i,port_rcv_data=34600185253i,port_xmit_discards=0i,link_downed=0i,local_link_integrity_errors=0i,symbol_error=0i,link_error_recovery=0i,multicast_rcv_packets=0i,multicast_xmit_packets=0i,unicast_xmit_packets=82002535i,excessive_buffer_overrun_errors=0i,port_rcv_switch_relay_errors=0i,unicast_rcv_packets=34914071i,port_xmit_constraint_errors=0i,port_rcv_errors=0i,port_xmit_wait=0i,port_rcv_remote_physical_errors=0i,port_rcv_constraint_errors=0i,port_xmit_packets=82002535i 1737652060000000000
|
||||
infiniband,device=mlx5_bond_0,host=hop-r640-12,port=1 local_ack_timeout_err=0i,lifespan=10i,out_of_buffer=0i,resp_remote_access_errors=0i,resp_local_length_error=0i,np_cnp_sent=0i,roce_slow_restart=0i,rx_read_requests=6000i,duplicate_request=0i,resp_cqe_error=0i,rx_write_requests=19000i,roce_slow_restart_cnps=0i,rx_icrc_encapsulated=0i,rnr_nak_retry_err=0i,roce_adp_retrans=0i,out_of_sequence=0i,req_remote_access_errors=0i,roce_slow_restart_trans=0i,req_remote_invalid_request=0i,req_cqe_error=0i,resp_cqe_flush_error=0i,packet_seq_err=0i,roce_adp_retrans_to=0i,np_ecn_marked_roce_packets=0i,rp_cnp_handled=0i,implied_nak_seq_err=0i,rp_cnp_ignored=0i,req_cqe_flush_error=0i,rx_atomic_requests=0i 1737652060000000000
|
||||
```
|
26
plugins/inputs/infiniband/infiniband.go
Normal file
26
plugins/inputs/infiniband/infiniband.go
Normal file
|
@ -0,0 +1,26 @@
|
|||
//go:generate ../../../tools/readme_config_includer/generator
|
||||
package infiniband
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
//go:embed sample.conf
|
||||
var sampleConfig string
|
||||
|
||||
type Infiniband struct {
|
||||
RDMA bool `toml:"gather_rdma"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
}
|
||||
|
||||
func (*Infiniband) SampleConfig() string {
|
||||
return sampleConfig
|
||||
}
|
||||
|
||||
// Initialise plugin
|
||||
func init() {
|
||||
inputs.Add("infiniband", func() telegraf.Input { return &Infiniband{} })
|
||||
}
|
62
plugins/inputs/infiniband/infiniband_linux.go
Normal file
62
plugins/inputs/infiniband/infiniband_linux.go
Normal file
|
@ -0,0 +1,62 @@
|
|||
//go:build linux
|
||||
|
||||
package infiniband
|
||||
|
||||
import (
	"errors"
	"fmt"
	"strconv"

	"github.com/Mellanox/rdmamap"

	"github.com/influxdata/telegraf"
)
|
||||
|
||||
// Gather statistics from our infiniband cards
|
||||
func (ib *Infiniband) Gather(acc telegraf.Accumulator) error {
|
||||
rdmaDevices := rdmamap.GetRdmaDeviceList()
|
||||
|
||||
if len(rdmaDevices) == 0 {
|
||||
return errors.New("no InfiniBand devices found in /sys/class/infiniband/")
|
||||
}
|
||||
|
||||
for _, dev := range rdmaDevices {
|
||||
devicePorts := rdmamap.GetPorts(dev)
|
||||
for _, port := range devicePorts {
|
||||
portInt, err := strconv.Atoi(port)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats, err := rdmamap.GetRdmaSysfsStats(dev, portInt)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
addStats(dev, port, stats, acc)
|
||||
|
||||
if ib.RDMA {
|
||||
stats, err := rdmamap.GetRdmaSysfsHwStats(dev, portInt)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
addStats(dev, port, stats, acc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add the statistics to the accumulator
|
||||
func addStats(dev, port string, stats []rdmamap.RdmaStatEntry, acc telegraf.Accumulator) {
|
||||
// Allow users to filter by card and port
|
||||
tags := map[string]string{"device": dev, "port": port}
|
||||
fields := make(map[string]interface{})
|
||||
|
||||
for _, entry := range stats {
|
||||
fields[entry.Name] = entry.Value
|
||||
}
|
||||
|
||||
acc.AddFields("infiniband", fields, tags)
|
||||
}
|
23
plugins/inputs/infiniband/infiniband_notlinux.go
Normal file
23
plugins/inputs/infiniband/infiniband_notlinux.go
Normal file
|
@ -0,0 +1,23 @@
|
|||
//go:build !linux
|
||||
|
||||
package infiniband
|
||||
|
||||
import (
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
func (i *Infiniband) Init() error {
|
||||
i.Log.Warn("Current platform is not supported")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (*Infiniband) Gather(_ telegraf.Accumulator) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("infiniband", func() telegraf.Input {
|
||||
return &Infiniband{}
|
||||
})
|
||||
}
|
299
plugins/inputs/infiniband/infiniband_test.go
Normal file
299
plugins/inputs/infiniband/infiniband_test.go
Normal file
|
@ -0,0 +1,299 @@
|
|||
//go:build linux
|
||||
|
||||
package infiniband
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/Mellanox/rdmamap"
|
||||
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
func TestInfiniband(t *testing.T) {
|
||||
fields := map[string]interface{}{
|
||||
"excessive_buffer_overrun_errors": uint64(0),
|
||||
"link_downed": uint64(0),
|
||||
"link_error_recovery": uint64(0),
|
||||
"local_link_integrity_errors": uint64(0),
|
||||
"multicast_rcv_packets": uint64(0),
|
||||
"multicast_xmit_packets": uint64(0),
|
||||
"port_rcv_constraint_errors": uint64(0),
|
||||
"port_rcv_data": uint64(237159415345822),
|
||||
"port_rcv_errors": uint64(0),
|
||||
"port_rcv_packets": uint64(801977655075),
|
||||
"port_rcv_remote_physical_errors": uint64(0),
|
||||
"port_rcv_switch_relay_errors": uint64(0),
|
||||
"port_xmit_constraint_errors": uint64(0),
|
||||
"port_xmit_data": uint64(238334949937759),
|
||||
"port_xmit_discards": uint64(0),
|
||||
"port_xmit_packets": uint64(803162651391),
|
||||
"port_xmit_wait": uint64(4294967295),
|
||||
"symbol_error": uint64(0),
|
||||
"unicast_rcv_packets": uint64(801977655075),
|
||||
"unicast_xmit_packets": uint64(803162651391),
|
||||
"VL15_dropped": uint64(0),
|
||||
}
|
||||
|
||||
tags := map[string]string{
|
||||
"device": "m1x5_0",
|
||||
"port": "1",
|
||||
}
|
||||
|
||||
sampleRdmastatsEntries := []rdmamap.RdmaStatEntry{
|
||||
{
|
||||
Name: "excessive_buffer_overrun_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "link_downed",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "link_error_recovery",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "local_link_integrity_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "multicast_rcv_packets",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "multicast_xmit_packets",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_constraint_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_data",
|
||||
Value: uint64(237159415345822),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_packets",
|
||||
Value: uint64(801977655075),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_remote_physical_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_rcv_switch_relay_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_xmit_constraint_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_xmit_data",
|
||||
Value: uint64(238334949937759),
|
||||
},
|
||||
{
|
||||
Name: "port_xmit_discards",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "port_xmit_packets",
|
||||
Value: uint64(803162651391),
|
||||
},
|
||||
{
|
||||
Name: "port_xmit_wait",
|
||||
Value: uint64(4294967295),
|
||||
},
|
||||
{
|
||||
Name: "symbol_error",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "unicast_rcv_packets",
|
||||
Value: uint64(801977655075),
|
||||
},
|
||||
{
|
||||
Name: "unicast_xmit_packets",
|
||||
Value: uint64(803162651391),
|
||||
},
|
||||
{
|
||||
Name: "VL15_dropped",
|
||||
Value: uint64(0),
|
||||
},
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
|
||||
addStats("m1x5_0", "1", sampleRdmastatsEntries, &acc)
|
||||
|
||||
acc.AssertContainsTaggedFields(t, "infiniband", fields, tags)
|
||||
}
|
||||
|
||||
func TestInfinibandRDMA(t *testing.T) {
|
||||
fields := map[string]interface{}{
|
||||
"duplicate_request": uint64(0),
|
||||
"implied_nak_seq_err": uint64(0),
|
||||
"lifespan": uint64(10),
|
||||
"local_ack_timeout_err": uint64(38),
|
||||
"np_cnp_sent": uint64(10284520),
|
||||
"np_ecn_marked_roce_packets": uint64(286733949),
|
||||
"out_of_buffer": uint64(1149772),
|
||||
"out_of_sequence": uint64(44),
|
||||
"packet_seq_err": uint64(1),
|
||||
"req_cqe_error": uint64(10776),
|
||||
"req_cqe_flush_error": uint64(2173),
|
||||
"req_remote_access_errors": uint64(0),
|
||||
"req_remote_invalid_request": uint64(0),
|
||||
"resp_cqe_error": uint64(759),
|
||||
"resp_cqe_flush_error": uint64(759),
|
||||
"resp_local_length_error": uint64(0),
|
||||
"resp_remote_access_errors": uint64(0),
|
||||
"rnr_nak_retry_err": uint64(0),
|
||||
"roce_adp_retrans": uint64(0),
|
||||
"roce_adp_retrans_to": uint64(0),
|
||||
"roce_slow_restart": uint64(0),
|
||||
"roce_slow_restart_cnps": uint64(0),
|
||||
"roce_slow_restart_trans": uint64(0),
|
||||
"rp_cnp_handled": uint64(1),
|
||||
"rp_cnp_ignored": uint64(0),
|
||||
"rx_atomic_requests": uint64(0),
|
||||
"rx_icrc_encapsulated": uint64(0),
|
||||
"rx_read_requests": uint64(488228),
|
||||
"rx_write_requests": uint64(3928699),
|
||||
}
|
||||
|
||||
tags := map[string]string{
|
||||
"device": "m1x5_0",
|
||||
"port": "1",
|
||||
}
|
||||
|
||||
sampleRdmaHwStatsEntries := []rdmamap.RdmaStatEntry{
|
||||
{
|
||||
Name: "duplicate_request",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "implied_nak_seq_err",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "lifespan",
|
||||
Value: uint64(10),
|
||||
},
|
||||
{
|
||||
Name: "local_ack_timeout_err",
|
||||
Value: uint64(38),
|
||||
},
|
||||
{
|
||||
Name: "np_cnp_sent",
|
||||
Value: uint64(10284520),
|
||||
},
|
||||
{
|
||||
Name: "np_ecn_marked_roce_packets",
|
||||
Value: uint64(286733949),
|
||||
},
|
||||
{
|
||||
Name: "out_of_buffer",
|
||||
Value: uint64(1149772),
|
||||
},
|
||||
{
|
||||
Name: "out_of_sequence",
|
||||
Value: uint64(44),
|
||||
},
|
||||
{
|
||||
Name: "packet_seq_err",
|
||||
Value: uint64(1),
|
||||
},
|
||||
{
|
||||
Name: "req_cqe_error",
|
||||
Value: uint64(10776),
|
||||
},
|
||||
{
|
||||
Name: "req_cqe_flush_error",
|
||||
Value: uint64(2173),
|
||||
},
|
||||
{
|
||||
Name: "req_remote_access_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "req_remote_invalid_request",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "resp_cqe_error",
|
||||
Value: uint64(759),
|
||||
},
|
||||
{
|
||||
Name: "resp_cqe_flush_error",
|
||||
Value: uint64(759),
|
||||
},
|
||||
{
|
||||
Name: "resp_local_length_error",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "resp_remote_access_errors",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "rnr_nak_retry_err",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "roce_adp_retrans",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "roce_adp_retrans_to",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "roce_slow_restart",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "roce_slow_restart_cnps",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "roce_slow_restart_trans",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "rp_cnp_handled",
|
||||
Value: uint64(1),
|
||||
},
|
||||
{
|
||||
Name: "rp_cnp_ignored",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "rx_atomic_requests",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "rx_icrc_encapsulated",
|
||||
Value: uint64(0),
|
||||
},
|
||||
{
|
||||
Name: "rx_read_requests",
|
||||
Value: uint64(488228),
|
||||
},
|
||||
{
|
||||
Name: "rx_write_requests",
|
||||
Value: uint64(3928699),
|
||||
},
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
|
||||
addStats("m1x5_0", "1", sampleRdmaHwStatsEntries, &acc)
|
||||
|
||||
acc.AssertContainsTaggedFields(t, "infiniband", fields, tags)
|
||||
}
|
7
plugins/inputs/infiniband/sample.conf
Normal file
7
plugins/inputs/infiniband/sample.conf
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Gets counters from all InfiniBand cards and ports installed
|
||||
# This plugin ONLY supports Linux
|
||||
[[inputs.infiniband]]
|
||||
# no configuration
|
||||
|
||||
## Collect RDMA counters
|
||||
# gather_rdma = false
|
Loading…
Add table
Add a link
Reference in a new issue