795 lines
26 KiB
Go
795 lines
26 KiB
Go
|
package nvidia_smi
|
||
|
|
||
|
import (
|
||
|
"os"
|
||
|
"path/filepath"
|
||
|
"runtime"
|
||
|
"testing"
|
||
|
"time"
|
||
|
|
||
|
"github.com/stretchr/testify/require"
|
||
|
|
||
|
"github.com/influxdata/telegraf"
|
||
|
"github.com/influxdata/telegraf/config"
|
||
|
"github.com/influxdata/telegraf/internal"
|
||
|
"github.com/influxdata/telegraf/models"
|
||
|
"github.com/influxdata/telegraf/testutil"
|
||
|
)
|
||
|
|
||
|
func TestProbe(t *testing.T) {
|
||
|
var binPath string
|
||
|
var nvidiaSMIArgsPrefix []string
|
||
|
if runtime.GOOS == "windows" {
|
||
|
binPath = `C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe`
|
||
|
nvidiaSMIArgsPrefix = []string{"-Command"}
|
||
|
} else {
|
||
|
binPath = "/bin/bash"
|
||
|
nvidiaSMIArgsPrefix = []string{"-c"}
|
||
|
}
|
||
|
|
||
|
for _, tt := range []struct {
|
||
|
name string
|
||
|
args string
|
||
|
expectError bool
|
||
|
}{
|
||
|
{
|
||
|
name: "probe success",
|
||
|
args: "exit 0",
|
||
|
expectError: false,
|
||
|
},
|
||
|
{
|
||
|
name: "probe error",
|
||
|
args: "exit 1",
|
||
|
expectError: true,
|
||
|
},
|
||
|
} {
|
||
|
t.Run(tt.name, func(t *testing.T) {
|
||
|
plugin := &NvidiaSMI{
|
||
|
BinPath: binPath,
|
||
|
nvidiaSMIArgs: append(nvidiaSMIArgsPrefix, tt.args),
|
||
|
Log: &testutil.Logger{},
|
||
|
Timeout: config.Duration(5 * time.Second),
|
||
|
}
|
||
|
model := models.NewRunningInput(plugin, &models.InputConfig{
|
||
|
Name: "nvidia_smi",
|
||
|
StartupErrorBehavior: "probe",
|
||
|
})
|
||
|
err := model.Probe()
|
||
|
if tt.expectError {
|
||
|
require.Error(t, err)
|
||
|
} else {
|
||
|
require.NoError(t, err)
|
||
|
}
|
||
|
})
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestErrorBehaviorDefault(t *testing.T) {
|
||
|
// make sure we can't find nvidia-smi in $PATH somewhere
|
||
|
os.Unsetenv("PATH")
|
||
|
plugin := &NvidiaSMI{
|
||
|
BinPath: "/random/non-existent/path",
|
||
|
Log: &testutil.Logger{},
|
||
|
}
|
||
|
model := models.NewRunningInput(plugin, &models.InputConfig{
|
||
|
Name: "nvidia_smi",
|
||
|
})
|
||
|
require.NoError(t, model.Init())
|
||
|
|
||
|
var acc testutil.Accumulator
|
||
|
var ferr *internal.FatalError
|
||
|
require.NotErrorAs(t, model.Start(&acc), &ferr)
|
||
|
require.ErrorIs(t, model.Gather(&acc), internal.ErrNotConnected)
|
||
|
}
|
||
|
|
||
|
func TestErrorBehaviorError(t *testing.T) {
|
||
|
// make sure we can't find nvidia-smi in $PATH somewhere
|
||
|
os.Unsetenv("PATH")
|
||
|
plugin := &NvidiaSMI{
|
||
|
BinPath: "/random/non-existent/path",
|
||
|
Log: &testutil.Logger{},
|
||
|
}
|
||
|
model := models.NewRunningInput(plugin, &models.InputConfig{
|
||
|
Name: "nvidia_smi",
|
||
|
StartupErrorBehavior: "error",
|
||
|
})
|
||
|
require.NoError(t, model.Init())
|
||
|
|
||
|
var acc testutil.Accumulator
|
||
|
var ferr *internal.FatalError
|
||
|
require.NotErrorAs(t, model.Start(&acc), &ferr)
|
||
|
require.ErrorIs(t, model.Gather(&acc), internal.ErrNotConnected)
|
||
|
}
|
||
|
|
||
|
func TestErrorBehaviorRetry(t *testing.T) {
|
||
|
// make sure we can't find nvidia-smi in $PATH somewhere
|
||
|
os.Unsetenv("PATH")
|
||
|
plugin := &NvidiaSMI{
|
||
|
BinPath: "/random/non-existent/path",
|
||
|
Log: &testutil.Logger{},
|
||
|
}
|
||
|
model := models.NewRunningInput(plugin, &models.InputConfig{
|
||
|
Name: "nvidia_smi",
|
||
|
StartupErrorBehavior: "retry",
|
||
|
})
|
||
|
require.NoError(t, model.Init())
|
||
|
|
||
|
var acc testutil.Accumulator
|
||
|
var ferr *internal.FatalError
|
||
|
require.NotErrorAs(t, model.Start(&acc), &ferr)
|
||
|
require.ErrorIs(t, model.Gather(&acc), internal.ErrNotConnected)
|
||
|
}
|
||
|
|
||
|
func TestErrorBehaviorIgnore(t *testing.T) {
|
||
|
// make sure we can't find nvidia-smi in $PATH somewhere
|
||
|
os.Unsetenv("PATH")
|
||
|
plugin := &NvidiaSMI{
|
||
|
BinPath: "/random/non-existent/path",
|
||
|
Log: &testutil.Logger{},
|
||
|
}
|
||
|
model := models.NewRunningInput(plugin, &models.InputConfig{
|
||
|
Name: "nvidia_smi",
|
||
|
StartupErrorBehavior: "ignore",
|
||
|
})
|
||
|
require.NoError(t, model.Init())
|
||
|
|
||
|
var acc testutil.Accumulator
|
||
|
var ferr *internal.FatalError
|
||
|
require.ErrorAs(t, model.Start(&acc), &ferr)
|
||
|
require.ErrorIs(t, model.Gather(&acc), internal.ErrNotConnected)
|
||
|
}
|
||
|
|
||
|
func TestGatherValidXML(t *testing.T) {
|
||
|
tests := []struct {
|
||
|
name string
|
||
|
filename string
|
||
|
expected []telegraf.Metric
|
||
|
}{
|
||
|
{
|
||
|
name: "GeForce GTX 1070 Ti",
|
||
|
filename: "gtx-1070-ti.xml",
|
||
|
expected: []telegraf.Metric{
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi",
|
||
|
map[string]string{
|
||
|
"name": "GeForce GTX 1070 Ti",
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"pstate": "P8",
|
||
|
"uuid": "GPU-f9ba66fc-a7f5-94c5-da19-019ef2f9c665",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"clocks_current_graphics": 135,
|
||
|
"clocks_current_memory": 405,
|
||
|
"clocks_current_sm": 135,
|
||
|
"clocks_current_video": 405,
|
||
|
"encoder_stats_average_fps": 0,
|
||
|
"encoder_stats_average_latency": 0,
|
||
|
"encoder_stats_session_count": 0,
|
||
|
"fan_speed": 100,
|
||
|
"memory_free": 4054,
|
||
|
"memory_total": 4096,
|
||
|
"memory_used": 42,
|
||
|
"pcie_link_gen_current": 1,
|
||
|
"pcie_link_width_current": 16,
|
||
|
"temperature_gpu": 39,
|
||
|
"utilization_gpu": 0,
|
||
|
"utilization_memory": 0,
|
||
|
},
|
||
|
time.Unix(0, 0)),
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
name: "GeForce GTX 1660 Ti",
|
||
|
filename: "gtx-1660-ti.xml",
|
||
|
expected: []telegraf.Metric{
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"name": "Graphics Device",
|
||
|
"pstate": "P8",
|
||
|
"uuid": "GPU-304a277d-3545-63b8-3a36-dfde3c992989",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"clocks_current_graphics": 300,
|
||
|
"clocks_current_memory": 405,
|
||
|
"clocks_current_sm": 300,
|
||
|
"clocks_current_video": 540,
|
||
|
"cuda_version": "10.1",
|
||
|
"display_active": "Disabled",
|
||
|
"display_mode": "Disabled",
|
||
|
"driver_version": "418.43",
|
||
|
"encoder_stats_average_fps": 0,
|
||
|
"encoder_stats_average_latency": 0,
|
||
|
"encoder_stats_session_count": 0,
|
||
|
"fbc_stats_average_fps": 0,
|
||
|
"fbc_stats_average_latency": 0,
|
||
|
"fbc_stats_session_count": 0,
|
||
|
"fan_speed": 0,
|
||
|
"memory_free": 5912,
|
||
|
"memory_total": 5912,
|
||
|
"memory_used": 0,
|
||
|
"pcie_link_gen_current": 1,
|
||
|
"pcie_link_width_current": 16,
|
||
|
"power_draw": 8.93,
|
||
|
"power_limit": 130.0,
|
||
|
"temperature_gpu": 40,
|
||
|
"utilization_gpu": 0,
|
||
|
"utilization_memory": 1,
|
||
|
"utilization_encoder": 0,
|
||
|
"utilization_decoder": 0,
|
||
|
"vbios_version": "90.16.25.00.4C",
|
||
|
},
|
||
|
time.Unix(0, 0)),
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
name: "Quadro P400",
|
||
|
filename: "quadro-p400.xml",
|
||
|
expected: []telegraf.Metric{
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"name": "Quadro P400",
|
||
|
"pstate": "P8",
|
||
|
"uuid": "GPU-8f750be4-dfbc-23b9-b33f-da729a536494",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"clocks_current_graphics": 139,
|
||
|
"clocks_current_memory": 405,
|
||
|
"clocks_current_sm": 139,
|
||
|
"clocks_current_video": 544,
|
||
|
"cuda_version": "10.1",
|
||
|
"display_active": "Disabled",
|
||
|
"display_mode": "Disabled",
|
||
|
"driver_version": "418.43",
|
||
|
"encoder_stats_average_fps": 0,
|
||
|
"encoder_stats_average_latency": 0,
|
||
|
"encoder_stats_session_count": 0,
|
||
|
"fbc_stats_average_fps": 0,
|
||
|
"fbc_stats_average_latency": 0,
|
||
|
"fbc_stats_session_count": 0,
|
||
|
"fan_speed": 34,
|
||
|
"memory_free": 1998,
|
||
|
"memory_total": 1998,
|
||
|
"memory_used": 0,
|
||
|
"pcie_link_gen_current": 1,
|
||
|
"pcie_link_width_current": 16,
|
||
|
"serial": "0424418054852",
|
||
|
"temperature_gpu": 33,
|
||
|
"utilization_gpu": 0,
|
||
|
"utilization_memory": 3,
|
||
|
"utilization_encoder": 0,
|
||
|
"utilization_decoder": 0,
|
||
|
"vbios_version": "86.07.3B.00.4A",
|
||
|
},
|
||
|
time.Unix(0, 0)),
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
name: "Quadro P2000",
|
||
|
filename: "quadro-p2000-v12.xml",
|
||
|
expected: []telegraf.Metric{
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi",
|
||
|
map[string]string{
|
||
|
"arch": "Pascal",
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"name": "Quadro P2000",
|
||
|
"pstate": "P8",
|
||
|
"uuid": "GPU-396caaed-39ca-3199-2e68-717cdb786ec6",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
|
||
|
"clocks_current_graphics": 139,
|
||
|
"clocks_current_memory": 405,
|
||
|
"clocks_current_sm": 139,
|
||
|
"clocks_current_video": 544,
|
||
|
"cuda_version": "12.0",
|
||
|
"display_active": "Disabled",
|
||
|
"display_mode": "Disabled",
|
||
|
"driver_version": "525.125.06",
|
||
|
"encoder_stats_average_fps": 0,
|
||
|
"encoder_stats_average_latency": 0,
|
||
|
"encoder_stats_session_count": 0,
|
||
|
"fbc_stats_average_fps": 0,
|
||
|
"fbc_stats_average_latency": 0,
|
||
|
"fbc_stats_session_count": 0,
|
||
|
"fan_speed": 46,
|
||
|
"memory_free": 5051,
|
||
|
"memory_reserved": 66,
|
||
|
"memory_total": 5120,
|
||
|
"memory_used": 1,
|
||
|
"pcie_link_gen_current": 1,
|
||
|
"pcie_link_width_current": 8,
|
||
|
"power_draw": 4.61,
|
||
|
"power_limit": 75.0,
|
||
|
"serial": "0322218049033",
|
||
|
"temperature_gpu": 34,
|
||
|
"utilization_gpu": 0,
|
||
|
"utilization_memory": 0,
|
||
|
"utilization_encoder": 0,
|
||
|
"utilization_decoder": 0,
|
||
|
"vbios_version": "86.06.3F.00.30",
|
||
|
},
|
||
|
time.Unix(0, 0)),
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
name: "Tesla T4",
|
||
|
filename: "tesla-t4.xml",
|
||
|
expected: []telegraf.Metric{
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"name": "Tesla T4",
|
||
|
"pstate": "P0",
|
||
|
"uuid": "GPU-d37e67a5-91dd-3774-a5cb-99096249601a",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"clocks_current_graphics": 585,
|
||
|
"clocks_current_memory": 5000,
|
||
|
"clocks_current_sm": 585,
|
||
|
"clocks_current_video": 810,
|
||
|
"cuda_version": "11.7",
|
||
|
"current_ecc": "Enabled",
|
||
|
"display_active": "Disabled",
|
||
|
"display_mode": "Disabled",
|
||
|
"driver_version": "515.105.01",
|
||
|
"encoder_stats_average_fps": 0,
|
||
|
"encoder_stats_average_latency": 0,
|
||
|
"encoder_stats_session_count": 0,
|
||
|
"fbc_stats_average_fps": 0,
|
||
|
"fbc_stats_average_latency": 0,
|
||
|
"fbc_stats_session_count": 0,
|
||
|
"power_draw": 26.78,
|
||
|
"power_limit": 70.0,
|
||
|
"memory_free": 13939,
|
||
|
"memory_total": 15360,
|
||
|
"memory_used": 1032,
|
||
|
"memory_reserved": 388,
|
||
|
"retired_pages_multiple_single_bit": 0,
|
||
|
"retired_pages_double_bit": 0,
|
||
|
"retired_pages_blacklist": "No",
|
||
|
"retired_pages_pending": "No",
|
||
|
"pcie_link_gen_current": 3,
|
||
|
"pcie_link_width_current": 8,
|
||
|
"serial": "0000000000000",
|
||
|
"temperature_gpu": 40,
|
||
|
"utilization_gpu": 0,
|
||
|
"utilization_memory": 0,
|
||
|
"utilization_encoder": 0,
|
||
|
"utilization_decoder": 0,
|
||
|
"vbios_version": "90.04.84.00.06",
|
||
|
},
|
||
|
time.Unix(0, 0)),
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
name: "A10G",
|
||
|
filename: "a10g.xml",
|
||
|
expected: []telegraf.Metric{
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"name": "NVIDIA A10G",
|
||
|
"pstate": "P8",
|
||
|
"uuid": "GPU-9a9a6c50-2a47-2f51-a902-b82c3b127e94",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"clocks_current_graphics": 210,
|
||
|
"clocks_current_memory": 405,
|
||
|
"clocks_current_sm": 210,
|
||
|
"clocks_current_video": 555,
|
||
|
"cuda_version": "11.7",
|
||
|
"current_ecc": "Enabled",
|
||
|
"display_active": "Disabled",
|
||
|
"display_mode": "Disabled",
|
||
|
"driver_version": "515.105.01",
|
||
|
"encoder_stats_average_fps": 0,
|
||
|
"encoder_stats_average_latency": 0,
|
||
|
"encoder_stats_session_count": 0,
|
||
|
"fbc_stats_average_fps": 0,
|
||
|
"fbc_stats_average_latency": 0,
|
||
|
"fbc_stats_session_count": 0,
|
||
|
"fan_speed": 0,
|
||
|
"power_draw": 25.58,
|
||
|
"power_limit": 300.0,
|
||
|
"memory_free": 22569,
|
||
|
"memory_total": 23028,
|
||
|
"memory_used": 22,
|
||
|
"memory_reserved": 435,
|
||
|
"remapped_rows_correctable": 0,
|
||
|
"remapped_rows_uncorrectable": 0,
|
||
|
"remapped_rows_pending": "No",
|
||
|
"remapped_rows_failure": "No",
|
||
|
"pcie_link_gen_current": 1,
|
||
|
"pcie_link_width_current": 8,
|
||
|
"serial": "0000000000000",
|
||
|
"temperature_gpu": 17,
|
||
|
"utilization_gpu": 0,
|
||
|
"utilization_memory": 0,
|
||
|
"utilization_encoder": 0,
|
||
|
"utilization_decoder": 0,
|
||
|
"vbios_version": "94.02.75.00.01",
|
||
|
},
|
||
|
time.Unix(0, 0)),
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
name: "RTC 3060 schema v12",
|
||
|
filename: "rtx-3060-v12.xml",
|
||
|
expected: []telegraf.Metric{
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"name": "NVIDIA GeForce RTX 3060",
|
||
|
"arch": "Ampere",
|
||
|
"pstate": "P8",
|
||
|
"uuid": "GPU-d6889ff6-2523-9142-ca3c-1ca3f396a625",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"clocks_current_graphics": 210,
|
||
|
"clocks_current_memory": 405,
|
||
|
"clocks_current_sm": 210,
|
||
|
"clocks_current_video": 555,
|
||
|
"cuda_version": "12.8",
|
||
|
"display_active": "Disabled",
|
||
|
"display_mode": "Disabled",
|
||
|
"driver_version": "570.124.04",
|
||
|
"encoder_stats_average_fps": 0,
|
||
|
"encoder_stats_average_latency": 0,
|
||
|
"encoder_stats_session_count": 0,
|
||
|
"fbc_stats_average_fps": 0,
|
||
|
"fbc_stats_average_latency": 0,
|
||
|
"fbc_stats_session_count": 0,
|
||
|
"fan_speed": 0,
|
||
|
"power_draw": 11.63,
|
||
|
"memory_free": 11806,
|
||
|
"memory_total": 12288,
|
||
|
"memory_used": 116,
|
||
|
"memory_reserved": 368,
|
||
|
"pcie_link_gen_current": 1,
|
||
|
"pcie_link_width_current": 16,
|
||
|
"temperature_gpu": 42,
|
||
|
"utilization_gpu": 0,
|
||
|
"utilization_jpeg": 0,
|
||
|
"utilization_memory": 0,
|
||
|
"utilization_encoder": 0,
|
||
|
"utilization_decoder": 0,
|
||
|
"utilization_ofa": 0,
|
||
|
"vbios_version": "94.04.71.00.69",
|
||
|
},
|
||
|
time.Unix(1689872450, 0),
|
||
|
),
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
name: "RTC 3080 schema v12",
|
||
|
filename: "rtx-3080-v12.xml",
|
||
|
expected: []telegraf.Metric{
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"name": "NVIDIA GeForce RTX 3080",
|
||
|
"arch": "Ampere",
|
||
|
"pstate": "P8",
|
||
|
"uuid": "GPU-19d6d965-2acc-f646-00f8-4c76979aabb4",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"clocks_current_graphics": 210,
|
||
|
"clocks_current_memory": 405,
|
||
|
"clocks_current_sm": 210,
|
||
|
"clocks_current_video": 555,
|
||
|
"cuda_version": "12.2",
|
||
|
"display_active": "Enabled",
|
||
|
"display_mode": "Enabled",
|
||
|
"driver_version": "536.40",
|
||
|
"encoder_stats_average_fps": 0,
|
||
|
"encoder_stats_average_latency": 0,
|
||
|
"encoder_stats_session_count": 0,
|
||
|
"fbc_stats_average_fps": 0,
|
||
|
"fbc_stats_average_latency": 0,
|
||
|
"fbc_stats_session_count": 0,
|
||
|
"fan_speed": 0,
|
||
|
"power_draw": 22.78,
|
||
|
"memory_free": 8938,
|
||
|
"memory_total": 10240,
|
||
|
"memory_used": 1128,
|
||
|
"memory_reserved": 173,
|
||
|
"pcie_link_gen_current": 4,
|
||
|
"pcie_link_width_current": 16,
|
||
|
"temperature_gpu": 31,
|
||
|
"utilization_gpu": 0,
|
||
|
"utilization_jpeg": 0,
|
||
|
"utilization_memory": 37,
|
||
|
"utilization_encoder": 0,
|
||
|
"utilization_decoder": 0,
|
||
|
"utilization_ofa": 0,
|
||
|
"vbios_version": "94.02.71.40.72",
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi_process",
|
||
|
map[string]string{
|
||
|
"name": "/usr/lib/Xorg",
|
||
|
"type": "G",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"pid": int64(835),
|
||
|
"used_memory": int64(550),
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi_process",
|
||
|
map[string]string{
|
||
|
"name": "/usr/bin/gnome-shell",
|
||
|
"type": "G",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"pid": int64(1481),
|
||
|
"used_memory": int64(18),
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi_process",
|
||
|
map[string]string{
|
||
|
"name": "/opt/microsoft/msedge/msedge --type=gpu-process " +
|
||
|
"--crashpad-handler-pid=2176 --enable-crash-reporter=, " +
|
||
|
"--change-stack-guard-on-fork=enable --gpu-preferences=" +
|
||
|
"WAAAAAAAAAAgAAAEAAAAAAAAAAAAAAAAAABgAAAAAAA4AAAAAAAAAAA" +
|
||
|
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAGAAAAAAAAAAYAA" +
|
||
|
"AAAAAAAAgAAAAAAAAACAAAAAAAAAAIAAAAAAAAAA== --shared-files " +
|
||
|
"--field-trial-handle=0,i,3110290512380155730," +
|
||
|
"7457693378709978105,262144 --variations-seed-version",
|
||
|
"type": "G",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"pid": int64(2214),
|
||
|
"used_memory": int64(79),
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi_process",
|
||
|
map[string]string{
|
||
|
"name": "/usr/lib/firefox/firefox",
|
||
|
"type": "G",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"pid": int64(4044),
|
||
|
"used_memory": int64(541),
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi_process",
|
||
|
map[string]string{
|
||
|
"name": "/opt/visual-studio-code/code --type=gpu-process " +
|
||
|
"--enable-crash-reporter=6f39585a-ecc4-42e2-b899-9456cbe56b44" +
|
||
|
",no_channel --user-data-dir=/home/powersj/.config/Code " +
|
||
|
"--gpu-preferences=WAAAAAAAAAAgAAAEAAAAAAAAAAAAAAAAAABg" +
|
||
|
"AAAAAAA4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +
|
||
|
"ABAAAAGAAAAAAAAAAYAAAAAAAAAAgAAAAAAAAACAAAAAAAAAAIAAAAAAAAAA== " +
|
||
|
"--shared-files --field-trial-handle=0,i,685715063932313394," +
|
||
|
"4769839452661094675,262144 --disable-features=" +
|
||
|
"CalculateNativeWinOcclusion,SpareRendererForSitePerProcess",
|
||
|
"type": "G",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"pid": int64(42416),
|
||
|
"used_memory": int64(159),
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
name: "RTC 3090 schema v12",
|
||
|
filename: "rtx-3090-v12.xml",
|
||
|
expected: []telegraf.Metric{
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"name": "NVIDIA GeForce RTX 3090",
|
||
|
"arch": "Ampere",
|
||
|
"pstate": "P8",
|
||
|
"uuid": "GPU-12345678-aaaa-bbbb-cccc-0123456789ab",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"clocks_current_graphics": 0,
|
||
|
"clocks_current_memory": 405,
|
||
|
"clocks_current_sm": 0,
|
||
|
"clocks_current_video": 555,
|
||
|
"cuda_version": "12.0",
|
||
|
"display_active": "Disabled",
|
||
|
"display_mode": "Disabled",
|
||
|
"driver_version": "525.147.05",
|
||
|
"encoder_stats_average_fps": 0,
|
||
|
"encoder_stats_average_latency": 0,
|
||
|
"encoder_stats_session_count": 0,
|
||
|
"fbc_stats_average_fps": 0,
|
||
|
"fbc_stats_average_latency": 0,
|
||
|
"fbc_stats_session_count": 0,
|
||
|
"fan_speed": 0,
|
||
|
"power_draw": 27.23,
|
||
|
"power_limit": 200.0,
|
||
|
"memory_free": 24258,
|
||
|
"memory_total": 24576,
|
||
|
"memory_used": 1,
|
||
|
"memory_reserved": 316,
|
||
|
"pcie_link_gen_current": 1,
|
||
|
"pcie_link_width_current": 16,
|
||
|
"temperature_gpu": 37,
|
||
|
"utilization_gpu": 0,
|
||
|
"utilization_memory": 0,
|
||
|
"utilization_encoder": 0,
|
||
|
"utilization_decoder": 0,
|
||
|
"vbios_version": "94.02.71.40.72",
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
},
|
||
|
},
|
||
|
{
|
||
|
name: "A100-SXM4 schema v12",
|
||
|
filename: "a100-sxm4-v12.xml",
|
||
|
expected: []telegraf.Metric{
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"name": "NVIDIA A100-SXM4-80GB",
|
||
|
"arch": "Ampere",
|
||
|
"pstate": "P0",
|
||
|
"uuid": "GPU-513536b6-7d19-9063-b049-1e69664bb298",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"clocks_current_graphics": 1275,
|
||
|
"clocks_current_memory": 1593,
|
||
|
"clocks_current_sm": 1275,
|
||
|
"clocks_current_video": 1275,
|
||
|
"cuda_version": "12.2",
|
||
|
"current_ecc": "Enabled",
|
||
|
"display_active": "Disabled",
|
||
|
"display_mode": "Enabled",
|
||
|
"driver_version": "535.54.03",
|
||
|
"encoder_stats_average_fps": 0,
|
||
|
"encoder_stats_average_latency": 0,
|
||
|
"encoder_stats_session_count": 0,
|
||
|
"fbc_stats_average_fps": 0,
|
||
|
"fbc_stats_average_latency": 0,
|
||
|
"fbc_stats_session_count": 0,
|
||
|
"power_draw": 67.03,
|
||
|
"memory_free": 80999,
|
||
|
"memory_total": 81920,
|
||
|
"memory_used": 50,
|
||
|
"memory_reserved": 869,
|
||
|
"pcie_link_gen_current": 4,
|
||
|
"pcie_link_width_current": 16,
|
||
|
"serial": "1650522003820",
|
||
|
"temperature_gpu": 27,
|
||
|
"vbios_version": "92.00.36.00.02",
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi_mig",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "0",
|
||
|
"name": "NVIDIA A100-SXM4-80GB",
|
||
|
"arch": "Ampere",
|
||
|
"pstate": "P0",
|
||
|
"uuid": "GPU-513536b6-7d19-9063-b049-1e69664bb298",
|
||
|
"compute_index": "0",
|
||
|
"gpu_index": "3",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"memory_bar1_free": 32767,
|
||
|
"memory_bar1_total": 32767,
|
||
|
"memory_bar1_used": 0,
|
||
|
"memory_fb_free": 19955,
|
||
|
"memory_fb_reserved": 0,
|
||
|
"memory_fb_total": 19968,
|
||
|
"memory_fb_used": 12,
|
||
|
"sram_uncorrectable": 0,
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi_mig",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "1",
|
||
|
"name": "NVIDIA A100-SXM4-80GB",
|
||
|
"arch": "Ampere",
|
||
|
"pstate": "P0",
|
||
|
"uuid": "GPU-513536b6-7d19-9063-b049-1e69664bb298",
|
||
|
"compute_index": "0",
|
||
|
"gpu_index": "4",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"memory_bar1_free": 32767,
|
||
|
"memory_bar1_total": 32767,
|
||
|
"memory_bar1_used": 0,
|
||
|
"memory_fb_free": 19955,
|
||
|
"memory_fb_reserved": 0,
|
||
|
"memory_fb_total": 19968,
|
||
|
"memory_fb_used": 12,
|
||
|
"sram_uncorrectable": 0,
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi_mig",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "2",
|
||
|
"name": "NVIDIA A100-SXM4-80GB",
|
||
|
"arch": "Ampere",
|
||
|
"pstate": "P0",
|
||
|
"uuid": "GPU-513536b6-7d19-9063-b049-1e69664bb298",
|
||
|
"compute_index": "0",
|
||
|
"gpu_index": "5",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"memory_bar1_free": 32767,
|
||
|
"memory_bar1_total": 32767,
|
||
|
"memory_bar1_used": 0,
|
||
|
"memory_fb_free": 19955,
|
||
|
"memory_fb_reserved": 0,
|
||
|
"memory_fb_total": 19968,
|
||
|
"memory_fb_used": 12,
|
||
|
"sram_uncorrectable": 0,
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
testutil.MustMetric(
|
||
|
"nvidia_smi_mig",
|
||
|
map[string]string{
|
||
|
"compute_mode": "Default",
|
||
|
"index": "3",
|
||
|
"name": "NVIDIA A100-SXM4-80GB",
|
||
|
"arch": "Ampere",
|
||
|
"pstate": "P0",
|
||
|
"uuid": "GPU-513536b6-7d19-9063-b049-1e69664bb298",
|
||
|
"compute_index": "0",
|
||
|
"gpu_index": "6",
|
||
|
},
|
||
|
map[string]interface{}{
|
||
|
"memory_bar1_free": 32767,
|
||
|
"memory_bar1_total": 32767,
|
||
|
"memory_bar1_used": 0,
|
||
|
"memory_fb_free": 19955,
|
||
|
"memory_fb_reserved": 0,
|
||
|
"memory_fb_total": 19968,
|
||
|
"memory_fb_used": 12,
|
||
|
"sram_uncorrectable": 0,
|
||
|
},
|
||
|
time.Unix(1689872450, 0)),
|
||
|
},
|
||
|
},
|
||
|
}
|
||
|
for _, tt := range tests {
|
||
|
t.Run(tt.name, func(t *testing.T) {
|
||
|
octets, err := os.ReadFile(filepath.Join("testdata", tt.filename))
|
||
|
require.NoError(t, err)
|
||
|
|
||
|
plugin := &NvidiaSMI{Log: &testutil.Logger{}}
|
||
|
|
||
|
var acc testutil.Accumulator
|
||
|
require.NoError(t, plugin.parse(&acc, octets))
|
||
|
testutil.RequireMetricsEqual(t, tt.expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
|
||
|
})
|
||
|
}
|
||
|
}
|