1
0
Fork 0

Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
4963 changed files with 677545 additions and 0 deletions

View file

@ -0,0 +1,127 @@
package schema_v12
import (
"encoding/xml"
"strconv"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs/nvidia_smi/common"
)
// Parse decodes the XML report produced by nvidia-smi from buf and hands the
// result to acc: one "nvidia_smi" metric per GPU, one "nvidia_smi_mig" metric
// per MIG device and one "nvidia_smi_process" metric per process entry.
// It returns the error from xml.Unmarshal if buf cannot be decoded and nil
// otherwise.
func Parse(acc telegraf.Accumulator, buf []byte) error {
	var report smi
	if err := xml.Unmarshal(buf, &report); err != nil {
		return err
	}

	// Use the timestamp embedded in the report when it can be parsed; in every
	// other case the metrics are stamped with the current time.
	ts := time.Now()
	if raw := report.Timestamp; raw != "" {
		parsed, err := time.ParseInLocation(time.ANSIC, raw, time.Local)
		if err == nil {
			ts = parsed
		}
	}

	for idx := range report.Gpu {
		dev := &report.Gpu[idx]

		// Tags describing the physical GPU; also attached to its MIG devices.
		addDeviceTags := func(tags map[string]string) {
			common.SetTagIfUsed(tags, "pstate", dev.PerformanceState)
			common.SetTagIfUsed(tags, "name", dev.ProductName)
			common.SetTagIfUsed(tags, "arch", dev.ProductArchitecture)
			common.SetTagIfUsed(tags, "uuid", dev.UUID)
			common.SetTagIfUsed(tags, "compute_mode", dev.ComputeMode)
		}

		gpuTags := map[string]string{
			"index": strconv.Itoa(idx),
		}
		gpuFields := make(map[string]interface{}, 44)
		addDeviceTags(gpuTags)

		setStr := func(key, value string) { common.SetIfUsed("str", gpuFields, key, value) }
		setInt := func(key, value string) { common.SetIfUsed("int", gpuFields, key, value) }
		setFloat := func(key, value string) { common.SetIfUsed("float", gpuFields, key, value) }

		// Driver and device identification.
		setStr("driver_version", report.DriverVersion)
		setStr("cuda_version", report.CudaVersion)
		setStr("serial", dev.Serial)
		setStr("vbios_version", dev.VbiosVersion)
		setStr("display_active", dev.DisplayActive)
		setStr("display_mode", dev.DisplayMode)
		setStr("current_ecc", dev.EccMode.CurrentEcc)
		setInt("fan_speed", dev.FanSpeed)

		// Framebuffer memory.
		mem := &dev.FbMemoryUsage
		setInt("memory_total", mem.Total)
		setInt("memory_used", mem.Used)
		setInt("memory_free", mem.Free)
		setInt("memory_reserved", mem.Reserved)

		// Retired pages and remapped rows.
		retired := &dev.RetiredPages
		setInt("retired_pages_multiple_single_bit", retired.MultipleSingleBitRetirement.RetiredCount)
		setInt("retired_pages_double_bit", retired.DoubleBitRetirement.RetiredCount)
		setStr("retired_pages_blacklist", retired.PendingBlacklist)
		setStr("retired_pages_pending", retired.PendingRetirement)
		remapped := &dev.RemappedRows
		setInt("remapped_rows_correctable", remapped.Correctable)
		setInt("remapped_rows_uncorrectable", remapped.Uncorrectable)
		setStr("remapped_rows_pending", remapped.Pending)
		setStr("remapped_rows_failure", remapped.Failure)

		// Temperature and utilization.
		setInt("temperature_gpu", dev.Temperature.GpuTemp)
		util := &dev.Utilization
		setInt("utilization_gpu", util.GpuUtil)
		setInt("utilization_memory", util.MemoryUtil)
		setInt("utilization_encoder", util.EncoderUtil)
		setInt("utilization_decoder", util.DecoderUtil)
		setInt("utilization_jpeg", util.JpegUtil)
		setInt("utilization_ofa", util.OfaUtil)

		// PCIe link.
		link := &dev.Pci.PciGpuLinkInfo
		setInt("pcie_link_gen_current", link.PcieGen.CurrentLinkGen)
		setInt("pcie_link_width_current", link.LinkWidths.CurrentLinkWidth)

		// Encoder and frame-buffer-capture statistics.
		enc := &dev.EncoderStats
		setInt("encoder_stats_session_count", enc.SessionCount)
		setInt("encoder_stats_average_fps", enc.AverageFps)
		setInt("encoder_stats_average_latency", enc.AverageLatency)
		fbc := &dev.FbcStats
		setInt("fbc_stats_session_count", fbc.SessionCount)
		setInt("fbc_stats_average_fps", fbc.AverageFps)
		setInt("fbc_stats_average_latency", fbc.AverageLatency)

		// Current clocks.
		clk := &dev.Clocks
		setInt("clocks_current_graphics", clk.GraphicsClock)
		setInt("clocks_current_sm", clk.SmClock)
		setInt("clocks_current_memory", clk.MemClock)
		setInt("clocks_current_video", clk.VideoClock)

		// Several sources feed the same keys (power_draw, power_limit and
		// module_power_draw). The call order below is deliberate and must stay
		// as it is so that the same source keeps taking precedence.
		setFloat("power_draw", dev.PowerReadings.PowerDraw)
		setFloat("power_draw", dev.PowerReadings.InstantPowerDraw)
		setFloat("power_limit", dev.PowerReadings.PowerLimit)
		setFloat("power_draw", dev.GpuPowerReadings.PowerDraw)
		setFloat("power_draw", dev.GpuPowerReadings.InstantPowerDraw)
		setFloat("power_limit", dev.GpuPowerReadings.PowerLimit)
		setFloat("module_power_draw", dev.ModulePowerReadings.PowerDraw)
		setFloat("module_power_draw", dev.ModulePowerReadings.InstantPowerDraw)

		acc.AddFields("nvidia_smi", gpuFields, gpuTags, ts)

		// One metric per MIG device, tagged with the MIG identifiers plus the
		// tags of the GPU hosting it.
		for m := range dev.MigDevices.MigDevice {
			mig := &dev.MigDevices.MigDevice[m]

			migTags := make(map[string]string, 8)
			common.SetTagIfUsed(migTags, "index", mig.Index)
			common.SetTagIfUsed(migTags, "gpu_index", mig.GpuInstanceID)
			common.SetTagIfUsed(migTags, "compute_index", mig.ComputeInstanceID)
			addDeviceTags(migTags)

			migFields := make(map[string]interface{}, 8)
			migInt := func(key, value string) { common.SetIfUsed("int", migFields, key, value) }
			migInt("sram_uncorrectable", mig.EccErrorCount.VolatileCount.SramUncorrectable)
			migInt("memory_fb_total", mig.FbMemoryUsage.Total)
			migInt("memory_fb_reserved", mig.FbMemoryUsage.Reserved)
			migInt("memory_fb_used", mig.FbMemoryUsage.Used)
			migInt("memory_fb_free", mig.FbMemoryUsage.Free)
			migInt("memory_bar1_total", mig.Bar1MemoryUsage.Total)
			migInt("memory_bar1_used", mig.Bar1MemoryUsage.Used)
			migInt("memory_bar1_free", mig.Bar1MemoryUsage.Free)

			acc.AddFields("nvidia_smi_mig", migFields, migTags, ts)
		}

		// One metric per process entry listed for this GPU.
		for p := range dev.Processes.ProcessInfo {
			proc := &dev.Processes.ProcessInfo[p]

			procTags := make(map[string]string, 2)
			common.SetTagIfUsed(procTags, "name", proc.ProcessName)
			common.SetTagIfUsed(procTags, "type", proc.Type)

			procFields := make(map[string]interface{}, 2)
			common.SetIfUsed("int", procFields, "pid", proc.Pid)
			common.SetIfUsed("int", procFields, "used_memory", proc.UsedMemory)

			acc.AddFields("nvidia_smi_process", procFields, procTags, ts)
		}
	}

	return nil
}

View file

@ -0,0 +1,302 @@
package schema_v12
// Generated by https://github.com/twpayne/go-xmlstruct with some type corrections.
//
// smi is the decoding target for the nvidia-smi XML report; Parse in this
// package fills it with xml.Unmarshal. Every leaf value is declared as a
// string. Parse passes the values it reports to common.SetTagIfUsed or to
// common.SetIfUsed together with the wanted kind ("str", "int" or "float").
// Many of the fields below are decoded but not read by Parse.
type smi struct {
AttachedGpus string `xml:"attached_gpus"`
// CudaVersion and DriverVersion are report-wide; Parse adds them as fields
// to the metric of every GPU.
CudaVersion string `xml:"cuda_version"`
DriverVersion string `xml:"driver_version"`
// Gpu holds one entry per <gpu> element. Parse uses the position of an
// entry in this slice as the "index" tag of the GPU metric.
Gpu []struct {
// ID is taken from the id attribute of the <gpu> element.
ID string `xml:"id,attr"`
AccountedProcesses struct{} `xml:"accounted_processes"`
AccountingMode string `xml:"accounting_mode"`
AccountingModeBufferSize string `xml:"accounting_mode_buffer_size"`
AddressingMode string `xml:"addressing_mode"`
ApplicationsClocks struct {
GraphicsClock string `xml:"graphics_clock"`
MemClock string `xml:"mem_clock"`
} `xml:"applications_clocks"`
Bar1MemoryUsage struct {
Free string `xml:"free"`
Total string `xml:"total"`
Used string `xml:"used"`
} `xml:"bar1_memory_usage"`
BoardID string `xml:"board_id"`
BoardPartNumber string `xml:"board_part_number"`
CcProtectedMemoryUsage struct {
Free string `xml:"free"`
Total string `xml:"total"`
Used string `xml:"used"`
} `xml:"cc_protected_memory_usage"`
ClockPolicy struct {
AutoBoost string `xml:"auto_boost"`
AutoBoostDefault string `xml:"auto_boost_default"`
} `xml:"clock_policy"`
// Clocks is the source of the clocks_current_* fields emitted by Parse.
Clocks struct {
GraphicsClock string `xml:"graphics_clock"`
MemClock string `xml:"mem_clock"`
SmClock string `xml:"sm_clock"`
VideoClock string `xml:"video_clock"`
} `xml:"clocks"`
ClocksEventReasons struct {
ClocksEventReasonApplicationsClocksSetting string `xml:"clocks_event_reason_applications_clocks_setting"`
ClocksEventReasonDisplayClocksSetting string `xml:"clocks_event_reason_display_clocks_setting"`
ClocksEventReasonGpuIdle string `xml:"clocks_event_reason_gpu_idle"`
ClocksEventReasonHwPowerBrakeSlowdown string `xml:"clocks_event_reason_hw_power_brake_slowdown"`
ClocksEventReasonHwSlowdown string `xml:"clocks_event_reason_hw_slowdown"`
ClocksEventReasonHwThermalSlowdown string `xml:"clocks_event_reason_hw_thermal_slowdown"`
ClocksEventReasonSwPowerCap string `xml:"clocks_event_reason_sw_power_cap"`
ClocksEventReasonSwThermalSlowdown string `xml:"clocks_event_reason_sw_thermal_slowdown"`
ClocksEventReasonSyncBoost string `xml:"clocks_event_reason_sync_boost"`
} `xml:"clocks_event_reasons"`
ComputeMode string `xml:"compute_mode"`
DefaultApplicationsClocks struct {
GraphicsClock string `xml:"graphics_clock"`
MemClock string `xml:"mem_clock"`
} `xml:"default_applications_clocks"`
DeferredClocks struct {
MemClock string `xml:"mem_clock"`
} `xml:"deferred_clocks"`
DisplayActive string `xml:"display_active"`
DisplayMode string `xml:"display_mode"`
DriverModel struct {
CurrentDm string `xml:"current_dm"`
PendingDm string `xml:"pending_dm"`
} `xml:"driver_model"`
EccErrors struct {
Aggregate struct {
DramCorrectable string `xml:"dram_correctable"`
DramUncorrectable string `xml:"dram_uncorrectable"`
SramCorrectable string `xml:"sram_correctable"`
SramUncorrectable string `xml:"sram_uncorrectable"`
} `xml:"aggregate"`
Volatile struct {
DramCorrectable string `xml:"dram_correctable"`
DramUncorrectable string `xml:"dram_uncorrectable"`
SramCorrectable string `xml:"sram_correctable"`
SramUncorrectable string `xml:"sram_uncorrectable"`
} `xml:"volatile"`
} `xml:"ecc_errors"`
EccMode struct {
CurrentEcc string `xml:"current_ecc"`
PendingEcc string `xml:"pending_ecc"`
} `xml:"ecc_mode"`
EncoderStats struct {
AverageFps string `xml:"average_fps"`
AverageLatency string `xml:"average_latency"`
SessionCount string `xml:"session_count"`
} `xml:"encoder_stats"`
Fabric struct {
State string `xml:"state"`
Status string `xml:"status"`
} `xml:"fabric"`
FanSpeed string `xml:"fan_speed"`
// FbMemoryUsage is the source of the memory_* fields emitted by Parse.
FbMemoryUsage struct {
Free string `xml:"free"`
Reserved string `xml:"reserved"`
Total string `xml:"total"`
Used string `xml:"used"`
} `xml:"fb_memory_usage"`
FbcStats struct {
AverageFps string `xml:"average_fps"`
AverageLatency string `xml:"average_latency"`
SessionCount string `xml:"session_count"`
} `xml:"fbc_stats"`
GpuFruPartNumber string `xml:"gpu_fru_part_number"`
GpuModuleID string `xml:"gpu_module_id"`
GpuOperationMode struct {
CurrentGom string `xml:"current_gom"`
PendingGom string `xml:"pending_gom"`
} `xml:"gpu_operation_mode"`
GpuPartNumber string `xml:"gpu_part_number"`
// GpuPowerReadings: Parse reads PowerDraw, InstantPowerDraw and PowerLimit
// from here after the same-named fields of PowerReadings and writes them to
// the same metric keys (power_draw, power_limit).
GpuPowerReadings struct {
CurrentPowerLimit string `xml:"current_power_limit"`
DefaultPowerLimit string `xml:"default_power_limit"`
MaxPowerLimit string `xml:"max_power_limit"`
MinPowerLimit string `xml:"min_power_limit"`
PowerDraw string `xml:"power_draw"`
AveragePowerDraw string `xml:"average_power_draw"`
InstantPowerDraw string `xml:"instant_power_draw"`
PowerLimit string `xml:"power_limit"`
PowerState string `xml:"power_state"`
RequestedPowerLimit string `xml:"requested_power_limit"`
} `xml:"gpu_power_readings"`
GpuResetStatus struct {
DrainAndResetRecommended string `xml:"drain_and_reset_recommended"`
ResetRequired string `xml:"reset_required"`
} `xml:"gpu_reset_status"`
GpuVirtualizationMode struct {
HostVgpuMode string `xml:"host_vgpu_mode"`
VirtualizationMode string `xml:"virtualization_mode"`
} `xml:"gpu_virtualization_mode"`
GspFirmwareVersion string `xml:"gsp_firmware_version"`
Ibmnpu struct {
RelaxedOrderingMode string `xml:"relaxed_ordering_mode"`
} `xml:"ibmnpu"`
InforomVersion struct {
EccObject string `xml:"ecc_object"`
ImgVersion string `xml:"img_version"`
OemObject string `xml:"oem_object"`
PwrObject string `xml:"pwr_object"`
} `xml:"inforom_version"`
MaxClocks struct {
GraphicsClock string `xml:"graphics_clock"`
MemClock string `xml:"mem_clock"`
SmClock string `xml:"sm_clock"`
VideoClock string `xml:"video_clock"`
} `xml:"max_clocks"`
MaxCustomerBoostClocks struct {
GraphicsClock string `xml:"graphics_clock"`
} `xml:"max_customer_boost_clocks"`
// MigDevices: Parse emits one "nvidia_smi_mig" metric per MigDevice entry.
// NOTE(review): the json tags inside this sub-struct are not consulted by
// encoding/xml; presumably they are residue of how this part was written.
// Confirm nothing relies on them before removing.
MigDevices struct {
MigDevice []struct {
Index string `xml:"index"`
GpuInstanceID string `xml:"gpu_instance_id"`
ComputeInstanceID string `xml:"compute_instance_id"`
EccErrorCount struct {
// Text receives the character data found directly inside
// <ecc_error_count>; Parse does not read it.
Text string `xml:",chardata" json:"text"`
VolatileCount struct {
SramUncorrectable string `xml:"sram_uncorrectable"`
} `xml:"volatile_count" json:"volatile_count"`
} `xml:"ecc_error_count" json:"ecc_error_count"`
FbMemoryUsage struct {
Total string `xml:"total"`
Reserved string `xml:"reserved"`
Used string `xml:"used"`
Free string `xml:"free"`
} `xml:"fb_memory_usage" json:"fb_memory_usage"`
Bar1MemoryUsage struct {
Total string `xml:"total"`
Used string `xml:"used"`
Free string `xml:"free"`
} `xml:"bar1_memory_usage" json:"bar1_memory_usage"`
} `xml:"mig_device" json:"mig_device"`
} `xml:"mig_devices" json:"mig_devices"`
MigMode struct {
CurrentMig string `xml:"current_mig"`
PendingMig string `xml:"pending_mig"`
} `xml:"mig_mode"`
MinorNumber string `xml:"minor_number"`
// ModulePowerReadings: Parse reads PowerDraw and then InstantPowerDraw for
// the module_power_draw field.
ModulePowerReadings struct {
CurrentPowerLimit string `xml:"current_power_limit"`
DefaultPowerLimit string `xml:"default_power_limit"`
MaxPowerLimit string `xml:"max_power_limit"`
MinPowerLimit string `xml:"min_power_limit"`
PowerDraw string `xml:"power_draw"`
AveragePowerDraw string `xml:"average_power_draw"`
InstantPowerDraw string `xml:"instant_power_draw"`
PowerState string `xml:"power_state"`
RequestedPowerLimit string `xml:"requested_power_limit"`
} `xml:"module_power_readings"`
MultigpuBoard string `xml:"multigpu_board"`
Pci struct {
AtomicCapsInbound string `xml:"atomic_caps_inbound"`
AtomicCapsOutbound string `xml:"atomic_caps_outbound"`
PciBridgeChip struct {
BridgeChipFw string `xml:"bridge_chip_fw"`
BridgeChipType string `xml:"bridge_chip_type"`
} `xml:"pci_bridge_chip"`
PciBus string `xml:"pci_bus"`
PciBusID string `xml:"pci_bus_id"`
PciDevice string `xml:"pci_device"`
PciDeviceID string `xml:"pci_device_id"`
PciDomain string `xml:"pci_domain"`
// PciGpuLinkInfo: Parse reads CurrentLinkWidth and CurrentLinkGen for the
// pcie_link_width_current and pcie_link_gen_current fields.
PciGpuLinkInfo struct {
LinkWidths struct {
CurrentLinkWidth string `xml:"current_link_width"`
MaxLinkWidth string `xml:"max_link_width"`
} `xml:"link_widths"`
PcieGen struct {
CurrentLinkGen string `xml:"current_link_gen"`
DeviceCurrentLinkGen string `xml:"device_current_link_gen"`
MaxDeviceLinkGen string `xml:"max_device_link_gen"`
MaxHostLinkGen string `xml:"max_host_link_gen"`
MaxLinkGen string `xml:"max_link_gen"`
} `xml:"pcie_gen"`
} `xml:"pci_gpu_link_info"`
PciSubSystemID string `xml:"pci_sub_system_id"`
ReplayCounter string `xml:"replay_counter"`
ReplayRolloverCounter string `xml:"replay_rollover_counter"`
RxUtil string `xml:"rx_util"`
TxUtil string `xml:"tx_util"`
} `xml:"pci"`
PerformanceState string `xml:"performance_state"`
PersistenceMode string `xml:"persistence_mode"`
// PowerReadings: Parse reads PowerDraw, InstantPowerDraw and PowerLimit
// from here for the power_draw and power_limit fields.
PowerReadings struct {
PowerState string `xml:"power_state"`
PowerManagement string `xml:"power_management"`
PowerDraw string `xml:"power_draw"`
AveragePowerDraw string `xml:"average_power_draw"`
InstantPowerDraw string `xml:"instant_power_draw"`
PowerLimit string `xml:"power_limit"`
DefaultPowerLimit string `xml:"default_power_limit"`
EnforcedPowerLimit string `xml:"enforced_power_limit"`
MinPowerLimit string `xml:"min_power_limit"`
MaxPowerLimit string `xml:"max_power_limit"`
} `xml:"power_readings"`
// Processes: Parse emits one "nvidia_smi_process" metric per ProcessInfo
// entry, with ProcessName and Type as tags and Pid and UsedMemory as fields.
Processes struct {
ProcessInfo []struct {
Pid string `xml:"pid"`
Type string `xml:"type"`
ProcessName string `xml:"process_name"`
UsedMemory string `xml:"used_memory"`
} `xml:"process_info"`
} `xml:"processes"`
ProductArchitecture string `xml:"product_architecture"`
ProductBrand string `xml:"product_brand"`
ProductName string `xml:"product_name"`
RemappedRows struct {
// Manually added
Correctable string `xml:"remapped_row_corr"`
Uncorrectable string `xml:"remapped_row_unc"`
Pending string `xml:"remapped_row_pending"`
Failure string `xml:"remapped_row_failure"`
} `xml:"remapped_rows"`
RetiredPages struct {
DoubleBitRetirement struct {
RetiredCount string `xml:"retired_count"`
RetiredPagelist string `xml:"retired_pagelist"`
} `xml:"double_bit_retirement"`
MultipleSingleBitRetirement struct {
RetiredCount string `xml:"retired_count"`
RetiredPagelist string `xml:"retired_pagelist"`
} `xml:"multiple_single_bit_retirement"`
PendingBlacklist string `xml:"pending_blacklist"`
PendingRetirement string `xml:"pending_retirement"`
} `xml:"retired_pages"`
Serial string `xml:"serial"`
SupportedClocks struct {
SupportedMemClock []struct {
SupportedGraphicsClock []string `xml:"supported_graphics_clock"`
Value string `xml:"value"`
} `xml:"supported_mem_clock"`
} `xml:"supported_clocks"`
SupportedGpuTargetTemp struct {
GpuTargetTempMax string `xml:"gpu_target_temp_max"`
GpuTargetTempMin string `xml:"gpu_target_temp_min"`
} `xml:"supported_gpu_target_temp"`
// Temperature: only GpuTemp is read by Parse (field temperature_gpu).
Temperature struct {
GpuTargetTemperature string `xml:"gpu_target_temperature"`
GpuTemp string `xml:"gpu_temp"`
GpuTempMaxGpuThreshold string `xml:"gpu_temp_max_gpu_threshold"`
GpuTempMaxMemThreshold string `xml:"gpu_temp_max_mem_threshold"`
GpuTempMaxThreshold string `xml:"gpu_temp_max_threshold"`
GpuTempSlowThreshold string `xml:"gpu_temp_slow_threshold"`
GpuTempTlimit string `xml:"gpu_temp_tlimit"`
MemoryTemp string `xml:"memory_temp"`
} `xml:"temperature"`
// Utilization is the source of the utilization_* fields emitted by Parse.
Utilization struct {
DecoderUtil string `xml:"decoder_util"`
EncoderUtil string `xml:"encoder_util"`
GpuUtil string `xml:"gpu_util"`
JpegUtil string `xml:"jpeg_util"`
MemoryUtil string `xml:"memory_util"`
OfaUtil string `xml:"ofa_util"`
} `xml:"utilization"`
UUID string `xml:"uuid"`
VbiosVersion string `xml:"vbios_version"`
Voltage struct {
GraphicsVolt string `xml:"graphics_volt"`
} `xml:"voltage"`
} `xml:"gpu"`
// Timestamp: Parse tries to interpret this with the time.ANSIC layout in the
// local time zone and stamps the metrics with the current time when the
// value is empty or cannot be parsed.
Timestamp string `xml:"timestamp"`
}