197 lines
8.2 KiB
Go
197 lines
8.2 KiB
Go
|
package smartctl
|
||
|
|
||
|
import (
|
||
|
"encoding/json"
|
||
|
"fmt"
|
||
|
"time"
|
||
|
|
||
|
"github.com/influxdata/telegraf"
|
||
|
"github.com/influxdata/telegraf/internal"
|
||
|
)
|
||
|
|
||
|
func (s *Smartctl) scanDevice(acc telegraf.Accumulator, deviceName, deviceType string) error {
|
||
|
args := []string{"--json", "--all", deviceName, "--device", deviceType, "--nocheck=" + s.NoCheck}
|
||
|
cmd := execCommand(s.Path, args...)
|
||
|
if s.UseSudo {
|
||
|
cmd = execCommand("sudo", append([]string{"-n", s.Path}, args...)...)
|
||
|
}
|
||
|
|
||
|
var device smartctlDeviceJSON
|
||
|
out, err := internal.CombinedOutputTimeout(cmd, time.Duration(s.Timeout))
|
||
|
if err != nil {
|
||
|
// Error running the command and unable to parse the JSON, then bail
|
||
|
if jsonErr := json.Unmarshal(out, &device); jsonErr != nil {
|
||
|
return fmt.Errorf("error running smartctl with %s: %w", args, err)
|
||
|
}
|
||
|
|
||
|
// If we were able to parse the result, then only exit if we get an error
|
||
|
// as sometimes we can get warnings, that still produce data.
|
||
|
if len(device.Smartctl.Messages) > 0 &&
|
||
|
device.Smartctl.Messages[0].Severity == "error" &&
|
||
|
device.Smartctl.Messages[0].String != "" {
|
||
|
return fmt.Errorf("error running smartctl with %s got smartctl error message: %s", args, device.Smartctl.Messages[0].String)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if err := json.Unmarshal(out, &device); err != nil {
|
||
|
return fmt.Errorf("error unable to unmarshall response %s: %w", args, err)
|
||
|
}
|
||
|
|
||
|
t := time.Now()
|
||
|
|
||
|
tags := map[string]string{
|
||
|
"name": device.Device.Name,
|
||
|
"type": device.Device.Type,
|
||
|
"serial": device.SerialNumber,
|
||
|
}
|
||
|
|
||
|
if device.ModelName != "" {
|
||
|
tags["model"] = device.ModelName
|
||
|
}
|
||
|
if device.Vendor != "" {
|
||
|
tags["vendor"] = device.Vendor
|
||
|
}
|
||
|
|
||
|
// The JSON WWN is in decimal and needs to be converted to hex
|
||
|
if device.Wwn.ID != 0 && device.Wwn.Naa != 0 && device.Wwn.Oui != 0 {
|
||
|
tags["wwn"] = fmt.Sprintf("%01x%06x%09x", device.Wwn.Naa, device.Wwn.Oui, device.Wwn.ID)
|
||
|
}
|
||
|
|
||
|
fields := map[string]interface{}{
|
||
|
"capacity": device.UserCapacity.Bytes,
|
||
|
"health_ok": device.SmartStatus.Passed,
|
||
|
"temperature": device.Temperature.Current,
|
||
|
"firmware": device.FirmwareVersion,
|
||
|
}
|
||
|
|
||
|
if device.SCSIVendor != "" {
|
||
|
fields["scsi_vendor"] = device.SCSIVendor
|
||
|
}
|
||
|
if device.SCSIModelName != "" {
|
||
|
fields["scsi_model"] = device.SCSIModelName
|
||
|
}
|
||
|
if device.SCSIRevision != "" {
|
||
|
fields["scsi_revision"] = device.SCSIRevision
|
||
|
}
|
||
|
if device.SCSIVersion != "" {
|
||
|
fields["scsi_version"] = device.SCSIVersion
|
||
|
}
|
||
|
if device.SCSITransportProtocol.Name != "" {
|
||
|
fields["scsi_transport_protocol"] = device.SCSITransportProtocol.Name
|
||
|
}
|
||
|
if device.SCSIProtectionType != 0 {
|
||
|
fields["scsi_protection_type"] = device.SCSIProtectionType
|
||
|
}
|
||
|
if device.SCSIProtectionIntervalBytesPerLB != 0 {
|
||
|
fields["scsi_protection_interval_bytes_per_lb"] = device.SCSIProtectionIntervalBytesPerLB
|
||
|
}
|
||
|
if device.SCSIGrownDefectList != 0 {
|
||
|
fields["scsi_grown_defect_list"] = device.SCSIGrownDefectList
|
||
|
}
|
||
|
if device.LogicalBlockSize != 0 {
|
||
|
fields["logical_block_size"] = device.LogicalBlockSize
|
||
|
}
|
||
|
if device.RotationRate != 0 {
|
||
|
fields["rotation_rate"] = device.RotationRate
|
||
|
}
|
||
|
if device.SCSIStartStopCycleCounter.SpecifiedCycleCountOverDeviceLifetime != 0 {
|
||
|
fields["specified_cycle_count_over_device_lifetime"] = device.SCSIStartStopCycleCounter.SpecifiedCycleCountOverDeviceLifetime
|
||
|
}
|
||
|
if device.SCSIStartStopCycleCounter.AccumulatedStartStopCycles != 0 {
|
||
|
fields["accumulated_start_stop_cycles"] = device.SCSIStartStopCycleCounter.AccumulatedStartStopCycles
|
||
|
}
|
||
|
if device.PowerOnTime.Hours != 0 {
|
||
|
fields["power_on_hours"] = device.PowerOnTime.Hours
|
||
|
}
|
||
|
if device.PowerOnTime.Minutes != 0 {
|
||
|
fields["power_on_minutes"] = device.PowerOnTime.Minutes
|
||
|
}
|
||
|
|
||
|
// Add NVMe specific fields
|
||
|
if device.Device.Type == "nvme" {
|
||
|
fields["critical_warning"] = device.NvmeSmartHealthInformationLog.CriticalWarning
|
||
|
fields["temperature"] = device.NvmeSmartHealthInformationLog.Temperature
|
||
|
fields["available_spare"] = device.NvmeSmartHealthInformationLog.AvailableSpare
|
||
|
fields["available_spare_threshold"] = device.NvmeSmartHealthInformationLog.AvailableSpareThreshold
|
||
|
fields["percentage_used"] = device.NvmeSmartHealthInformationLog.PercentageUsed
|
||
|
fields["data_units_read"] = device.NvmeSmartHealthInformationLog.DataUnitsRead
|
||
|
fields["data_units_written"] = device.NvmeSmartHealthInformationLog.DataUnitsWritten
|
||
|
fields["host_reads"] = device.NvmeSmartHealthInformationLog.HostReads
|
||
|
fields["host_writes"] = device.NvmeSmartHealthInformationLog.HostWrites
|
||
|
fields["controller_busy_time"] = device.NvmeSmartHealthInformationLog.ControllerBusyTime
|
||
|
fields["power_cycles"] = device.NvmeSmartHealthInformationLog.PowerCycles
|
||
|
fields["power_on_hours"] = device.NvmeSmartHealthInformationLog.PowerOnHours
|
||
|
fields["unsafe_shutdowns"] = device.NvmeSmartHealthInformationLog.UnsafeShutdowns
|
||
|
fields["media_errors"] = device.NvmeSmartHealthInformationLog.MediaErrors
|
||
|
fields["num_err_log_entries"] = device.NvmeSmartHealthInformationLog.NumErrLogEntries
|
||
|
fields["warning_temp_time"] = device.NvmeSmartHealthInformationLog.WarningTempTime
|
||
|
fields["critical_comp_time"] = device.NvmeSmartHealthInformationLog.CriticalCompTime
|
||
|
}
|
||
|
|
||
|
acc.AddFields("smartctl", fields, tags, t)
|
||
|
|
||
|
// Check for ATA specific attribute fields
|
||
|
for _, attribute := range device.AtaSmartAttributes.Table {
|
||
|
attributeTags := make(map[string]string, len(tags)+1)
|
||
|
for k, v := range tags {
|
||
|
attributeTags[k] = v
|
||
|
}
|
||
|
attributeTags["name"] = attribute.Name
|
||
|
|
||
|
fields := map[string]interface{}{
|
||
|
"raw_value": attribute.Raw.Value,
|
||
|
"worst": attribute.Worst,
|
||
|
"threshold": attribute.Thresh,
|
||
|
"value": attribute.Value,
|
||
|
}
|
||
|
|
||
|
acc.AddFields("smartctl_attributes", fields, attributeTags, t)
|
||
|
}
|
||
|
|
||
|
// Check for SCSI error counter entries
|
||
|
if device.Device.Type == "scsi" {
|
||
|
counterTags := make(map[string]string, len(tags)+1)
|
||
|
for k, v := range tags {
|
||
|
counterTags[k] = v
|
||
|
}
|
||
|
|
||
|
counterTags["page"] = "read"
|
||
|
fields := map[string]interface{}{
|
||
|
"errors_corrected_by_eccfast": device.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccfast,
|
||
|
"errors_corrected_by_eccdelayed": device.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccdelayed,
|
||
|
"errors_corrected_by_rereads_rewrites": device.ScsiErrorCounterLog.Read.ErrorsCorrectedByRereadsRewrites,
|
||
|
"total_errors_corrected": device.ScsiErrorCounterLog.Read.TotalErrorsCorrected,
|
||
|
"correction_algorithm_invocations": device.ScsiErrorCounterLog.Read.CorrectionAlgorithmInvocations,
|
||
|
"gigabytes_processed": device.ScsiErrorCounterLog.Read.GigabytesProcessed,
|
||
|
"total_uncorrected_errors": device.ScsiErrorCounterLog.Read.TotalUncorrectedErrors,
|
||
|
}
|
||
|
acc.AddFields("smartctl_scsi_error_counter_log", fields, counterTags, t)
|
||
|
|
||
|
counterTags["page"] = "write"
|
||
|
fields = map[string]interface{}{
|
||
|
"errors_corrected_by_eccfast": device.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccfast,
|
||
|
"errors_corrected_by_eccdelayed": device.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccdelayed,
|
||
|
"errors_corrected_by_rereads_rewrites": device.ScsiErrorCounterLog.Write.ErrorsCorrectedByRereadsRewrites,
|
||
|
"total_errors_corrected": device.ScsiErrorCounterLog.Write.TotalErrorsCorrected,
|
||
|
"correction_algorithm_invocations": device.ScsiErrorCounterLog.Write.CorrectionAlgorithmInvocations,
|
||
|
"gigabytes_processed": device.ScsiErrorCounterLog.Write.GigabytesProcessed,
|
||
|
"total_uncorrected_errors": device.ScsiErrorCounterLog.Write.TotalUncorrectedErrors,
|
||
|
}
|
||
|
acc.AddFields("smartctl_scsi_error_counter_log", fields, counterTags, t)
|
||
|
|
||
|
counterTags["page"] = "verify"
|
||
|
fields = map[string]interface{}{
|
||
|
"errors_corrected_by_eccfast": device.ScsiErrorCounterLog.Verify.ErrorsCorrectedByEccfast,
|
||
|
"errors_corrected_by_eccdelayed": device.ScsiErrorCounterLog.Verify.ErrorsCorrectedByEccdelayed,
|
||
|
"errors_corrected_by_rereads_rewrites": device.ScsiErrorCounterLog.Verify.ErrorsCorrectedByRereadsRewrites,
|
||
|
"total_errors_corrected": device.ScsiErrorCounterLog.Verify.TotalErrorsCorrected,
|
||
|
"correction_algorithm_invocations": device.ScsiErrorCounterLog.Verify.CorrectionAlgorithmInvocations,
|
||
|
"gigabytes_processed": device.ScsiErrorCounterLog.Verify.GigabytesProcessed,
|
||
|
"total_uncorrected_errors": device.ScsiErrorCounterLog.Verify.TotalUncorrectedErrors,
|
||
|
}
|
||
|
acc.AddFields("smartctl_scsi_error_counter_log", fields, counterTags, t)
|
||
|
}
|
||
|
|
||
|
return nil
|
||
|
}
|