1
0
Fork 0
telegraf/plugins/parsers/nagios/parser.go
Daniel Baumann 4978089aab
Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-05-24 07:26:29 +02:00

323 lines
7.6 KiB
Go

package nagios
import (
"bufio"
"bytes"
"errors"
"os/exec"
"regexp"
"strconv"
"strings"
"syscall"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/plugins/parsers"
)
// unknownExitCode is the exit status Nagios plugins use for the UNKNOWN state.
// It is also the fallback whenever the real status cannot be determined.
const unknownExitCode = 3

// getExitCode translates the error produced by running a command through the
// os/exec package into the command's exit status.
//
// A nil error maps to status 0. An error that is not (and does not wrap) an
// *exec.ExitError is handed back unchanged together with unknownExitCode.
// If the exit error does not expose a syscall.WaitStatus, status 0 and a
// descriptive error are returned.
func getExitCode(err error) (int, error) {
	if err == nil {
		return 0, nil
	}

	var exitErr *exec.ExitError
	if isExit := errors.As(err, &exitErr); !isExit {
		return unknownExitCode, err
	}

	if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
		return status.ExitStatus(), nil
	}
	return 0, errors.New("expected syscall.WaitStatus")
}
// AddState records the Nagios state derived from runErr on the metrics and
// returns the (possibly extended) slice.
//
// The state is the command's exit code; whenever the exit code cannot be
// determined or lies outside 0..unknownExitCode, the state falls back to
// unknown. If a "nagios_state" metric is already present, the state is added
// to the first such metric, and for the unknown state an empty or missing
// "service_output" field is filled with the error text (the exit-code error
// if it has a message, otherwise errMessage). If no such metric exists, a new
// "nagios_state" metric carrying only the state is appended.
func AddState(runErr error, errMessage []byte, metrics []telegraf.Metric) []telegraf.Metric {
	state, exitErr := getExitCode(runErr)

	// Every failure mode is mapped onto the valid Nagios state 'unknown', so no
	// error has to be returned: the output itself carries the error details.
	// See 'Plugin Return Codes' at https://nagios-plugins.org/doc/guidelines.html
	if inRange := state >= 0 && state <= unknownExitCode; exitErr != nil || !inRange {
		state = unknownExitCode
	}

	for _, candidate := range metrics {
		if candidate.Name() != "nagios_state" {
			continue
		}

		candidate.AddField("state", state)
		if state != unknownExitCode {
			return metrics
		}

		// Prefer the exit-code error text over the captured error message.
		output := string(errMessage)
		if exitErr != nil {
			if text := exitErr.Error(); text != "" {
				output = text
			}
		}

		// Never overwrite output the plugin produced itself; only fill the gap
		// so the metric explains what went wrong.
		if current, found := candidate.GetField("service_output"); !found || current == "" {
			candidate.AddField("service_output", output)
		}
		return metrics
	}

	// No state metric yet: create one, reusing the timestamp of the existing
	// metrics when there are any.
	var stamp time.Time
	if len(metrics) == 0 {
		stamp = time.Now().UTC()
	} else {
		stamp = metrics[0].Time()
	}

	stateFields := map[string]interface{}{"state": state}
	return append(metrics, metric.New("nagios_state", nil, stateFields, stamp))
}
// Parser turns the output of a Nagios plugin into telegraf metrics.
type Parser struct {
// DefaultTags holds the tags handed over through SetDefaultTags.
DefaultTags map[string]string `toml:"-"`
// Log receives the error and debug messages emitted by Parse.
Log telegraf.Logger `toml:"-"`
// metricName is the default metric name passed to the creator registered in init.
// NOTE(review): nothing in this file reads it; confirm whether it is still needed.
metricName string
}
// Regular expressions for Nagios performance data, taken from Alignak:
// https://github.com/Alignak-monitoring/alignak/blob/develop/alignak/misc/perfdata.py
var (
// perfSplitRegExp cuts a perfdata string into individual tokens: a run of
// non-'=' characters, an '=', then a run of non-whitespace characters.
perfSplitRegExp = regexp.MustCompile(`([^=]+=\S+)`)
// nagiosRegExp breaks one token into the capture groups parsePerfData reads:
// 1 label, 2 numeric value, 3 unit, 4 warning threshold, 5 critical threshold,
// 6 minimum, 7 maximum. Group 3 may be empty and groups 4-7 are optional.
nagiosRegExp = regexp.MustCompile(
`^([^=]+)=([\d\.\-\+eE]+)([\w\/%]*);?([\d\.\-\+eE:~@]+)?;?([\d\.\-\+eE:~@]+)?;?([\d\.\-\+eE]+)?;?([\d\.\-\+eE]+)?;?\s*`,
)
)
// ParseLine parses a single line of Nagios plugin output and returns the
// first metric Parse produced for it.
//
// Any error from Parse is returned with a nil metric. An error is also
// returned if Parse yields no metrics at all, so the result is never indexed
// out of range.
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
	metrics, err := p.Parse([]byte(line))
	if err != nil {
		// Parse returns a nil slice on failure; indexing it would panic.
		return nil, err
	}
	if len(metrics) == 0 {
		return nil, errors.New("no metrics in line")
	}
	return metrics[0], nil
}
// SetDefaultTags stores tags in the parser's DefaultTags field, replacing any
// previously stored map.
// NOTE(review): no code in this file applies DefaultTags to the parsed
// metrics; presumably that happens elsewhere, confirm.
func (p *Parser) SetDefaultTags(tags map[string]string) {
p.DefaultTags = tags
}
// Parse converts raw Nagios plugin output into metrics.
//
// The first line is "service output" optionally followed by "|" and
// performance data; more than one "|" on that line is rejected with an
// "illegal output format" error. Subsequent lines are collected as long
// service output until a line containing "|" is found; everything after the
// "|" on that line, and every remaining line, is treated as performance data.
// Performance data that fails to parse is logged, not returned as an error.
// The result always ends with a "nagios_state" metric holding
// "service_output" and, when present, "long_service_output".
func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
	now := time.Now().UTC()
	scanner := bufio.NewScanner(bytes.NewReader(buf))
	pipe := []byte{'|'}

	var (
		metrics  []telegraf.Metric
		shortOut bytes.Buffer
		longOut  bytes.Buffer
	)

	// collectPerf parses one chunk of performance data and keeps its metrics;
	// failures are only logged so the remaining output is still processed.
	collectPerf := func(raw string) {
		parsed, err := parsePerfData(raw, now)
		if err != nil {
			p.Log.Errorf("Failed to parse performance data: %s\n", err.Error())
		}
		metrics = append(metrics, parsed...)
	}

	// appendLong adds one trimmed line to the long output, newline-separated.
	appendLong := func(line []byte) {
		if longOut.Len() != 0 {
			longOut.WriteByte('\n')
		}
		longOut.Write(bytes.TrimSpace(line))
	}

	// First line: service output with optional performance data.
	if !scanner.Scan() {
		if err := scanner.Err(); err != nil {
			return nil, err
		}
	}
	first := bytes.Split(scanner.Bytes(), pipe)
	if len(first) > 2 {
		return nil, errors.New("illegal output format")
	}
	if len(first) == 2 {
		collectPerf(string(first[1]))
	}
	shortOut.Write(bytes.TrimSpace(first[0]))

	// Long output runs until the first line that carries a '|'.
	for scanner.Scan() {
		line := scanner.Bytes()
		if !bytes.Contains(line, pipe) {
			appendLong(line)
			continue
		}
		segments := bytes.Split(line, pipe)
		appendLong(segments[0])
		collectPerf(string(segments[1]))
		break
	}

	// Whatever is left consists of additional performance data lines.
	for scanner.Scan() {
		collectPerf(scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		p.Log.Debugf("Unexpected io error: %s\n", err)
	}

	// Finish with the nagios state metric.
	stateFields := map[string]interface{}{
		"service_output": shortOut.String(),
	}
	if longOut.Len() > 0 {
		stateFields["long_service_output"] = longOut.String()
	}
	metrics = append(metrics, metric.New("nagios_state", nil, stateFields, now))
	return metrics, nil
}
// parsePerfData converts a Nagios performance data string into one "nagios"
// metric per `label=value[UOM];[warn];[crit];[min];[max]` entry.
//
// Each metric is tagged with "perfdata" (the label without surrounding single
// quotes) and, if present, "unit". Fields are "value", the warning/critical
// bounds ("*_lt"/"*_gt", or "*_le"/"*_ge" for thresholds containing "@"),
// "min" and "max"; a part that does not parse as a number is left out.
// All metrics carry the given timestamp.
//
// Entries that do not match nagiosRegExp are skipped. That includes the
// literal value "U" (undetermined): the value group of nagiosRegExp only
// admits numeric characters, so such an entry never produces a match.
// The returned error is always nil; it is kept for the callers' benefit.
func parsePerfData(perfdatas string, timestamp time.Time) ([]telegraf.Metric, error) {
	tokens := perfSplitRegExp.FindAllString(perfdatas, -1)
	metrics := make([]telegraf.Metric, 0, len(tokens))

	for _, token := range tokens {
		perf := nagiosRegExp.FindStringSubmatch(strings.TrimSpace(token))
		// A match yields the full text plus seven groups; at least
		// `'label'=value` must be present.
		if len(perf) < 8 || perf[1] == "" || perf[2] == "" {
			continue
		}

		tags := map[string]string{"perfdata": strings.Trim(perf[1], "'")}
		if unit := perf[3]; unit != "" {
			tags["unit"] = unit
		}

		fields := make(map[string]interface{})
		setFloatField(fields, "value", perf[2])
		setThresholdFields(fields, "warning", perf[4])
		setThresholdFields(fields, "critical", perf[5])
		setFloatField(fields, "min", perf[6])
		setFloatField(fields, "max", perf[7])

		metrics = append(metrics, metric.New("nagios", tags, fields, timestamp))
	}

	return metrics, nil
}

// setFloatField stores raw under key when it is non-empty and parses as a float.
func setFloatField(fields map[string]interface{}, key, raw string) {
	if raw == "" {
		return
	}
	if f, err := strconv.ParseFloat(raw, 64); err == nil {
		fields[key] = f
	}
}

// setThresholdFields stores the bounds of threshold under "<kind>_lt"/"<kind>_gt",
// or "<kind>_le"/"<kind>_ge" when the threshold contains "@"; empty or
// unparsable thresholds are ignored.
func setThresholdFields(fields map[string]interface{}, kind, threshold string) {
	if threshold == "" {
		return
	}
	low, high, err := parseThreshold(threshold)
	if err != nil {
		return
	}

	lowKey, highKey := kind+"_lt", kind+"_gt"
	if strings.Contains(threshold, "@") {
		lowKey, highKey = kind+"_le", kind+"_ge"
	}
	fields[lowKey] = low
	fields[highKey] = high
}
// Float64 limits, copied from the math package.
const (
	// MaxFloat64 is the largest finite float64; parseThreshold uses it as the
	// upper bound of a range with an open end ("10:").
	MaxFloat64 = 1.797693134862315708145274237317043567981e+308 // 2**1023 * (2**53 - 1) / 2**52
	// MinFloat64 is the smallest positive non-zero float64; parseThreshold uses
	// it as the lower bound for "~".
	// NOTE(review): the Nagios guidelines define "~" as negative infinity, yet
	// this value is positive; kept as is because it is part of the emitted data.
	MinFloat64 = 4.940656458412465441765687928682213723651e-324 // 1 / 2**(1023 - 1 + 52)
)

// ErrBadThresholdFormat is returned by parseThreshold for every threshold
// that does not follow the Nagios range syntax.
var ErrBadThresholdFormat = errors.New("bad threshold format")

// parseThreshold parses a Nagios range of the form `[@]start:end` and returns
// its lower and upper bound.
// See https://nagios-plugins.org/doc/guidelines.html#THRESHOLDFORMAT
//
//   - "10"      -> 0, 10
//   - "10:"     -> 10, MaxFloat64
//   - "~:10"    -> MinFloat64, 10
//   - "10:20"   -> 10, 20
//   - "@10:20"  -> 10, 20 (the "@" only inverts the alert condition, which is
//     the caller's concern, so it is stripped before parsing)
//
// An empty start is read as 0. Anything else, including more than one ":" or
// a bound that is not a number, yields ErrBadThresholdFormat with zero bounds.
func parseThreshold(threshold string) (vmin, vmax float64, err error) {
	parts := strings.Split(strings.TrimPrefix(threshold, "@"), ":")

	switch len(parts) {
	case 1:
		// A lone number is shorthand for the range 0..N.
		vmax, err = strconv.ParseFloat(parts[0], 64)
		if err != nil {
			return 0, 0, ErrBadThresholdFormat
		}
		return 0, vmax, nil
	case 2:
		switch parts[0] {
		case "":
			vmin = 0
		case "~":
			vmin = MinFloat64
		default:
			vmin, err = strconv.ParseFloat(parts[0], 64)
			if err != nil {
				return 0, 0, ErrBadThresholdFormat
			}
		}

		if parts[1] == "" {
			vmax = MaxFloat64
		} else {
			vmax, err = strconv.ParseFloat(parts[1], 64)
			if err != nil {
				return 0, 0, ErrBadThresholdFormat
			}
		}
		return vmin, vmax, nil
	default:
		return 0, 0, ErrBadThresholdFormat
	}
}
func init() {
// Register parser
parsers.Add("nagios",
func(defaultMetricName string) telegraf.Parser {
return &Parser{metricName: defaultMetricName}
},
)
}