323 lines
7.6 KiB
Go
323 lines
7.6 KiB
Go
package nagios
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"errors"
|
|
"os/exec"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/influxdata/telegraf"
|
|
"github.com/influxdata/telegraf/metric"
|
|
"github.com/influxdata/telegraf/plugins/parsers"
|
|
)
|
|
|
|
// unknownExitCode is the nagios "unknown" status code.
// It is the exit code to report whenever an error occurs or something
// unexpected happens.
const unknownExitCode = 3

// getExitCode extracts the process exit code from an error value which is the
// result of running a command through the os/exec package API.
//
// Returns:
//   - (0, nil) when err is nil, i.e. the command succeeded.
//   - (unknownExitCode, err) when err does not wrap an *exec.ExitError, so no
//     exit status is available.
//   - (unknownExitCode, error) when the platform specific process state is not
//     a syscall.WaitStatus.
//   - (exit status, nil) otherwise.
//
// Every failure path reports unknownExitCode so that a caller which ignores
// the returned error can never mistake a failure for the nagios "OK" code 0.
func getExitCode(err error) (int, error) {
	if err == nil {
		return 0, nil
	}

	var ee *exec.ExitError
	if !errors.As(err, &ee) {
		// Not an exit error: the command did not produce an exit status.
		return unknownExitCode, err
	}

	ws, ok := ee.Sys().(syscall.WaitStatus)
	if !ok {
		return unknownExitCode, errors.New("expected syscall.WaitStatus")
	}

	return ws.ExitStatus(), nil
}
|
|
|
|
// AddState adds a state derived from the runErr. Unknown state will be set as fallback.
|
|
// If any error occurs, it is guaranteed to be added to the service output.
|
|
// An updated slice of metrics will be returned.
|
|
func AddState(runErr error, errMessage []byte, metrics []telegraf.Metric) []telegraf.Metric {
|
|
state, exitErr := getExitCode(runErr)
|
|
// This will ensure that in every error case the valid nagios state 'unknown' will be returned.
|
|
// No error needs to be thrown because the output will contain the error information.
|
|
// Description found at 'Plugin Return Codes' https://nagios-plugins.org/doc/guidelines.html
|
|
if exitErr != nil || state < 0 || state > unknownExitCode {
|
|
state = unknownExitCode
|
|
}
|
|
|
|
for _, m := range metrics {
|
|
if m.Name() == "nagios_state" {
|
|
m.AddField("state", state)
|
|
|
|
if state == unknownExitCode {
|
|
errorMessage := string(errMessage)
|
|
if exitErr != nil && exitErr.Error() != "" {
|
|
errorMessage = exitErr.Error()
|
|
}
|
|
value, ok := m.GetField("service_output")
|
|
if !ok || value == "" {
|
|
// By adding the error message as output, the metric contains all needed information to understand
|
|
// the problem and fix it
|
|
m.AddField("service_output", errorMessage)
|
|
}
|
|
}
|
|
return metrics
|
|
}
|
|
}
|
|
|
|
var ts time.Time
|
|
if len(metrics) != 0 {
|
|
ts = metrics[0].Time()
|
|
} else {
|
|
ts = time.Now().UTC()
|
|
}
|
|
f := map[string]interface{}{
|
|
"state": state,
|
|
}
|
|
m := metric.New("nagios_state", nil, f, ts)
|
|
|
|
return append(metrics, m)
|
|
}
|
|
|
|
type Parser struct {
|
|
DefaultTags map[string]string `toml:"-"`
|
|
Log telegraf.Logger `toml:"-"`
|
|
|
|
metricName string
|
|
}
|
|
|
|
// The performance data patterns below were taken from Alignak:
// https://github.com/Alignak-monitoring/alignak/blob/develop/alignak/misc/perfdata.py

// perfSplitRegExp cuts a performance data string into its individual
// `label=value...` entries: anything up to a '=' followed by a run of
// non-whitespace characters.
var perfSplitRegExp = regexp.MustCompile(`([^=]+=\S+)`)

// nagiosRegExp dissects a single performance data entry. The capture groups
// are, in order: label, value, unit of measurement, warning threshold,
// critical threshold, minimum and maximum. Everything after the value is
// optional.
var nagiosRegExp = regexp.MustCompile(
	`^([^=]+)=([\d\.\-\+eE]+)([\w\/%]*);?([\d\.\-\+eE:~@]+)?;?([\d\.\-\+eE:~@]+)?;?([\d\.\-\+eE]+)?;?([\d\.\-\+eE]+)?;?\s*`,
)
|
|
|
|
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
|
|
metrics, err := p.Parse([]byte(line))
|
|
return metrics[0], err
|
|
}
|
|
|
|
func (p *Parser) SetDefaultTags(tags map[string]string) {
|
|
p.DefaultTags = tags
|
|
}
|
|
|
|
func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
|
|
ts := time.Now().UTC()
|
|
|
|
s := bufio.NewScanner(bytes.NewReader(buf))
|
|
|
|
var msg bytes.Buffer
|
|
var longmsg bytes.Buffer
|
|
|
|
metrics := make([]telegraf.Metric, 0)
|
|
|
|
// Scan the first line.
|
|
if !s.Scan() && s.Err() != nil {
|
|
return nil, s.Err()
|
|
}
|
|
parts := bytes.Split(s.Bytes(), []byte{'|'})
|
|
switch len(parts) {
|
|
case 2:
|
|
ms, err := parsePerfData(string(parts[1]), ts)
|
|
if err != nil {
|
|
p.Log.Errorf("Failed to parse performance data: %s\n", err.Error())
|
|
}
|
|
metrics = append(metrics, ms...)
|
|
fallthrough
|
|
case 1:
|
|
msg.Write(bytes.TrimSpace(parts[0]))
|
|
default:
|
|
return nil, errors.New("illegal output format")
|
|
}
|
|
|
|
// Read long output.
|
|
for s.Scan() {
|
|
if bytes.Contains(s.Bytes(), []byte{'|'}) {
|
|
parts := bytes.Split(s.Bytes(), []byte{'|'})
|
|
if longmsg.Len() != 0 {
|
|
longmsg.WriteByte('\n')
|
|
}
|
|
longmsg.Write(bytes.TrimSpace(parts[0]))
|
|
|
|
ms, err := parsePerfData(string(parts[1]), ts)
|
|
if err != nil {
|
|
p.Log.Errorf("Failed to parse performance data: %s\n", err.Error())
|
|
}
|
|
metrics = append(metrics, ms...)
|
|
break
|
|
}
|
|
if longmsg.Len() != 0 {
|
|
longmsg.WriteByte('\n')
|
|
}
|
|
longmsg.Write(bytes.TrimSpace(s.Bytes()))
|
|
}
|
|
|
|
// Parse extra performance data.
|
|
for s.Scan() {
|
|
ms, err := parsePerfData(s.Text(), ts)
|
|
if err != nil {
|
|
p.Log.Errorf("Failed to parse performance data: %s\n", err.Error())
|
|
}
|
|
metrics = append(metrics, ms...)
|
|
}
|
|
|
|
if s.Err() != nil {
|
|
p.Log.Debugf("Unexpected io error: %s\n", s.Err())
|
|
}
|
|
|
|
// Create nagios state.
|
|
fields := map[string]interface{}{
|
|
"service_output": msg.String(),
|
|
}
|
|
if longmsg.Len() != 0 {
|
|
fields["long_service_output"] = longmsg.String()
|
|
}
|
|
|
|
m := metric.New("nagios_state", nil, fields, ts)
|
|
metrics = append(metrics, m)
|
|
|
|
return metrics, nil
|
|
}
|
|
|
|
func parsePerfData(perfdatas string, timestamp time.Time) ([]telegraf.Metric, error) {
|
|
metrics := make([]telegraf.Metric, 0)
|
|
|
|
for _, unParsedPerf := range perfSplitRegExp.FindAllString(perfdatas, -1) {
|
|
trimmedPerf := strings.TrimSpace(unParsedPerf)
|
|
perf := nagiosRegExp.FindStringSubmatch(trimmedPerf)
|
|
|
|
// verify at least `'label'=value[UOM];` existed
|
|
if len(perf) < 3 {
|
|
continue
|
|
}
|
|
if perf[1] == "" || perf[2] == "" {
|
|
continue
|
|
}
|
|
|
|
fieldName := strings.Trim(perf[1], "'")
|
|
tags := map[string]string{"perfdata": fieldName}
|
|
if perf[3] != "" {
|
|
str := perf[3]
|
|
if str != "" {
|
|
tags["unit"] = str
|
|
}
|
|
}
|
|
|
|
fields := make(map[string]interface{})
|
|
if perf[2] == "U" {
|
|
return nil, errors.New("value undetermined")
|
|
}
|
|
|
|
f, err := strconv.ParseFloat(perf[2], 64)
|
|
if err == nil {
|
|
fields["value"] = f
|
|
}
|
|
if perf[4] != "" {
|
|
low, high, err := parseThreshold(perf[4])
|
|
if err == nil {
|
|
if strings.Contains(perf[4], "@") {
|
|
fields["warning_le"] = low
|
|
fields["warning_ge"] = high
|
|
} else {
|
|
fields["warning_lt"] = low
|
|
fields["warning_gt"] = high
|
|
}
|
|
}
|
|
}
|
|
if perf[5] != "" {
|
|
low, high, err := parseThreshold(perf[5])
|
|
if err == nil {
|
|
if strings.Contains(perf[5], "@") {
|
|
fields["critical_le"] = low
|
|
fields["critical_ge"] = high
|
|
} else {
|
|
fields["critical_lt"] = low
|
|
fields["critical_gt"] = high
|
|
}
|
|
}
|
|
}
|
|
if perf[6] != "" {
|
|
f, err := strconv.ParseFloat(perf[6], 64)
|
|
if err == nil {
|
|
fields["min"] = f
|
|
}
|
|
}
|
|
if perf[7] != "" {
|
|
f, err := strconv.ParseFloat(perf[7], 64)
|
|
if err == nil {
|
|
fields["max"] = f
|
|
}
|
|
}
|
|
|
|
// Create metric
|
|
m := metric.New("nagios", tags, fields, timestamp)
|
|
|
|
// Add Metric
|
|
metrics = append(metrics, m)
|
|
}
|
|
|
|
return metrics, nil
|
|
}
|
|
|
|
// Float limits, copied from the math package.
const (
	// MaxFloat64 is the largest finite float64. It is used as the upper bound
	// of a threshold without an end ("10:").
	MaxFloat64 = 1.797693134862315708145274237317043567981e+308 // 2**1023 * (2**53 - 1) / 2**52
	// MinFloat64 is the smallest positive, non-zero float64. It is used as the
	// lower bound of a threshold whose start is "~".
	MinFloat64 = 4.940656458412465441765687928682213723651e-324 // 1 / 2**(1023 - 1 + 52)
)

// ErrBadThresholdFormat is returned by parseThreshold for a threshold that
// cannot be interpreted.
var ErrBadThresholdFormat = errors.New("bad threshold format")

// parseThreshold splits a nagios threshold into its lower and upper bound.
// Handles all cases from https://nagios-plugins.org/doc/guidelines.html#THRESHOLDFORMAT
//
//	"10"      -> 0, 10
//	"10:"     -> 10, MaxFloat64
//	"~:10"    -> MinFloat64, 10
//	"10:20"   -> 10, 20
//	"@10:20"  -> 10, 20
//
// A leading "@" only selects inside-range alerting and is not part of either
// bound, so it is stripped before the numbers are parsed; callers that care
// about it inspect the original string. A start that is empty or not a number
// is treated as 0 as long as the end is given.
//
// On any failure the result is (0, 0, ErrBadThresholdFormat): more than one
// ':', an unparsable end, or a range with neither a usable start nor an end.
func parseThreshold(threshold string) (vmin, vmax float64, err error) {
	bounds := strings.Split(strings.TrimPrefix(threshold, "@"), ":")

	switch len(bounds) {
	case 1:
		vmax, err = strconv.ParseFloat(bounds[0], 64)
		if err != nil {
			return 0, 0, ErrBadThresholdFormat
		}

		return 0, vmax, nil
	case 2:
		startKnown := true
		if bounds[0] == "~" {
			vmin = MinFloat64
		} else if vmin, err = strconv.ParseFloat(bounds[0], 64); err != nil {
			// Lenient: fall back to 0, but remember that the start carried no
			// information so an open end can still be rejected below.
			vmin, startKnown = 0, false
		}

		if bounds[1] == "" {
			if !startKnown {
				return 0, 0, ErrBadThresholdFormat
			}

			return vmin, MaxFloat64, nil
		}

		vmax, err = strconv.ParseFloat(bounds[1], 64)
		if err != nil {
			return 0, 0, ErrBadThresholdFormat
		}

		return vmin, vmax, nil
	default:
		return 0, 0, ErrBadThresholdFormat
	}
}
|
|
|
|
func init() {
|
|
// Register parser
|
|
parsers.Add("nagios",
|
|
func(defaultMetricName string) telegraf.Parser {
|
|
return &Parser{metricName: defaultMetricName}
|
|
},
|
|
)
|
|
}
|