// Package nagios parses the output of Nagios plugins (status line, long output
// and performance data) into telegraf metrics.
package nagios

import (
	"bufio"
	"bytes"
	"errors"
	"os/exec"
	"regexp"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/plugins/parsers"
)

// unknownExitCode is the nagios unknown status code.
// The exit code should be used if an error occurs or something unexpected happens.
const unknownExitCode = 3

// getExitCode gets the exit code from an error value which is the result
// of running a command through the exec package api.
//
// A nil err yields (0, nil). If err is not an *exec.ExitError, or the wait
// status cannot be extracted from it, unknownExitCode is returned together
// with a non-nil error so that a failure is never reported as state 0 (OK).
func getExitCode(err error) (int, error) {
	if err == nil {
		return 0, nil
	}

	var ee *exec.ExitError
	if !errors.As(err, &ee) {
		return unknownExitCode, err
	}

	ws, ok := ee.Sys().(syscall.WaitStatus)
	if !ok {
		return unknownExitCode, errors.New("expected syscall.WaitStatus")
	}

	return ws.ExitStatus(), nil
}

// AddState adds a state derived from the runErr. Unknown state will be set as fallback.
// If any error occurs, it is guaranteed to be added to the service output.
// An updated slice of metrics will be returned.
//
// If metrics already contains a "nagios_state" metric, the state is added to
// the first such metric in place. Otherwise a new "nagios_state" metric is
// appended, timestamped like the first metric (or with the current UTC time
// when metrics is empty).
func AddState(runErr error, errMessage []byte, metrics []telegraf.Metric) []telegraf.Metric {
	state, exitErr := getExitCode(runErr)
	// This will ensure that in every error case the valid nagios state 'unknown' will be returned.
	// No error needs to be thrown because the output will contain the error information.
	// Description found at 'Plugin Return Codes' https://nagios-plugins.org/doc/guidelines.html
	if exitErr != nil || state < 0 || state > unknownExitCode {
		state = unknownExitCode
	}

	for _, m := range metrics {
		if m.Name() == "nagios_state" {
			m.AddField("state", state)

			if state == unknownExitCode {
				// Prefer the exit-code error text over the raw error output.
				errorMessage := string(errMessage)
				if exitErr != nil && exitErr.Error() != "" {
					errorMessage = exitErr.Error()
				}
				value, ok := m.GetField("service_output")
				if !ok || value == "" {
					// By adding the error message as output, the metric contains all needed information to understand
					// the problem and fix it
					m.AddField("service_output", errorMessage)
				}
			}
			return metrics
		}
	}

	var ts time.Time
	if len(metrics) != 0 {
		ts = metrics[0].Time()
	} else {
		ts = time.Now().UTC()
	}
	f := map[string]interface{}{
		"state": state,
	}
	m := metric.New("nagios_state", nil, f, ts)

	return append(metrics, m)
}

// Parser parses Nagios plugin output into telegraf metrics.
type Parser struct {
	DefaultTags map[string]string `toml:"-"`
	Log         telegraf.Logger   `toml:"-"`

	// metricName is set from the default metric name handed to the
	// constructor registered in init.
	metricName string
}

// Got from Alignak
// https://github.com/Alignak-monitoring/alignak/blob/develop/alignak/misc/perfdata.py
var (
	// perfSplitRegExp splits a performance data string into `label=value...` items.
	perfSplitRegExp = regexp.MustCompile(`([^=]+=\S+)`)
	// nagiosRegExp captures, in order: label, value, unit, warning, critical, min, max.
	nagiosRegExp = regexp.MustCompile(
		`^([^=]+)=([\d\.\-\+eE]+)([\w\/%]*);?([\d\.\-\+eE:~@]+)?;?([\d\.\-\+eE:~@]+)?;?([\d\.\-\+eE]+)?;?([\d\.\-\+eE]+)?;?\s*`,
	)
)

// ParseLine parses line like Parse does and returns the first resulting metric.
// If parsing fails, the error is returned together with a nil metric.
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
	metrics, err := p.Parse([]byte(line))
	if err != nil {
		// Parse returns a nil slice on error; indexing it would panic.
		return nil, err
	}
	if len(metrics) == 0 {
		// Defensive: Parse appends a "nagios_state" metric on success.
		return nil, errors.New("no metrics parsed")
	}
	return metrics[0], nil
}

// SetDefaultTags stores the given tags in DefaultTags.
func (p *Parser) SetDefaultTags(tags map[string]string) {
	p.DefaultTags = tags
}

// Parse converts the output of a Nagios plugin into metrics.
//
// The first line is split at '|' into the service output and optional
// performance data; more than one '|' on that line is rejected as
// "illegal output format". Following lines are collected as long service
// output until a line containing '|' is found; the text after that '|' and
// all remaining lines are parsed as performance data. Each performance data
// item becomes a "nagios" metric, and a final "nagios_state" metric carries
// "service_output" and, if present, "long_service_output". All metrics share
// one timestamp taken when Parse is called.
func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
	ts := time.Now().UTC()

	s := bufio.NewScanner(bytes.NewReader(buf))

	var msg bytes.Buffer
	var longmsg bytes.Buffer

	metrics := make([]telegraf.Metric, 0)

	// Scan the first line.
	if !s.Scan() && s.Err() != nil {
		return nil, s.Err()
	}
	parts := bytes.Split(s.Bytes(), []byte{'|'})
	switch len(parts) {
	case 2:
		ms, err := parsePerfData(string(parts[1]), ts)
		if err != nil {
			p.Log.Errorf("Failed to parse performance data: %s\n", err.Error())
		}
		metrics = append(metrics, ms...)
		fallthrough
	case 1:
		msg.Write(bytes.TrimSpace(parts[0]))
	default:
		return nil, errors.New("illegal output format")
	}

	// Read long output.
	for s.Scan() {
		if bytes.Contains(s.Bytes(), []byte{'|'}) {
			// The line contains '|', so the split yields at least two parts.
			parts := bytes.Split(s.Bytes(), []byte{'|'})
			if longmsg.Len() != 0 {
				longmsg.WriteByte('\n')
			}
			longmsg.Write(bytes.TrimSpace(parts[0]))

			ms, err := parsePerfData(string(parts[1]), ts)
			if err != nil {
				p.Log.Errorf("Failed to parse performance data: %s\n", err.Error())
			}
			metrics = append(metrics, ms...)
			break
		}
		if longmsg.Len() != 0 {
			longmsg.WriteByte('\n')
		}
		longmsg.Write(bytes.TrimSpace(s.Bytes()))
	}

	// Parse extra performance data.
	for s.Scan() {
		ms, err := parsePerfData(s.Text(), ts)
		if err != nil {
			p.Log.Errorf("Failed to parse performance data: %s\n", err.Error())
		}
		metrics = append(metrics, ms...)
	}

	if s.Err() != nil {
		p.Log.Debugf("Unexpected io error: %s\n", s.Err())
	}

	// Create nagios state.
	fields := map[string]interface{}{
		"service_output": msg.String(),
	}
	if longmsg.Len() != 0 {
		fields["long_service_output"] = longmsg.String()
	}

	m := metric.New("nagios_state", nil, fields, ts)
	metrics = append(metrics, m)

	return metrics, nil
}

// parsePerfData turns a performance data string into "nagios" metrics, one
// per `'label'=value[UOM];[warn];[crit];[min];[max]` item, all stamped with
// timestamp. Items that do not match nagiosRegExp are skipped; this includes
// the undetermined value "U", which the value group of the expression cannot
// match. The returned error is currently always nil.
func parsePerfData(perfdatas string, timestamp time.Time) ([]telegraf.Metric, error) {
	metrics := make([]telegraf.Metric, 0)

	for _, unParsedPerf := range perfSplitRegExp.FindAllString(perfdatas, -1) {
		trimmedPerf := strings.TrimSpace(unParsedPerf)
		perf := nagiosRegExp.FindStringSubmatch(trimmedPerf)

		// verify at least `'label'=value[UOM];` existed
		if len(perf) < 3 {
			continue
		}
		if perf[1] == "" || perf[2] == "" {
			continue
		}

		fieldName := strings.Trim(perf[1], "'")
		tags := map[string]string{"perfdata": fieldName}
		if perf[3] != "" {
			tags["unit"] = perf[3]
		}

		fields := make(map[string]interface{})
		if f, err := strconv.ParseFloat(perf[2], 64); err == nil {
			fields["value"] = f
		}
		addThresholdFields(fields, "warning", perf[4])
		addThresholdFields(fields, "critical", perf[5])
		if perf[6] != "" {
			if f, err := strconv.ParseFloat(perf[6], 64); err == nil {
				fields["min"] = f
			}
		}
		if perf[7] != "" {
			if f, err := strconv.ParseFloat(perf[7], 64); err == nil {
				fields["max"] = f
			}
		}

		// Create metric
		m := metric.New("nagios", tags, fields, timestamp)

		// Add Metric
		metrics = append(metrics, m)
	}

	return metrics, nil
}

// addThresholdFields parses raw as a threshold and stores its bounds in
// fields as <prefix>_le / <prefix>_ge when raw contains '@', and as
// <prefix>_lt / <prefix>_gt otherwise. Empty or malformed thresholds add nothing.
func addThresholdFields(fields map[string]interface{}, prefix, raw string) {
	if raw == "" {
		return
	}
	low, high, err := parseThreshold(raw)
	if err != nil {
		return
	}
	if strings.Contains(raw, "@") {
		fields[prefix+"_le"] = low
		fields[prefix+"_ge"] = high
		return
	}
	fields[prefix+"_lt"] = low
	fields[prefix+"_gt"] = high
}

// from math
const (
	MaxFloat64 = 1.797693134862315708145274237317043567981e+308 // 2**1023 * (2**53 - 1) / 2**52
	// NOTE(review): this is the smallest positive float64, yet parseThreshold
	// uses it for a "~" start, which the Nagios guidelines define as negative
	// infinity - confirm this is intended.
	MinFloat64 = 4.940656458412465441765687928682213723651e-324 // 1 / 2**(1023 - 1 + 52)
)

// ErrBadThresholdFormat is returned by parseThreshold for thresholds it cannot parse.
var ErrBadThresholdFormat = errors.New("bad threshold format")

// parseThreshold parses a threshold of the form `[@]start:end`.
// Handles all cases from https://nagios-plugins.org/doc/guidelines.html#THRESHOLDFORMAT
//
// A leading '@' only selects whether the alert applies inside or outside the
// range, so it is stripped before the bounds are parsed. A bare number is
// treated as the end of a range starting at 0. A start of "~" yields
// MinFloat64, an empty end yields MaxFloat64, and an empty or unparseable
// start is treated as 0 as long as the end is a valid number. On any other
// malformed input ErrBadThresholdFormat is returned with both bounds zero.
func parseThreshold(threshold string) (vmin, vmax float64, err error) {
	bounds := strings.Split(strings.TrimPrefix(threshold, "@"), ":")
	switch len(bounds) {
	case 1:
		vmax, err = strconv.ParseFloat(bounds[0], 64)
		if err != nil {
			return 0, 0, ErrBadThresholdFormat
		}
		return 0, vmax, nil
	case 2:
		startOK := true
		if bounds[0] == "~" {
			vmin = MinFloat64
		} else if vmin, err = strconv.ParseFloat(bounds[0], 64); err != nil {
			vmin = 0
			startOK = false
		}

		if bounds[1] == "" {
			// Neither bound is usable (e.g. ":"); report the sentinel error
			// rather than leaking the strconv error with partial values.
			if !startOK {
				return 0, 0, ErrBadThresholdFormat
			}
			return vmin, MaxFloat64, nil
		}

		vmax, err = strconv.ParseFloat(bounds[1], 64)
		if err != nil {
			return 0, 0, ErrBadThresholdFormat
		}
		return vmin, vmax, nil
	default:
		return 0, 0, ErrBadThresholdFormat
	}
}

func init() {
	// Register parser
	parsers.Add("nagios",
		func(defaultMetricName string) telegraf.Parser {
			return &Parser{metricName: defaultMetricName}
		},
	)
}