467 lines
12 KiB
Go
467 lines
12 KiB
Go
//go:generate ../../../tools/readme_config_includer/generator
|
|
//go:build linux
|
|
|
|
package systemd_units
|
|
|
|
import (
|
|
"context"
|
|
_ "embed"
|
|
"fmt"
|
|
"math"
|
|
"os/user"
|
|
"path"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/coreos/go-systemd/v22/dbus"
|
|
|
|
"github.com/influxdata/telegraf"
|
|
"github.com/influxdata/telegraf/config"
|
|
"github.com/influxdata/telegraf/filter"
|
|
"github.com/influxdata/telegraf/plugins/inputs"
|
|
)
|
|
|
|
//go:embed sample.conf
|
|
var sampleConfig string
|
|
|
|
// The tables below translate systemd state names into the numeric codes
// reported by Gather. They mirror the state tables defined in
// https://github.com/systemd/systemd/blob/c87700a1335f489be31cd3549927da68b5638819/src/basic/unit-def.c
// with duplicate strings removed.
//
// NOTE(review): an earlier comment stated that these tables are shared by
// `subcommand_show` and `subcommand_list` and that changes must stay
// compatible with both; neither is visible in this file, so confirm before
// changing any value.

// loadMap assigns a code to each unit load state (reported as "load_code").
var loadMap = map[string]int{
	"loaded":      0,
	"stub":        1,
	"not-found":   2,
	"bad-setting": 3,
	"error":       4,
	"merged":      5,
	"masked":      6,
}

// activeMap assigns a code to each unit active state (reported as
// "active_code").
var activeMap = map[string]int{
	"active":       0,
	"reloading":    1,
	"inactive":     2,
	"failed":       3,
	"activating":   4,
	"deactivating": 5,
}

// subMap assigns a code to each unit sub state (reported as "sub_code"). Each
// per-unit-type table starts at its own offset, spaced 0x10 apart; a state
// name that appears in several tables is only listed once.
var subMap = map[string]int{
	// service_state_table, offset 0x0000
	"running":       0x0000,
	"dead":          0x0001,
	"start-pre":     0x0002,
	"start":         0x0003,
	"exited":        0x0004,
	"reload":        0x0005,
	"stop":          0x0006,
	"stop-watchdog": 0x0007,
	"stop-sigterm":  0x0008,
	"stop-sigkill":  0x0009,
	"stop-post":     0x000a,
	"final-sigterm": 0x000b,
	"failed":        0x000c,
	"auto-restart":  0x000d,
	"condition":     0x000e,
	"cleaning":      0x000f,

	// automount_state_table, offset 0x0010; the range is also used for the
	// continuation of service_state_table
	"waiting":                    0x0010,
	"reload-signal":              0x0011,
	"reload-notify":              0x0012,
	"final-watchdog":             0x0013,
	"dead-before-auto-restart":   0x0014,
	"failed-before-auto-restart": 0x0015,
	"dead-resources-pinned":      0x0016,
	"auto-restart-queued":        0x0017,

	// device_state_table, offset 0x0020
	"tentative": 0x0020,
	"plugged":   0x0021,

	// mount_state_table, offset 0x0030
	"mounting":           0x0030,
	"mounting-done":      0x0031,
	"mounted":            0x0032,
	"remounting":         0x0033,
	"unmounting":         0x0034,
	"remounting-sigterm": 0x0035,
	"remounting-sigkill": 0x0036,
	"unmounting-sigterm": 0x0037,
	"unmounting-sigkill": 0x0038,

	// path_state_table, offset 0x0040 (no entries of its own)

	// scope_state_table, offset 0x0050
	"abandoned": 0x0050,

	// slice_state_table, offset 0x0060
	"active": 0x0060,

	// socket_state_table, offset 0x0070
	"start-chown":      0x0070,
	"start-post":       0x0071,
	"listening":        0x0072,
	"stop-pre":         0x0073,
	"stop-pre-sigterm": 0x0074,
	"stop-pre-sigkill": 0x0075,
	"final-sigkill":    0x0076,

	// swap_state_table, offset 0x0080
	"activating":           0x0080,
	"activating-done":      0x0081,
	"deactivating":         0x0082,
	"deactivating-sigterm": 0x0083,
	"deactivating-sigkill": 0x0084,

	// target_state_table, offset 0x0090 (no entries of its own)

	// timer_state_table, offset 0x00a0
	"elapsed": 0x00a0,
}
|
|
|
|
type SystemdUnits struct {
|
|
Pattern string `toml:"pattern"`
|
|
UnitType string `toml:"unittype"`
|
|
Scope string `toml:"scope"`
|
|
Details bool `toml:"details"`
|
|
CollectDisabled bool `toml:"collect_disabled_units"`
|
|
Timeout config.Duration `toml:"timeout"`
|
|
Log telegraf.Logger `toml:"-"`
|
|
archParams
|
|
}
|
|
|
|
type archParams struct {
|
|
client client
|
|
pattern []string
|
|
filter filter.Filter
|
|
unitTypeDBus string
|
|
scope string
|
|
user string
|
|
warnUnitProps map[string]bool
|
|
}
|
|
|
|
type client interface {
|
|
// Connected returns whether client is connected
|
|
Connected() bool
|
|
|
|
// Close closes an established connection.
|
|
Close()
|
|
|
|
// ListUnitFilesByPatternsContext returns an array of all available units on disk matched the patterns.
|
|
ListUnitFilesByPatternsContext(ctx context.Context, states, pattern []string) ([]dbus.UnitFile, error)
|
|
|
|
// ListUnitsByNamesContext returns an array with units.
|
|
ListUnitsByNamesContext(ctx context.Context, units []string) ([]dbus.UnitStatus, error)
|
|
|
|
// GetUnitTypePropertiesContext returns the extra properties for a unit, specific to the unit type.
|
|
GetUnitTypePropertiesContext(ctx context.Context, unit, unitType string) (map[string]interface{}, error)
|
|
|
|
// GetUnitPropertiesContext takes the (unescaped) unit name and returns all of its dbus object properties.
|
|
GetUnitPropertiesContext(ctx context.Context, unit string) (map[string]interface{}, error)
|
|
|
|
// ListUnitsContext returns an array with all currently loaded units.
|
|
ListUnitsContext(ctx context.Context) ([]dbus.UnitStatus, error)
|
|
}
|
|
|
|
func (*SystemdUnits) SampleConfig() string {
|
|
return sampleConfig
|
|
}
|
|
|
|
func (s *SystemdUnits) Init() error {
|
|
// Set default pattern
|
|
if s.Pattern == "" {
|
|
s.Pattern = "*"
|
|
}
|
|
|
|
// Check unit-type and convert the first letter to uppercase as this is
|
|
// what dbus expects.
|
|
switch s.UnitType {
|
|
case "":
|
|
s.UnitType = "service"
|
|
case "service", "socket", "target", "device", "mount", "automount", "swap",
|
|
"timer", "path", "slice", "scope":
|
|
default:
|
|
return fmt.Errorf("invalid 'unittype' %q", s.UnitType)
|
|
}
|
|
s.unitTypeDBus = strings.ToUpper(s.UnitType[0:1]) + strings.ToLower(s.UnitType[1:])
|
|
|
|
s.pattern = strings.Split(s.Pattern, " ")
|
|
f, err := filter.Compile(s.pattern)
|
|
if err != nil {
|
|
return fmt.Errorf("compiling filter failed: %w", err)
|
|
}
|
|
s.filter = f
|
|
|
|
switch s.Scope {
|
|
case "", "system":
|
|
s.scope = "system"
|
|
case "user":
|
|
u, err := user.Current()
|
|
if err != nil {
|
|
return fmt.Errorf("unable to determine user: %w", err)
|
|
}
|
|
|
|
s.scope = "user"
|
|
s.user = u.Username
|
|
default:
|
|
return fmt.Errorf("invalid 'scope' %q", s.Scope)
|
|
}
|
|
|
|
s.warnUnitProps = make(map[string]bool)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *SystemdUnits) Start(telegraf.Accumulator) error {
|
|
ctx := context.Background()
|
|
|
|
var client *dbus.Conn
|
|
var err error
|
|
if s.scope == "user" {
|
|
client, err = dbus.NewUserConnectionContext(ctx)
|
|
} else {
|
|
client, err = dbus.NewSystemConnectionContext(ctx)
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
s.client = client
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *SystemdUnits) Gather(acc telegraf.Accumulator) error {
|
|
// Reconnect in case the connection was lost
|
|
if !s.client.Connected() {
|
|
s.Log.Debug("Connection to systemd daemon lost, trying to reconnect...")
|
|
s.Stop()
|
|
if err := s.Start(acc); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(s.Timeout))
|
|
defer cancel()
|
|
|
|
// List all loaded units to handle multi-instance units correctly
|
|
loaded, err := s.client.ListUnitsContext(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("listing loaded units failed: %w", err)
|
|
}
|
|
|
|
var files []dbus.UnitFile
|
|
if s.CollectDisabled {
|
|
// List all unit files matching the pattern to also get disabled units
|
|
list := []string{"enabled", "disabled", "static"}
|
|
files, err = s.client.ListUnitFilesByPatternsContext(ctx, list, s.pattern)
|
|
if err != nil {
|
|
return fmt.Errorf("listing unit files failed: %w", err)
|
|
}
|
|
}
|
|
|
|
// Collect all matching units, the loaded ones and the disabled ones
|
|
states := make([]dbus.UnitStatus, 0, len(loaded))
|
|
|
|
// Match all loaded units first
|
|
seen := make(map[string]bool)
|
|
for _, u := range loaded {
|
|
if !s.filter.Match(u.Name) {
|
|
continue
|
|
}
|
|
states = append(states, u)
|
|
|
|
// Remember multi-instance units to remove duplicates from files
|
|
instance := u.Name
|
|
if strings.Contains(u.Name, "@") {
|
|
prefix, _, _ := strings.Cut(u.Name, "@")
|
|
suffix := path.Ext(u.Name)
|
|
instance = prefix + "@" + suffix
|
|
}
|
|
seen[instance] = true
|
|
}
|
|
|
|
// Now split the unit-files into disabled ones and static ones, ignore
|
|
// enabled units as those are already contained in the "loaded" list.
|
|
if len(files) > 0 {
|
|
disabled := make([]string, 0, len(files))
|
|
static := make([]string, 0, len(files))
|
|
for _, f := range files {
|
|
name := path.Base(f.Path)
|
|
|
|
switch f.Type {
|
|
case "disabled":
|
|
if seen[name] {
|
|
continue
|
|
}
|
|
seen[name] = true
|
|
|
|
// Detect disabled multi-instance units and declare them as static
|
|
_, suffix, found := strings.Cut(name, "@")
|
|
instance, _, _ := strings.Cut(suffix, ".")
|
|
if found && instance == "" {
|
|
static = append(static, name)
|
|
continue
|
|
}
|
|
disabled = append(disabled, name)
|
|
case "static":
|
|
// Make sure we filter already loaded static multi-instance units
|
|
instance := name
|
|
if strings.Contains(name, "@") {
|
|
prefix, _, _ := strings.Cut(name, "@")
|
|
suffix := path.Ext(name)
|
|
instance = prefix + "@" + suffix
|
|
}
|
|
if seen[instance] || seen[name] {
|
|
continue
|
|
}
|
|
seen[instance] = true
|
|
static = append(static, name)
|
|
}
|
|
}
|
|
|
|
// Resolve the disabled and remaining static units
|
|
disabledStates, err := s.client.ListUnitsByNamesContext(ctx, disabled)
|
|
if err != nil {
|
|
return fmt.Errorf("listing unit states failed: %w", err)
|
|
}
|
|
states = append(states, disabledStates...)
|
|
|
|
// Add special information about unused static units
|
|
for _, name := range static {
|
|
if !strings.EqualFold(strings.TrimPrefix(path.Ext(name), "."), s.UnitType) {
|
|
continue
|
|
}
|
|
|
|
states = append(states, dbus.UnitStatus{
|
|
Name: name,
|
|
LoadState: "stub",
|
|
ActiveState: "inactive",
|
|
SubState: "dead",
|
|
})
|
|
}
|
|
}
|
|
|
|
// Merge the unit information into one struct
|
|
for _, state := range states {
|
|
// Filter units of the wrong type
|
|
if idx := strings.LastIndex(state.Name, "."); idx < 0 || state.Name[idx+1:] != s.UnitType {
|
|
continue
|
|
}
|
|
|
|
// Map the state names to numerical values
|
|
load, ok := loadMap[state.LoadState]
|
|
if !ok {
|
|
acc.AddError(fmt.Errorf("parsing field 'load' failed, value not in map: %s", state.LoadState))
|
|
continue
|
|
}
|
|
active, ok := activeMap[state.ActiveState]
|
|
if !ok {
|
|
acc.AddError(fmt.Errorf("parsing field 'active' failed, value not in map: %s", state.ActiveState))
|
|
continue
|
|
}
|
|
subState, ok := subMap[state.SubState]
|
|
if !ok {
|
|
acc.AddError(fmt.Errorf("parsing field 'sub' failed, value not in map: %s", state.SubState))
|
|
continue
|
|
}
|
|
|
|
// Create the metric
|
|
tags := map[string]string{
|
|
"name": state.Name,
|
|
"load": state.LoadState,
|
|
"active": state.ActiveState,
|
|
"sub": state.SubState,
|
|
}
|
|
if s.scope == "user" {
|
|
tags["user"] = s.user
|
|
}
|
|
|
|
fields := map[string]interface{}{
|
|
"load_code": load,
|
|
"active_code": active,
|
|
"sub_code": subState,
|
|
}
|
|
|
|
if s.Details {
|
|
properties, err := s.client.GetUnitTypePropertiesContext(ctx, state.Name, s.unitTypeDBus)
|
|
if err != nil {
|
|
// Skip units returning "Unknown interface" errors as those indicate
|
|
// that the unit is of the wrong type.
|
|
if strings.Contains(err.Error(), "Unknown interface") {
|
|
continue
|
|
}
|
|
// For other units we make up properties, usually those are
|
|
// disabled multi-instance units
|
|
properties = map[string]interface{}{
|
|
"StatusErrno": int64(-1),
|
|
"NRestarts": uint64(0),
|
|
}
|
|
}
|
|
|
|
// Get required unit file properties
|
|
unitProperties, err := s.client.GetUnitPropertiesContext(ctx, state.Name)
|
|
if err != nil && !s.warnUnitProps[state.Name] {
|
|
s.Log.Warnf("Cannot read unit properties for %q: %v", state.Name, err)
|
|
s.warnUnitProps[state.Name] = true
|
|
}
|
|
|
|
// Set tags
|
|
if v, found := unitProperties["UnitFileState"]; found {
|
|
tags["state"] = v.(string)
|
|
}
|
|
if v, found := unitProperties["UnitFilePreset"]; found {
|
|
tags["preset"] = v.(string)
|
|
}
|
|
|
|
// Set fields
|
|
if v, found := unitProperties["ActiveEnterTimestamp"]; found {
|
|
fields["active_enter_timestamp_us"] = v
|
|
}
|
|
|
|
fields["status_errno"] = properties["StatusErrno"]
|
|
fields["restarts"] = properties["NRestarts"]
|
|
fields["pid"] = properties["MainPID"]
|
|
|
|
fields["mem_current"] = properties["MemoryCurrent"]
|
|
fields["mem_peak"] = properties["MemoryPeak"]
|
|
fields["mem_avail"] = properties["MemoryAvailable"]
|
|
|
|
fields["swap_current"] = properties["MemorySwapCurrent"]
|
|
fields["swap_peak"] = properties["MemorySwapPeak"]
|
|
|
|
// Sanitize unset memory fields
|
|
for k, value := range fields {
|
|
switch {
|
|
case strings.HasPrefix(k, "mem_"), strings.HasPrefix(k, "swap_"):
|
|
v, ok := value.(uint64)
|
|
if ok && v == math.MaxUint64 || value == nil {
|
|
fields[k] = uint64(0)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
acc.AddFields("systemd_units", fields, tags)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *SystemdUnits) Stop() {
|
|
if s.client != nil && s.client.Connected() {
|
|
s.client.Close()
|
|
}
|
|
s.client = nil
|
|
}
|
|
|
|
func init() {
|
|
inputs.Add("systemd_units", func() telegraf.Input {
|
|
return &SystemdUnits{Timeout: config.Duration(5 * time.Second)}
|
|
})
|
|
}
|