
Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-05-24 07:26:29 +02:00
parent e393c3af3f
commit 4978089aab
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
4963 changed files with 677545 additions and 0 deletions


@@ -0,0 +1,300 @@
# Procstat Input Plugin
The procstat plugin can be used to monitor the system resource usage of one or
more processes. The procstat_lookup metric displays the query information,
specifically the number of PIDs returned by a search.

Processes can be selected for monitoring using one of several methods:
- pidfile
- exe
- pattern
- user
- systemd_unit
- cgroup
- supervisor_unit
- win_service
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
In addition to the plugin-specific configuration settings, plugins support
additional global and plugin configuration settings. These settings are used to
modify metrics, tags, and fields, or create aliases and configure ordering, etc.
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
## Configuration
```toml @sample.conf
# Monitor process cpu and memory usage
[[inputs.procstat]]
## PID file to monitor process
pid_file = "/var/run/nginx.pid"
## executable name (ie, pgrep <exe>)
# exe = "nginx"
## pattern as argument for pgrep (ie, pgrep -f <pattern>)
# pattern = "nginx"
## user as argument for pgrep (ie, pgrep -u <user>)
# user = "nginx"
## Systemd unit name, supports globs when include_systemd_children is set to true
# systemd_unit = "nginx.service"
# include_systemd_children = false
## CGroup name or path, supports globs
# cgroup = "systemd/system.slice/nginx.service"
  ## Supervisor service names of supervisorctl management
# supervisor_units = ["webserver", "proxy"]
## Windows service name
# win_service = ""
## override for process_name
## This is optional; default is sourced from /proc/<pid>/status
# process_name = "bar"
## Field name prefix
# prefix = ""
## Mode to use when calculating CPU usage. Can be one of 'solaris' or 'irix'.
# mode = "irix"
## Add the given information tag instead of a field
  ## This allows creating unique metrics/series when collecting processes with
## otherwise identical tags. However, please be careful as this can easily
## result in a large number of series, especially with short-lived processes,
## creating high cardinality at the output.
## Available options are:
## cmdline -- full commandline
## pid -- ID of the process
## ppid -- ID of the process' parent
## status -- state of the process
## user -- username owning the process
## socket only options:
## protocol -- protocol type of the process socket
## state -- state of the process socket
## src -- source address of the process socket (non-unix sockets)
## src_port -- source port of the process socket (non-unix sockets)
## dest -- destination address of the process socket (non-unix sockets)
## dest_port -- destination port of the process socket (non-unix sockets)
## name -- name of the process socket (unix sockets only)
## Available for procstat_lookup:
## level -- level of the process filtering
# tag_with = []
## Properties to collect
## Available options are
## cpu -- CPU usage statistics
## limits -- set resource limits
## memory -- memory usage statistics
## mmap -- mapped memory usage statistics (caution: can cause high load)
## sockets -- socket statistics for protocols in 'socket_protocols'
# properties = ["cpu", "limits", "memory", "mmap"]
## Protocol filter for the sockets property
## Available options are
## all -- all of the protocols below
## tcp4 -- TCP socket statistics for IPv4
## tcp6 -- TCP socket statistics for IPv6
## udp4 -- UDP socket statistics for IPv4
## udp6 -- UDP socket statistics for IPv6
## unix -- Unix socket statistics
# socket_protocols = ["all"]
  ## Method to use when finding process IDs. Can be one of 'pgrep' or
  ## 'native'. The pgrep finder calls the pgrep executable in the PATH while
  ## the native finder performs the search directly in a manner dependent on the
  ## platform. Default is 'pgrep'
# pid_finder = "pgrep"
## New-style filtering configuration (multiple filter sections are allowed)
# [[inputs.procstat.filter]]
# ## Name of the filter added as 'filter' tag
# name = "shell"
#
# ## Service filters, only one is allowed
# ## Systemd unit names (wildcards are supported)
# # systemd_units = []
# ## CGroup name or path (wildcards are supported)
# # cgroups = []
  # ## Supervisor service names of supervisorctl management
# # supervisor_units = []
# ## Windows service names
# # win_service = []
#
# ## Process filters, multiple are allowed
# ## Regular expressions to use for matching against the full command
# # patterns = ['.*']
# ## List of users owning the process (wildcards are supported)
# # users = ['*']
# ## List of executable paths of the process (wildcards are supported)
# # executables = ['*']
# ## List of process names (wildcards are supported)
# # process_names = ['*']
# ## Recursion depth for determining children of the matched processes
# ## A negative value means all children with infinite depth
# # recursion_depth = 0
```
### Windows support
Preliminary support for Windows has been added, however you may prefer using
the `win_perf_counters` input plugin as a more mature alternative.
### Darwin specifics
If you use this plugin with `supervisor_units` *and* `pattern` on Darwin, you
**have to** use the `pgrep` finder as the underlying library relies on `pgrep`.
### Permissions
Some files or directories may require elevated permissions. As such, a user may
need to run Telegraf with elevated permissions in order to access them and
produce metrics.
## Metrics
For descriptions of these tags and fields, consider reading one of the
following:
- [Linux Kernel /proc Filesystem][kernel /proc]
- [proc manpage][manpage]
[kernel /proc]: https://www.kernel.org/doc/html/latest/filesystems/proc.html
[manpage]: https://man7.org/linux/man-pages/man5/proc.5.html
Below is an example set of tags and fields:
- procstat
- tags:
- pid (if requested)
- cmdline (if requested)
- process_name
- pidfile (when defined)
- exe (when defined)
- pattern (when defined)
- user (when selected)
- systemd_unit (when defined)
- cgroup (when defined)
- cgroup_full (when cgroup or systemd_unit is used with glob)
- supervisor_unit (when defined)
- win_service (when defined)
- parent_pid (for child processes)
- child_level (for child processes)
- fields:
- child_major_faults (int)
- child_minor_faults (int)
- created_at (int) [epoch in nanoseconds]
- cpu_time (int)
- cpu_time_iowait (float) (zero for all OSes except Linux)
- cpu_time_system (float)
- cpu_time_user (float)
- cpu_usage (float)
  - disk_read_bytes (int, Linux only, *telegraf* may need to be run as **root**)
  - disk_write_bytes (int, Linux only, *telegraf* may need to be run as **root**)
- involuntary_context_switches (int)
- major_faults (int)
- memory_anonymous (int)
- memory_private_clean (int)
- memory_private_dirty (int)
- memory_pss (int)
- memory_referenced (int)
- memory_rss (int)
- memory_shared_clean (int)
- memory_shared_dirty (int)
- memory_size (int)
- memory_swap (int)
- memory_usage (float)
- memory_vms (int)
- minor_faults (int)
- nice_priority (int)
  - num_fds (int, *telegraf* may need to be run as **root**)
- num_threads (int)
- pid (int)
- ppid (int)
- status (string)
  - read_bytes (int, *telegraf* may need to be run as **root**)
  - read_count (int, *telegraf* may need to be run as **root**)
- realtime_priority (int)
- rlimit_cpu_time_hard (int)
- rlimit_cpu_time_soft (int)
- rlimit_file_locks_hard (int)
- rlimit_file_locks_soft (int)
- rlimit_memory_data_hard (int)
- rlimit_memory_data_soft (int)
- rlimit_memory_locked_hard (int)
- rlimit_memory_locked_soft (int)
- rlimit_memory_rss_hard (int)
- rlimit_memory_rss_soft (int)
- rlimit_memory_stack_hard (int)
- rlimit_memory_stack_soft (int)
- rlimit_memory_vms_hard (int)
- rlimit_memory_vms_soft (int)
- rlimit_nice_priority_hard (int)
- rlimit_nice_priority_soft (int)
- rlimit_num_fds_hard (int)
- rlimit_num_fds_soft (int)
- rlimit_realtime_priority_hard (int)
- rlimit_realtime_priority_soft (int)
- rlimit_signals_pending_hard (int)
- rlimit_signals_pending_soft (int)
- signals_pending (int)
- voluntary_context_switches (int)
  - write_bytes (int, *telegraf* may need to be run as **root**)
  - write_count (int, *telegraf* may need to be run as **root**)
- procstat_lookup
- tags:
- exe
- pid_finder
- pid_file
- pattern
- prefix
- user
- systemd_unit
- cgroup
- supervisor_unit
- win_service
- result
- fields:
- pid_count (int)
- running (int)
- result_code (int, success = 0, lookup_error = 1)
- procstat_socket (if configured, Linux only)
- tags:
- pid (if requested)
- protocol (if requested)
- cmdline (if requested)
- process_name
- pidfile (when defined)
- exe (when defined)
- pattern (when defined)
- user (when selected)
- systemd_unit (when defined)
- cgroup (when defined)
- cgroup_full (when cgroup or systemd_unit is used with glob)
- supervisor_unit (when defined)
- win_service (when defined)
- fields:
- protocol
- state
- pid
- src
- src_port (tcp and udp sockets only)
- dest (tcp and udp sockets only)
- dest_port (tcp and udp sockets only)
- bytes_received (tcp sockets only)
- bytes_sent (tcp sockets only)
- lost (tcp sockets only)
- retransmits (tcp sockets only)
- rx_queue
- tx_queue
- inode (unix sockets only)
*NOTE: Resource limit > 2147483647 will be reported as 2147483647.*
## Example Output
```text
procstat_lookup,host=prash-laptop,pattern=influxd,pid_finder=pgrep,result=success pid_count=1i,running=1i,result_code=0i 1582089700000000000
procstat,host=prash-laptop,pattern=influxd,process_name=influxd,user=root involuntary_context_switches=151496i,child_minor_faults=1061i,child_major_faults=8i,cpu_time_user=2564.81,pid=32025i,major_faults=8609i,created_at=1580107536000000000i,voluntary_context_switches=1058996i,cpu_time_system=616.98,memory_swap=0i,memory_locked=0i,memory_usage=1.7797634601593018,num_threads=18i,cpu_time_iowait=0,memory_rss=148643840i,memory_vms=1435688960i,memory_data=0i,memory_stack=0i,minor_faults=1856550i 1582089700000000000
procstat_socket,host=prash-laptop,process_name=browser,protocol=tcp4 bytes_received=826987i,bytes_sent=32869i,dest="192.168.0.2",dest_port=443i,lost=0i,pid=32025i,retransmits=0i,rx_queue=0i,src="192.168.0.1",src_port=52106i,state="established",tx_queue=0i 1582089700000000000
```


@@ -0,0 +1,234 @@
package procstat
import (
"errors"
"fmt"
"regexp"
"strconv"
"strings"
gopsprocess "github.com/shirou/gopsutil/v4/process"
"github.com/influxdata/telegraf"
telegraf_filter "github.com/influxdata/telegraf/filter"
)
type filter struct {
Name string `toml:"name"`
PidFiles []string `toml:"pid_files"`
SystemdUnits []string `toml:"systemd_units"`
SupervisorUnits []string `toml:"supervisor_units"`
WinService []string `toml:"win_services"`
CGroups []string `toml:"cgroups"`
Patterns []string `toml:"patterns"`
Users []string `toml:"users"`
Executables []string `toml:"executables"`
ProcessNames []string `toml:"process_names"`
RecursionDepth int `toml:"recursion_depth"`
Log telegraf.Logger `toml:"-"`
filterSupervisorUnit string
filterCmds []*regexp.Regexp
filterUser telegraf_filter.Filter
filterExecutable telegraf_filter.Filter
filterProcessName telegraf_filter.Filter
finder *processFinder
}
func (f *filter) init() error {
if f.Name == "" {
return errors.New("filter must be named")
}
// Check for only one service selector being active
var active []string
if len(f.PidFiles) > 0 {
active = append(active, "pid_files")
}
if len(f.CGroups) > 0 {
active = append(active, "cgroups")
}
if len(f.SystemdUnits) > 0 {
active = append(active, "systemd_units")
}
if len(f.SupervisorUnits) > 0 {
active = append(active, "supervisor_units")
}
if len(f.WinService) > 0 {
active = append(active, "win_services")
}
if len(active) > 1 {
return fmt.Errorf("cannot select multiple services %q", strings.Join(active, ", "))
}
// Prepare the filters
f.filterCmds = make([]*regexp.Regexp, 0, len(f.Patterns))
for _, p := range f.Patterns {
re, err := regexp.Compile(p)
if err != nil {
return fmt.Errorf("compiling pattern %q of filter %q failed: %w", p, f.Name, err)
}
f.filterCmds = append(f.filterCmds, re)
}
f.filterSupervisorUnit = strings.TrimSpace(strings.Join(f.SupervisorUnits, " "))
var err error
if f.filterUser, err = telegraf_filter.Compile(f.Users); err != nil {
return fmt.Errorf("compiling users filter for %q failed: %w", f.Name, err)
}
if f.filterExecutable, err = telegraf_filter.Compile(f.Executables); err != nil {
return fmt.Errorf("compiling executables filter for %q failed: %w", f.Name, err)
}
if f.filterProcessName, err = telegraf_filter.Compile(f.ProcessNames); err != nil {
return fmt.Errorf("compiling process-names filter for %q failed: %w", f.Name, err)
}
// Setup the process finder
f.finder = newProcessFinder(f.Log)
return nil
}
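
The exclusivity check above means each `[[inputs.procstat.filter]]` section may use at most one service selector. A minimal, hypothetical package-internal sketch (e.g. in a `_test.go` file) of how that check behaves:

```go
package procstat

import "testing"

// Hypothetical test: setting two service selectors in one filter section
// must make init() fail with "cannot select multiple services".
func TestFilterRejectsMultipleServiceSelectors(t *testing.T) {
    f := filter{
        Name:         "example",
        SystemdUnits: []string{"nginx.service"},
        CGroups:      []string{"system.slice/nginx.service"},
    }
    if err := f.init(); err == nil {
        t.Fatal("expected an error for multiple service selectors")
    }
}
```
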
func (f *filter) applyFilter() ([]processGroup, error) {
// Determine processes on the service level. If there is no constraint on
// the services, use all processes for matching.
var groups []processGroup
switch {
case len(f.PidFiles) > 0:
g, err := f.finder.findByPidFiles(f.PidFiles)
if err != nil {
return nil, err
}
groups = append(groups, g...)
case len(f.CGroups) > 0:
g, err := findByCgroups(f.CGroups)
if err != nil {
return nil, err
}
groups = append(groups, g...)
case len(f.SystemdUnits) > 0:
g, err := findBySystemdUnits(f.SystemdUnits)
if err != nil {
return nil, err
}
groups = append(groups, g...)
case f.filterSupervisorUnit != "":
g, err := findBySupervisorUnits(f.filterSupervisorUnit)
if err != nil {
return nil, err
}
groups = append(groups, g...)
case len(f.WinService) > 0:
g, err := findByWindowsServices(f.WinService)
if err != nil {
return nil, err
}
groups = append(groups, g...)
default:
procs, err := gopsprocess.Processes()
if err != nil {
return nil, err
}
groups = append(groups, processGroup{processes: procs, tags: make(map[string]string)})
}
// Filter by additional properties such as users, patterns etc
result := make([]processGroup, 0, len(groups))
for _, g := range groups {
var matched []*gopsprocess.Process
for _, p := range g.processes {
// Users
if f.filterUser != nil {
if username, err := p.Username(); err != nil || !f.filterUser.Match(username) {
// Errors can happen if we don't have permissions or the process no longer exists
continue
}
}
// Executables
if f.filterExecutable != nil {
if exe, err := p.Exe(); err != nil || !f.filterExecutable.Match(exe) {
continue
}
}
// Process names
if f.filterProcessName != nil {
if name, err := p.Name(); err != nil || !f.filterProcessName.Match(name) {
continue
}
}
// Patterns
if len(f.filterCmds) > 0 {
cmd, err := p.Cmdline()
if err != nil {
// This can happen if we don't have permissions or the process no longer exists
continue
}
var found bool
for _, re := range f.filterCmds {
if re.MatchString(cmd) {
found = true
break
}
}
if !found {
continue
}
}
matched = append(matched, p)
}
result = append(result, processGroup{processes: matched, tags: g.tags})
}
// Resolve children down to the requested depth
previous := result
for depth := 0; depth < f.RecursionDepth || f.RecursionDepth < 0; depth++ {
children := make([]processGroup, 0, len(previous))
for _, group := range previous {
for _, p := range group.processes {
c, err := getChildren(p)
if err != nil {
return nil, fmt.Errorf("unable to get children of process %d: %w", p.Pid, err)
}
if len(c) == 0 {
continue
}
tags := make(map[string]string, len(group.tags)+1)
for k, v := range group.tags {
tags[k] = v
}
tags["parent_pid"] = strconv.FormatInt(int64(p.Pid), 10)
children = append(children, processGroup{
processes: c,
tags: tags,
level: depth + 1,
})
}
}
if len(children) == 0 {
break
}
result = append(result, children...)
previous = children
}
return result, nil
}
func getChildren(p *gopsprocess.Process) ([]*gopsprocess.Process, error) {
children, err := p.Children()
// Check for cases that do not really mean error but rather means that there
// is no match.
switch {
case err == nil,
errors.Is(err, gopsprocess.ErrorNoChildren),
strings.Contains(err.Error(), "exit status 1"):
return children, nil
}
return nil, fmt.Errorf("unable to get children of process %d: %w", p.Pid, err)
}
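
To round off the filter flow, a hedged sketch (package-internal, Linux with systemd assumed, unit name and `testutil.Logger` from telegraf's test helpers are assumptions) of resolving a unit's processes and their direct children via `applyFilter`:

```go
package procstat

import (
    "testing"

    "github.com/influxdata/telegraf/testutil"
)

// Hypothetical, Linux-only sketch: resolve the processes of a systemd unit
// plus their direct children (recursion_depth = 1). A negative depth would
// follow all descendants; 0 keeps only the matched processes themselves.
func TestFilterResolvesChildren(t *testing.T) {
    f := filter{
        Name:           "web",
        SystemdUnits:   []string{"nginx.service"}, // assumed unit name
        RecursionDepth: 1,
        Log:            testutil.Logger{},
    }
    if err := f.init(); err != nil {
        t.Fatal(err)
    }
    groups, err := f.applyFilter()
    if err != nil {
        t.Skipf("lookup failed (no systemd or unit missing?): %v", err)
    }
    for _, g := range groups {
        t.Logf("level=%d tags=%v processes=%d", g.level, g.tags, len(g.processes))
    }
}
```
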


@@ -0,0 +1,131 @@
package procstat
import (
"fmt"
"os"
"regexp"
"strconv"
"strings"
gopsprocess "github.com/shirou/gopsutil/v4/process"
)
// NativeFinder uses gopsutil to find processes
type NativeFinder struct{}
// uid returns all PIDs of processes owned by the given user
func (*NativeFinder) uid(user string) ([]pid, error) {
var dst []pid
procs, err := gopsprocess.Processes()
if err != nil {
return dst, err
}
for _, p := range procs {
username, err := p.Username()
if err != nil {
// Skip: this can happen if the PID no longer exists or we lack permission to access it
continue
}
if username == user {
dst = append(dst, pid(p.Pid))
}
}
return dst, nil
}
// pidFile returns the PID contained in the given PID file.
func (*NativeFinder) pidFile(path string) ([]pid, error) {
var pids []pid
pidString, err := os.ReadFile(path)
if err != nil {
return pids, fmt.Errorf("failed to read pidfile %q: %w", path, err)
}
processID, err := strconv.ParseInt(strings.TrimSpace(string(pidString)), 10, 32)
if err != nil {
return pids, err
}
pids = append(pids, pid(processID))
return pids, nil
}
// fullPattern returns the PIDs of processes whose full command line matches the pattern
func (*NativeFinder) fullPattern(pattern string) ([]pid, error) {
var pids []pid
regxPattern, err := regexp.Compile(pattern)
if err != nil {
return pids, err
}
procs, err := fastProcessList()
if err != nil {
return pids, err
}
for _, p := range procs {
cmd, err := p.Cmdline()
if err != nil {
// Skip: this can happen if the PID no longer exists or we lack permission to access it
continue
}
if regxPattern.MatchString(cmd) {
pids = append(pids, pid(p.Pid))
}
}
return pids, err
}
// children returns the PIDs of the direct children of the given process
func (*NativeFinder) children(processID pid) ([]pid, error) {
// Get all running processes
p, err := gopsprocess.NewProcess(int32(processID))
if err != nil {
return nil, fmt.Errorf("getting process %d failed: %w", processID, err)
}
// Get all children of the current process
children, err := p.Children()
if err != nil {
return nil, fmt.Errorf("unable to get children of process %d: %w", p.Pid, err)
}
pids := make([]pid, 0, len(children))
for _, child := range children {
pids = append(pids, pid(child.Pid))
}
return pids, err
}
func fastProcessList() ([]*gopsprocess.Process, error) {
pids, err := gopsprocess.Pids()
if err != nil {
return nil, err
}
result := make([]*gopsprocess.Process, 0, len(pids))
for _, pid := range pids {
result = append(result, &gopsprocess.Process{Pid: pid})
}
return result, nil
}
// pattern returns the PIDs of processes whose name matches the pattern
func (*NativeFinder) pattern(pattern string) ([]pid, error) {
var pids []pid
regxPattern, err := regexp.Compile(pattern)
if err != nil {
return pids, err
}
procs, err := fastProcessList()
if err != nil {
return pids, err
}
for _, p := range procs {
name, err := processName(p)
if err != nil {
// Skip: this can happen if the PID no longer exists or we lack permission to access it
continue
}
if regxPattern.MatchString(name) {
pids = append(pids, pid(p.Pid))
}
}
return pids, err
}


@@ -0,0 +1,98 @@
package procstat
import (
"context"
"os"
"os/exec"
"os/user"
"runtime"
"testing"
"github.com/stretchr/testify/require"
)
func BenchmarkPattern(b *testing.B) {
finder := &NativeFinder{}
for n := 0; n < b.N; n++ {
_, err := finder.pattern(".*")
require.NoError(b, err)
}
}
func BenchmarkFullPattern(b *testing.B) {
finder := &NativeFinder{}
for n := 0; n < b.N; n++ {
_, err := finder.fullPattern(".*")
require.NoError(b, err)
}
}
func TestChildPattern(t *testing.T) {
if runtime.GOOS == "windows" || runtime.GOOS == "darwin" {
t.Skip("Skipping test on unsupported platform")
}
// Get our own process name
parentName, err := os.Executable()
require.NoError(t, err)
// Spawn two child processes and get their PIDs
expected := make([]pid, 0, 2)
ctx, cancel := context.WithCancel(t.Context())
defer cancel()
// First process
cmd1 := exec.CommandContext(ctx, "/bin/sh")
require.NoError(t, cmd1.Start(), "starting first command failed")
expected = append(expected, pid(cmd1.Process.Pid))
// Second process
cmd2 := exec.CommandContext(ctx, "/bin/sh")
require.NoError(t, cmd2.Start(), "starting second command failed")
expected = append(expected, pid(cmd2.Process.Pid))
// Use the plugin to find the children
finder := &NativeFinder{}
parent, err := finder.pattern(parentName)
require.NoError(t, err)
require.Len(t, parent, 1)
children, err := finder.children(parent[0])
require.NoError(t, err)
require.ElementsMatch(t, expected, children)
}
func TestGather_RealPatternIntegration(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
pg := &NativeFinder{}
pids, err := pg.pattern(`procstat`)
require.NoError(t, err)
require.NotEmpty(t, pids)
}
func TestGather_RealFullPatternIntegration(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
if runtime.GOOS != "windows" {
t.Skip("Skipping integration test on Non-Windows OS")
}
pg := &NativeFinder{}
pids, err := pg.fullPattern(`%procstat%`)
require.NoError(t, err)
require.NotEmpty(t, pids)
}
func TestGather_RealUserIntegration(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
currentUser, err := user.Current()
require.NoError(t, err)
pg := &NativeFinder{}
pids, err := pg.uid(currentUser.Username)
require.NoError(t, err)
require.NotEmpty(t, pids)
}


@@ -0,0 +1,381 @@
//go:build linux
package procstat
import (
"context"
"errors"
"fmt"
"os"
"strconv"
"strings"
"syscall"
"github.com/coreos/go-systemd/v22/dbus"
"github.com/prometheus/procfs"
gopsnet "github.com/shirou/gopsutil/v4/net"
gopsprocess "github.com/shirou/gopsutil/v4/process"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
"github.com/influxdata/telegraf/internal"
)
func processName(p *gopsprocess.Process) (string, error) {
return p.Exe()
}
func queryPidWithWinServiceName(_ string) (uint32, error) {
return 0, errors.New("os not supporting win_service option")
}
func collectMemmap(proc process, prefix string, fields map[string]any) {
memMapStats, err := proc.MemoryMaps(true)
if err == nil && len(*memMapStats) == 1 {
memMap := (*memMapStats)[0]
fields[prefix+"memory_size"] = memMap.Size
fields[prefix+"memory_pss"] = memMap.Pss
fields[prefix+"memory_shared_clean"] = memMap.SharedClean
fields[prefix+"memory_shared_dirty"] = memMap.SharedDirty
fields[prefix+"memory_private_clean"] = memMap.PrivateClean
fields[prefix+"memory_private_dirty"] = memMap.PrivateDirty
fields[prefix+"memory_referenced"] = memMap.Referenced
fields[prefix+"memory_anonymous"] = memMap.Anonymous
fields[prefix+"memory_swap"] = memMap.Swap
}
}
func findBySystemdUnits(units []string) ([]processGroup, error) {
ctx := context.Background()
conn, err := dbus.NewSystemConnectionContext(ctx)
if err != nil {
return nil, fmt.Errorf("failed to connect to systemd: %w", err)
}
defer conn.Close()
sdunits, err := conn.ListUnitsByPatternsContext(ctx, []string{"enabled", "disabled", "static"}, units)
if err != nil {
return nil, fmt.Errorf("failed to list units: %w", err)
}
groups := make([]processGroup, 0, len(sdunits))
for _, u := range sdunits {
prop, err := conn.GetUnitTypePropertyContext(ctx, u.Name, "Service", "MainPID")
if err != nil {
// This unit might not be a service or similar
continue
}
raw := prop.Value.Value()
pid, ok := raw.(uint32)
if !ok {
return nil, fmt.Errorf("failed to parse PID %v of unit %q: invalid type %T", raw, u, raw)
}
p, err := gopsprocess.NewProcess(int32(pid))
if err != nil {
return nil, fmt.Errorf("failed to find process for PID %d of unit %q: %w", pid, u, err)
}
groups = append(groups, processGroup{
processes: []*gopsprocess.Process{p},
tags: map[string]string{"systemd_unit": u.Name},
})
}
return groups, nil
}
func findByWindowsServices(_ []string) ([]processGroup, error) {
return nil, nil
}
func collectTotalReadWrite(proc process) (r, w uint64, err error) {
path := internal.GetProcPath()
fs, err := procfs.NewFS(path)
if err != nil {
return 0, 0, err
}
p, err := fs.Proc(int(proc.pid()))
if err != nil {
return 0, 0, err
}
stat, err := p.IO()
if err != nil {
return 0, 0, err
}
return stat.RChar, stat.WChar, nil
}
/* Socket statistics functions */
func socketStateName(s uint8) string {
switch s {
case unix.BPF_TCP_ESTABLISHED:
return "established"
case unix.BPF_TCP_SYN_SENT:
return "syn-sent"
case unix.BPF_TCP_SYN_RECV:
return "syn-recv"
case unix.BPF_TCP_FIN_WAIT1:
return "fin-wait1"
case unix.BPF_TCP_FIN_WAIT2:
return "fin-wait2"
case unix.BPF_TCP_TIME_WAIT:
return "time-wait"
case unix.BPF_TCP_CLOSE:
return "closed"
case unix.BPF_TCP_CLOSE_WAIT:
return "close-wait"
case unix.BPF_TCP_LAST_ACK:
return "last-ack"
case unix.BPF_TCP_LISTEN:
return "listen"
case unix.BPF_TCP_CLOSING:
return "closing"
case unix.BPF_TCP_NEW_SYN_RECV:
return "sync-recv"
}
return "unknown"
}
func socketTypeName(t uint8) string {
switch t {
case syscall.SOCK_STREAM:
return "stream"
case syscall.SOCK_DGRAM:
return "dgram"
case syscall.SOCK_RAW:
return "raw"
case syscall.SOCK_RDM:
return "rdm"
case syscall.SOCK_SEQPACKET:
return "seqpacket"
case syscall.SOCK_DCCP:
return "dccp"
case syscall.SOCK_PACKET:
return "packet"
}
return "unknown"
}
func mapFdToInode(pid int32, fd uint32) (uint32, error) {
root := internal.GetProcPath()
fn := fmt.Sprintf("%s/%d/fd/%d", root, pid, fd)
link, err := os.Readlink(fn)
if err != nil {
return 0, fmt.Errorf("reading link failed: %w", err)
}
target := strings.TrimPrefix(link, "socket:[")
target = strings.TrimSuffix(target, "]")
inode, err := strconv.ParseUint(target, 10, 32)
if err != nil {
return 0, fmt.Errorf("parsing link %q: %w", link, err)
}
return uint32(inode), nil
}
func statsTCP(conns []gopsnet.ConnectionStat, family uint8) ([]map[string]interface{}, error) {
if len(conns) == 0 {
return nil, nil
}
// For TCP we need the inode for each connection to relate the connection
// statistics to the actual process socket. Therefore, map the
// file-descriptors to inodes using the /proc/<pid>/fd entries.
inodes := make(map[uint32]gopsnet.ConnectionStat, len(conns))
for _, c := range conns {
inode, err := mapFdToInode(c.Pid, c.Fd)
if err != nil {
return nil, fmt.Errorf("mapping fd %d of pid %d failed: %w", c.Fd, c.Pid, err)
}
inodes[inode] = c
}
// Get the TCP socket statistics from the netlink socket.
responses, err := netlink.SocketDiagTCPInfo(family)
if err != nil {
return nil, fmt.Errorf("connecting to diag socket failed: %w", err)
}
// Filter the responses via the inodes belonging to the process
fieldslist := make([]map[string]interface{}, 0)
for _, r := range responses {
c, found := inodes[r.InetDiagMsg.INode]
if !found {
// The inode does not belong to the process.
continue
}
var proto string
switch r.InetDiagMsg.Family {
case syscall.AF_INET:
proto = "tcp4"
case syscall.AF_INET6:
proto = "tcp6"
default:
continue
}
fields := map[string]interface{}{
"protocol": proto,
"state": socketStateName(r.InetDiagMsg.State),
"pid": c.Pid,
"src": r.InetDiagMsg.ID.Source.String(),
"src_port": r.InetDiagMsg.ID.SourcePort,
"dest": r.InetDiagMsg.ID.Destination.String(),
"dest_port": r.InetDiagMsg.ID.DestinationPort,
"bytes_received": r.TCPInfo.Bytes_received,
"bytes_sent": r.TCPInfo.Bytes_sent,
"lost": r.TCPInfo.Lost,
"retransmits": r.TCPInfo.Retransmits,
"rx_queue": r.InetDiagMsg.RQueue,
"tx_queue": r.InetDiagMsg.WQueue,
}
fieldslist = append(fieldslist, fields)
}
return fieldslist, nil
}
func statsUDP(conns []gopsnet.ConnectionStat, family uint8) ([]map[string]interface{}, error) {
if len(conns) == 0 {
return nil, nil
}
// For UDP we need the inode for each connection to relate the connection
// statistics to the actual process socket. Therefore, map the
// file-descriptors to inodes using the /proc/<pid>/fd entries.
inodes := make(map[uint32]gopsnet.ConnectionStat, len(conns))
for _, c := range conns {
inode, err := mapFdToInode(c.Pid, c.Fd)
if err != nil {
return nil, fmt.Errorf("mapping fd %d of pid %d failed: %w", c.Fd, c.Pid, err)
}
inodes[inode] = c
}
// Get the UDP socket statistics from the netlink socket.
responses, err := netlink.SocketDiagUDPInfo(family)
if err != nil {
return nil, fmt.Errorf("connecting to diag socket failed: %w", err)
}
// Filter the responses via the inodes belonging to the process
fieldslist := make([]map[string]interface{}, 0)
for _, r := range responses {
c, found := inodes[r.InetDiagMsg.INode]
if !found {
// The inode does not belong to the process.
continue
}
var proto string
switch r.InetDiagMsg.Family {
case syscall.AF_INET:
proto = "udp4"
case syscall.AF_INET6:
proto = "udp6"
default:
continue
}
fields := map[string]interface{}{
"protocol": proto,
"state": socketStateName(r.InetDiagMsg.State),
"pid": c.Pid,
"src": r.InetDiagMsg.ID.Source.String(),
"src_port": r.InetDiagMsg.ID.SourcePort,
"dest": r.InetDiagMsg.ID.Destination.String(),
"dest_port": r.InetDiagMsg.ID.DestinationPort,
"rx_queue": r.InetDiagMsg.RQueue,
"tx_queue": r.InetDiagMsg.WQueue,
}
fieldslist = append(fieldslist, fields)
}
return fieldslist, nil
}
func statsUnix(conns []gopsnet.ConnectionStat) ([]map[string]interface{}, error) {
if len(conns) == 0 {
return nil, nil
}
// We need to read the inode for each connection to relate the connection
// statistics to the actual process socket. Therefore, map the
// file-descriptors to inodes using the /proc/<pid>/fd entries.
inodes := make(map[uint32]gopsnet.ConnectionStat, len(conns))
for _, c := range conns {
inode, err := mapFdToInode(c.Pid, c.Fd)
if err != nil {
return nil, fmt.Errorf("mapping fd %d of pid %d failed: %w", c.Fd, c.Pid, err)
}
inodes[inode] = c
}
// Get the unix socket statistics from the netlink socket.
responses, err := netlink.UnixSocketDiagInfo()
if err != nil {
return nil, fmt.Errorf("connecting to diag socket failed: %w", err)
}
// Filter the responses via the inodes belonging to the process
fieldslist := make([]map[string]interface{}, 0)
for _, r := range responses {
// Check if the inode belongs to the process and skip otherwise
c, found := inodes[r.DiagMsg.INode]
if !found {
continue
}
name := c.Laddr.IP
if name == "" {
name = fmt.Sprintf("inode-%d", r.DiagMsg.INode)
}
fields := map[string]interface{}{
"protocol": "unix",
"type": "stream",
"state": socketStateName(r.DiagMsg.State),
"pid": c.Pid,
"name": name,
"rx_queue": r.Queue.RQueue,
"tx_queue": r.Queue.WQueue,
"inode": r.DiagMsg.INode,
}
if r.Peer != nil {
fields["peer"] = *r.Peer
}
fieldslist = append(fieldslist, fields)
}
// Diagnosis only works for stream sockets, so add all non-stream sockets
// of the process without further data
for inode, c := range inodes {
if c.Type == syscall.SOCK_STREAM {
continue
}
name := c.Laddr.IP
if name == "" {
name = fmt.Sprintf("inode-%d", inode)
}
fields := map[string]interface{}{
"protocol": "unix",
"type": socketTypeName(uint8(c.Type)),
"state": "close",
"pid": c.Pid,
"name": name,
"rx_queue": uint32(0),
"tx_queue": uint32(0),
"inode": inode,
}
fieldslist = append(fieldslist, fields)
}
return fieldslist, nil
}
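
As a hedged illustration of how these Linux helpers fit together (package-internal; the netlink diag queries may require elevated privileges), socket statistics for a single process could be collected roughly the way `process.metrics()` does for the `sockets` property:

```go
package procstat

import (
    "os"
    "syscall"
    "testing"

    gopsnet "github.com/shirou/gopsutil/v4/net"
)

// Hypothetical sketch: list the TCP/IPv4 connections of our own PID and
// enrich them with netlink INET_DIAG statistics via statsTCP().
func TestStatsTCPSketch(t *testing.T) {
    conns, err := gopsnet.ConnectionsPid("tcp4", int32(os.Getpid()))
    if err != nil {
        t.Fatal(err)
    }
    fields, err := statsTCP(conns, syscall.AF_INET)
    if err != nil {
        t.Skipf("netlink diag not available: %v", err)
    }
    for _, f := range fields {
        t.Logf("%v:%v -> %v:%v state=%v", f["src"], f["src_port"], f["dest"], f["dest_port"], f["state"])
    }
}
```
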


@@ -0,0 +1,103 @@
//go:build !linux && !windows
package procstat
import (
"errors"
"syscall"
gopsnet "github.com/shirou/gopsutil/v4/net"
gopsprocess "github.com/shirou/gopsutil/v4/process"
)
func processName(p *gopsprocess.Process) (string, error) {
return p.Exe()
}
func queryPidWithWinServiceName(string) (uint32, error) {
return 0, errors.New("os not supporting win_service option")
}
func collectMemmap(process, string, map[string]any) {}
func findBySystemdUnits([]string) ([]processGroup, error) {
return nil, nil
}
func findByWindowsServices([]string) ([]processGroup, error) {
return nil, nil
}
func collectTotalReadWrite(process) (r, w uint64, err error) {
return 0, 0, errors.ErrUnsupported
}
func statsTCP(conns []gopsnet.ConnectionStat, _ uint8) ([]map[string]interface{}, error) {
if len(conns) == 0 {
return nil, nil
}
// Convert the connection information into per-socket field maps
fieldslist := make([]map[string]interface{}, 0, len(conns))
for _, c := range conns {
var proto string
switch c.Family {
case syscall.AF_INET:
proto = "tcp4"
case syscall.AF_INET6:
proto = "tcp6"
default:
continue
}
fields := map[string]interface{}{
"protocol": proto,
"state": c.Status,
"pid": c.Pid,
"src": c.Laddr.IP,
"src_port": c.Laddr.Port,
"dest": c.Raddr.IP,
"dest_port": c.Raddr.Port,
}
fieldslist = append(fieldslist, fields)
}
return fieldslist, nil
}
func statsUDP(conns []gopsnet.ConnectionStat, _ uint8) ([]map[string]interface{}, error) {
if len(conns) == 0 {
return nil, nil
}
// Convert the connection information into per-socket field maps
fieldslist := make([]map[string]interface{}, 0, len(conns))
for _, c := range conns {
var proto string
switch c.Family {
case syscall.AF_INET:
proto = "udp4"
case syscall.AF_INET6:
proto = "udp6"
default:
continue
}
fields := map[string]interface{}{
"protocol": proto,
"state": c.Status,
"pid": c.Pid,
"src": c.Laddr.IP,
"src_port": c.Laddr.Port,
"dest": c.Raddr.IP,
"dest_port": c.Raddr.Port,
}
fieldslist = append(fieldslist, fields)
}
return fieldslist, nil
}
func statsUnix([]gopsnet.ConnectionStat) ([]map[string]interface{}, error) {
return nil, errors.ErrUnsupported
}


@@ -0,0 +1,160 @@
//go:build windows
package procstat
import (
"errors"
"fmt"
"syscall"
"unsafe"
gopsnet "github.com/shirou/gopsutil/v4/net"
gopsprocess "github.com/shirou/gopsutil/v4/process"
"golang.org/x/sys/windows"
"golang.org/x/sys/windows/svc/mgr"
)
func processName(p *gopsprocess.Process) (string, error) {
return p.Name()
}
func getService(name string) (*mgr.Service, error) {
m, err := mgr.Connect()
if err != nil {
return nil, err
}
defer m.Disconnect()
srv, err := m.OpenService(name)
if err != nil {
return nil, err
}
return srv, nil
}
func queryPidWithWinServiceName(winServiceName string) (uint32, error) {
srv, err := getService(winServiceName)
if err != nil {
return 0, err
}
var p *windows.SERVICE_STATUS_PROCESS
var bytesNeeded uint32
var buf []byte
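// First call with a zero-length buffer only to learn the required size;
// windows.ERROR_INSUFFICIENT_BUFFER is the expected outcome of this probe.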
err = windows.QueryServiceStatusEx(srv.Handle, windows.SC_STATUS_PROCESS_INFO, nil, 0, &bytesNeeded)
if !errors.Is(err, windows.ERROR_INSUFFICIENT_BUFFER) {
return 0, err
}
buf = make([]byte, bytesNeeded)
p = (*windows.SERVICE_STATUS_PROCESS)(unsafe.Pointer(&buf[0])) //nolint:gosec // G103: Valid use of unsafe call to create SERVICE_STATUS_PROCESS
if err := windows.QueryServiceStatusEx(srv.Handle, windows.SC_STATUS_PROCESS_INFO, &buf[0], uint32(len(buf)), &bytesNeeded); err != nil {
return 0, err
}
return p.ProcessId, nil
}
func collectMemmap(process, string, map[string]any) {}
func findBySystemdUnits([]string) ([]processGroup, error) {
return nil, nil
}
func findByWindowsServices(services []string) ([]processGroup, error) {
groups := make([]processGroup, 0, len(services))
for _, service := range services {
pid, err := queryPidWithWinServiceName(service)
if err != nil {
return nil, fmt.Errorf("failed to query PID of service %q: %w", service, err)
}
p, err := gopsprocess.NewProcess(int32(pid))
if err != nil {
return nil, fmt.Errorf("failed to find process for PID %d of service %q: %w", pid, service, err)
}
groups = append(groups, processGroup{
processes: []*gopsprocess.Process{p},
tags: map[string]string{"win_service": service},
})
}
return groups, nil
}
func collectTotalReadWrite(process) (r, w uint64, err error) {
return 0, 0, errors.ErrUnsupported
}
func statsTCP(conns []gopsnet.ConnectionStat, _ uint8) ([]map[string]interface{}, error) {
if len(conns) == 0 {
return nil, nil
}
// Convert the connection information into per-socket field maps
fieldslist := make([]map[string]interface{}, 0, len(conns))
for _, c := range conns {
var proto string
switch c.Family {
case syscall.AF_INET:
proto = "tcp4"
case syscall.AF_INET6:
proto = "tcp6"
default:
continue
}
fields := map[string]interface{}{
"protocol": proto,
"state": c.Status,
"pid": c.Pid,
"src": c.Laddr.IP,
"src_port": c.Laddr.Port,
"dest": c.Raddr.IP,
"dest_port": c.Raddr.Port,
}
fieldslist = append(fieldslist, fields)
}
return fieldslist, nil
}
func statsUDP(conns []gopsnet.ConnectionStat, _ uint8) ([]map[string]interface{}, error) {
if len(conns) == 0 {
return nil, nil
}
// Convert the connection information into per-socket field maps
fieldslist := make([]map[string]interface{}, 0, len(conns))
for _, c := range conns {
var proto string
switch c.Family {
case syscall.AF_INET:
proto = "udp4"
case syscall.AF_INET6:
proto = "udp6"
default:
continue
}
fields := map[string]interface{}{
"protocol": proto,
"state": c.Status,
"pid": c.Pid,
"src": c.Laddr.IP,
"src_port": c.Laddr.Port,
"dest": c.Raddr.IP,
"dest_port": c.Raddr.Port,
}
fieldslist = append(fieldslist, fields)
}
return fieldslist, nil
}
func statsUnix([]gopsnet.ConnectionStat) ([]map[string]interface{}, error) {
return nil, nil
}


@@ -0,0 +1,85 @@
package procstat
import (
"fmt"
"os"
"os/exec"
"strconv"
"strings"
"github.com/influxdata/telegraf/internal"
)
// pgrep is a pidFinder implementation that execs the pgrep binary to find processes
type pgrep struct {
path string
}
func newPgrepFinder() (pidFinder, error) {
path, err := exec.LookPath("pgrep")
if err != nil {
return nil, fmt.Errorf("could not find pgrep binary: %w", err)
}
return &pgrep{path}, nil
}
func (*pgrep) pidFile(path string) ([]pid, error) {
var pids []pid
pidString, err := os.ReadFile(path)
if err != nil {
return pids, fmt.Errorf("failed to read pidfile %q: %w",
path, err)
}
processID, err := strconv.ParseInt(strings.TrimSpace(string(pidString)), 10, 32)
if err != nil {
return pids, err
}
pids = append(pids, pid(processID))
return pids, nil
}
func (pg *pgrep) pattern(pattern string) ([]pid, error) {
args := []string{pattern}
return pg.find(args)
}
func (pg *pgrep) uid(user string) ([]pid, error) {
args := []string{"-u", user}
return pg.find(args)
}
func (pg *pgrep) fullPattern(pattern string) ([]pid, error) {
args := []string{"-f", pattern}
return pg.find(args)
}
func (pg *pgrep) children(pid pid) ([]pid, error) {
args := []string{"-P", strconv.FormatInt(int64(pid), 10)}
return pg.find(args)
}
func (pg *pgrep) find(args []string) ([]pid, error) {
// Execute pgrep with the given arguments
buf, err := exec.Command(pg.path, args...).Output()
if err != nil {
// Exit code 1 means "no processes found" so we should not return
// an error in this case.
if status, _ := internal.ExitStatus(err); status == 1 {
return nil, nil
}
return nil, fmt.Errorf("error running %q: %w", pg.path, err)
}
out := string(buf)
// Parse the command output to extract the PIDs
fields := strings.Fields(out)
pids := make([]pid, 0, len(fields))
for _, field := range fields {
processID, err := strconv.ParseInt(field, 10, 32)
if err != nil {
return nil, err
}
pids = append(pids, pid(processID))
}
return pids, nil
}
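
A short, hypothetical sketch of using the pgrep finder (package-internal; requires a `pgrep` binary in `PATH`, the pattern is an assumption):

```go
package procstat

import "testing"

// Hypothetical sketch: every lookup maps to a pgrep invocation, e.g.
// uid("nginx") runs `pgrep -u nginx` and children(1) runs `pgrep -P 1`;
// find() parses the whitespace-separated PIDs printed on stdout.
func TestPgrepFinderSketch(t *testing.T) {
    finder, err := newPgrepFinder()
    if err != nil {
        t.Skipf("pgrep not available: %v", err)
    }
    pids, err := finder.fullPattern("telegraf") // runs `pgrep -f telegraf`
    if err != nil {
        t.Fatal(err)
    }
    t.Logf("matched %d processes", len(pids))
}
```
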


@@ -0,0 +1,380 @@
package procstat
import (
"errors"
"fmt"
"runtime"
"strconv"
"syscall"
"time"
gopsnet "github.com/shirou/gopsutil/v4/net"
gopsprocess "github.com/shirou/gopsutil/v4/process"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
)
type process interface {
Name() (string, error)
MemoryMaps(bool) (*[]gopsprocess.MemoryMapsStat, error)
pid() pid
setTag(string, string)
metrics(string, *collectionConfig, time.Time) ([]telegraf.Metric, error)
}
type pidFinder interface {
pidFile(path string) ([]pid, error)
pattern(pattern string) ([]pid, error)
uid(user string) ([]pid, error)
fullPattern(path string) ([]pid, error)
children(pid pid) ([]pid, error)
}
type proc struct {
hasCPUTimes bool
tags map[string]string
*gopsprocess.Process
}
func newProc(pid pid) (process, error) {
p, err := gopsprocess.NewProcess(int32(pid))
if err != nil {
return nil, err
}
proc := &proc{
Process: p,
hasCPUTimes: false,
tags: make(map[string]string),
}
return proc, nil
}
func (p *proc) pid() pid {
return pid(p.Process.Pid)
}
func (p *proc) setTag(k, v string) {
p.tags[k] = v
}
func (p *proc) percent(_ time.Duration) (float64, error) {
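// gopsutil computes the CPU percentage relative to the previous Percent call,
// so the first invocation only primes the measurement; the error returned
// below tells the caller to discard that first value.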
cpuPerc, err := p.Process.Percent(time.Duration(0))
if !p.hasCPUTimes && err == nil {
p.hasCPUTimes = true
return 0, errors.New("must call Percent twice to compute percent cpu")
}
return cpuPerc, err
}
// metrics collects the metrics of a single process
func (p *proc) metrics(prefix string, cfg *collectionConfig, t time.Time) ([]telegraf.Metric, error) {
if prefix != "" {
prefix += "_"
}
fields := make(map[string]interface{})
numThreads, err := p.NumThreads()
if err == nil {
fields[prefix+"num_threads"] = numThreads
}
fds, err := p.NumFDs()
if err == nil {
fields[prefix+"num_fds"] = fds
}
ctx, err := p.NumCtxSwitches()
if err == nil {
fields[prefix+"voluntary_context_switches"] = ctx.Voluntary
fields[prefix+"involuntary_context_switches"] = ctx.Involuntary
}
faults, err := p.PageFaults()
if err == nil {
fields[prefix+"minor_faults"] = faults.MinorFaults
fields[prefix+"major_faults"] = faults.MajorFaults
fields[prefix+"child_minor_faults"] = faults.ChildMinorFaults
fields[prefix+"child_major_faults"] = faults.ChildMajorFaults
}
io, err := p.IOCounters()
if err == nil {
fields[prefix+"read_count"] = io.ReadCount
fields[prefix+"write_count"] = io.WriteCount
fields[prefix+"read_bytes"] = io.ReadBytes
fields[prefix+"write_bytes"] = io.WriteBytes
}
// Linux fixup for gopsutil exposing the disk-only I/O instead of the total
// I/O as, for example, on Windows
if rc, wc, err := collectTotalReadWrite(p); err == nil {
fields[prefix+"read_bytes"] = rc
fields[prefix+"write_bytes"] = wc
fields[prefix+"disk_read_bytes"] = io.ReadBytes
fields[prefix+"disk_write_bytes"] = io.WriteBytes
}
createdAt, err := p.CreateTime() // returns epoch in ms
if err == nil {
fields[prefix+"created_at"] = createdAt * 1000000 // ms to ns
}
if cfg.features["cpu"] {
cpuTime, err := p.Times()
if err == nil {
fields[prefix+"cpu_time_user"] = cpuTime.User
fields[prefix+"cpu_time_system"] = cpuTime.System
fields[prefix+"cpu_time_iowait"] = cpuTime.Iowait // only reported on Linux
}
cpuPerc, err := p.percent(time.Duration(0))
if err == nil {
if cfg.solarisMode {
fields[prefix+"cpu_usage"] = cpuPerc / float64(runtime.NumCPU())
} else {
fields[prefix+"cpu_usage"] = cpuPerc
}
}
}
// This only returns values for RSS and VMS
if cfg.features["memory"] {
mem, err := p.MemoryInfo()
if err == nil {
fields[prefix+"memory_rss"] = mem.RSS
fields[prefix+"memory_vms"] = mem.VMS
}
memPerc, err := p.MemoryPercent()
if err == nil {
fields[prefix+"memory_usage"] = memPerc
}
}
if cfg.features["mmap"] {
collectMemmap(p, prefix, fields)
}
if cfg.features["limits"] {
rlims, err := p.RlimitUsage(true)
if err == nil {
for _, rlim := range rlims {
var name string
switch rlim.Resource {
case gopsprocess.RLIMIT_CPU:
name = "cpu_time"
case gopsprocess.RLIMIT_DATA:
name = "memory_data"
case gopsprocess.RLIMIT_STACK:
name = "memory_stack"
case gopsprocess.RLIMIT_RSS:
name = "memory_rss"
case gopsprocess.RLIMIT_NOFILE:
name = "num_fds"
case gopsprocess.RLIMIT_MEMLOCK:
name = "memory_locked"
case gopsprocess.RLIMIT_AS:
name = "memory_vms"
case gopsprocess.RLIMIT_LOCKS:
name = "file_locks"
case gopsprocess.RLIMIT_SIGPENDING:
name = "signals_pending"
case gopsprocess.RLIMIT_NICE:
name = "nice_priority"
case gopsprocess.RLIMIT_RTPRIO:
name = "realtime_priority"
default:
continue
}
fields[prefix+"rlimit_"+name+"_soft"] = rlim.Soft
fields[prefix+"rlimit_"+name+"_hard"] = rlim.Hard
if name != "file_locks" { // gopsutil doesn't currently track the used file locks count
fields[prefix+name] = rlim.Used
}
}
}
}
// Add the tags as requested by the user
cmdline, err := p.Cmdline()
if err == nil {
if cfg.tagging["cmdline"] {
p.tags["cmdline"] = cmdline
} else {
fields[prefix+"cmdline"] = cmdline
}
}
if cfg.tagging["pid"] {
p.tags["pid"] = strconv.Itoa(int(p.Pid))
} else {
fields["pid"] = p.Pid
}
ppid, err := p.Ppid()
if err == nil {
if cfg.tagging["ppid"] {
p.tags["ppid"] = strconv.Itoa(int(ppid))
} else {
fields[prefix+"ppid"] = ppid
}
}
status, err := p.Status()
if err == nil {
if cfg.tagging["status"] {
p.tags["status"] = status[0]
} else {
fields[prefix+"status"] = status[0]
}
}
user, err := p.Username()
if err == nil {
if cfg.tagging["user"] {
p.tags["user"] = user
} else {
fields[prefix+"user"] = user
}
}
if _, exists := p.tags["process_name"]; !exists {
name, err := p.Name()
if err == nil {
p.tags["process_name"] = name
}
}
metrics := []telegraf.Metric{metric.New("procstat", p.tags, fields, t)}
// Collect the socket statistics if requested
if cfg.features["sockets"] {
for _, protocol := range cfg.socketProtos {
// Get the requested connections for the PID
var fieldlist []map[string]interface{}
switch protocol {
case "all":
conns, err := gopsnet.ConnectionsPid(protocol, p.Pid)
if err != nil {
return metrics, fmt.Errorf("cannot get connections for %q of PID %d", protocol, p.Pid)
}
var connsTCPv4, connsTCPv6, connsUDPv4, connsUDPv6, connsUnix []gopsnet.ConnectionStat
for _, c := range conns {
switch {
case c.Family == syscall.AF_INET && c.Type == syscall.SOCK_STREAM:
connsTCPv4 = append(connsTCPv4, c)
case c.Family == syscall.AF_INET6 && c.Type == syscall.SOCK_STREAM:
connsTCPv6 = append(connsTCPv6, c)
case c.Family == syscall.AF_INET && c.Type == syscall.SOCK_DGRAM:
connsUDPv4 = append(connsUDPv4, c)
case c.Family == syscall.AF_INET6 && c.Type == syscall.SOCK_DGRAM:
connsUDPv6 = append(connsUDPv6, c)
case c.Family == syscall.AF_UNIX:
connsUnix = append(connsUnix, c)
}
}
fl, err := statsTCP(connsTCPv4, syscall.AF_INET)
if err != nil {
return metrics, fmt.Errorf("cannot get statistics for \"tcp4\" of PID %d", p.Pid)
}
fieldlist = append(fieldlist, fl...)
fl, err = statsTCP(connsTCPv6, syscall.AF_INET6)
if err != nil {
return metrics, fmt.Errorf("cannot get statistics for \"tcp6\" of PID %d", p.Pid)
}
fieldlist = append(fieldlist, fl...)
fl, err = statsUDP(connsUDPv4, syscall.AF_INET)
if err != nil {
return metrics, fmt.Errorf("cannot get statistics for \"udp4\" of PID %d", p.Pid)
}
fieldlist = append(fieldlist, fl...)
fl, err = statsUDP(connsUDPv6, syscall.AF_INET6)
if err != nil {
return metrics, fmt.Errorf("cannot get statistics for \"udp6\" of PID %d", p.Pid)
}
fieldlist = append(fieldlist, fl...)
fl, err = statsUnix(connsUnix)
if err != nil {
return metrics, fmt.Errorf("cannot get statistics for \"unix\" of PID %d", p.Pid)
}
fieldlist = append(fieldlist, fl...)
case "tcp4", "tcp6":
family := uint8(syscall.AF_INET)
if protocol == "tcp6" {
family = syscall.AF_INET6
}
conns, err := gopsnet.ConnectionsPid(protocol, p.Pid)
if err != nil {
return metrics, fmt.Errorf("cannot get connections for %q of PID %d", protocol, p.Pid)
}
if fieldlist, err = statsTCP(conns, family); err != nil {
return metrics, fmt.Errorf("cannot get statistics for %q of PID %d", protocol, p.Pid)
}
case "udp4", "udp6":
family := uint8(syscall.AF_INET)
if protocol == "udp6" {
family = syscall.AF_INET6
}
conns, err := gopsnet.ConnectionsPid(protocol, p.Pid)
if err != nil {
return metrics, fmt.Errorf("cannot get connections for %q of PID %d", protocol, p.Pid)
}
if fieldlist, err = statsUDP(conns, family); err != nil {
return metrics, fmt.Errorf("cannot get statistics for %q of PID %d", protocol, p.Pid)
}
case "unix":
conns, err := gopsnet.ConnectionsPid(protocol, p.Pid)
if err != nil {
return metrics, fmt.Errorf("cannot get connections for %q of PID %d", protocol, p.Pid)
}
if fieldlist, err = statsUnix(conns); err != nil {
return metrics, fmt.Errorf("cannot get statistics for %q of PID %d", protocol, p.Pid)
}
}
for _, fields := range fieldlist {
if cfg.tagging["protocol"] {
p.tags["protocol"] = fields["protocol"].(string)
delete(fields, "protocol")
}
if cfg.tagging["state"] {
p.tags["state"] = fields["state"].(string)
delete(fields, "state")
}
if cfg.tagging["src"] && fields["src"] != nil {
p.tags["src"] = fields["src"].(string)
delete(fields, "src")
}
if cfg.tagging["src_port"] && fields["src_port"] != nil {
port := uint64(fields["src_port"].(uint16))
p.tags["src_port"] = strconv.FormatUint(port, 10)
delete(fields, "src_port")
}
if cfg.tagging["dest"] && fields["dest"] != nil {
p.tags["dest"] = fields["dest"].(string)
delete(fields, "dest")
}
if cfg.tagging["dest_port"] && fields["dest_port"] != nil {
port := uint64(fields["dest_port"].(uint16))
p.tags["dest_port"] = strconv.FormatUint(port, 10)
delete(fields, "dest_port")
}
if cfg.tagging["name"] && fields["name"] != nil {
p.tags["name"] = fields["name"].(string)
delete(fields, "name")
}
metrics = append(metrics, metric.New("procstat_socket", p.tags, fields, t))
}
}
}
return metrics, nil
}
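
A hedged, package-internal sketch of driving the collection for a single PID directly (the plugin normally does this from `Gather`):

```go
package procstat

import (
    "os"
    "testing"
    "time"
)

// Hypothetical sketch: collect cpu and memory metrics for our own process,
// tagging the series with the PID (mirrors the properties/tag_with handling
// done by the plugin's Init()).
func TestProcMetricsSketch(t *testing.T) {
    p, err := newProc(pid(os.Getpid()))
    if err != nil {
        t.Fatal(err)
    }
    cfg := collectionConfig{
        tagging:  map[string]bool{"pid": true},
        features: map[string]bool{"cpu": true, "memory": true},
    }
    ms, err := p.metrics("", &cfg, time.Now())
    if err != nil {
        t.Fatal(err)
    }
    for _, m := range ms {
        t.Logf("%s tags=%v fields=%d", m.Name(), m.Tags(), len(m.FieldList()))
    }
}
```
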


@@ -0,0 +1,711 @@
//go:generate ../../../tools/readme_config_includer/generator
package procstat
import (
"bytes"
_ "embed"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"slices"
"strconv"
"strings"
"time"
gopsprocess "github.com/shirou/gopsutil/v4/process"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/choice"
"github.com/influxdata/telegraf/plugins/inputs"
)
//go:embed sample.conf
var sampleConfig string
// execCommand is so tests can mock out exec.Command usage.
var execCommand = exec.Command
type pid int32
type Procstat struct {
PidFinder string `toml:"pid_finder"`
PidFile string `toml:"pid_file"`
Exe string `toml:"exe"`
Pattern string `toml:"pattern"`
Prefix string `toml:"prefix"`
CmdLineTag bool `toml:"cmdline_tag" deprecated:"1.29.0;1.40.0;use 'tag_with' instead"`
ProcessName string `toml:"process_name"`
User string `toml:"user"`
SystemdUnit string `toml:"systemd_unit"`
SupervisorUnit []string `toml:"supervisor_unit" deprecated:"1.29.0;1.40.0;use 'supervisor_units' instead"`
SupervisorUnits []string `toml:"supervisor_units"`
IncludeSystemdChildren bool `toml:"include_systemd_children"`
CGroup string `toml:"cgroup"`
PidTag bool `toml:"pid_tag" deprecated:"1.29.0;1.40.0;use 'tag_with' instead"`
WinService string `toml:"win_service"`
Mode string `toml:"mode"`
Properties []string `toml:"properties"`
SocketProtocols []string `toml:"socket_protocols"`
TagWith []string `toml:"tag_with"`
Filter []filter `toml:"filter"`
Log telegraf.Logger `toml:"-"`
finder pidFinder
processes map[pid]process
cfg collectionConfig
oldMode bool
createProcess func(pid) (process, error)
}
type collectionConfig struct {
solarisMode bool
tagging map[string]bool
features map[string]bool
socketProtos []string
}
type pidsTags struct {
PIDs []pid
Tags map[string]string
}
type processGroup struct {
processes []*gopsprocess.Process
tags map[string]string
level int
}
func (*Procstat) SampleConfig() string {
return sampleConfig
}
func (p *Procstat) Init() error {
// Keep the old settings for compatibility
if p.PidTag && !choice.Contains("pid", p.TagWith) {
p.TagWith = append(p.TagWith, "pid")
}
if p.CmdLineTag && !choice.Contains("cmdline", p.TagWith) {
p.TagWith = append(p.TagWith, "cmdline")
}
// Configure metric collection features
p.cfg.solarisMode = strings.EqualFold(p.Mode, "solaris")
// Convert tagging settings
p.cfg.tagging = make(map[string]bool, len(p.TagWith))
for _, tag := range p.TagWith {
switch tag {
case "cmdline", "pid", "ppid", "status", "user", "child_level", "parent_pid", "level":
case "protocol", "state", "src", "src_port", "dest", "dest_port", "name": // socket only
if !slices.Contains(p.Properties, "sockets") {
return fmt.Errorf("socket tagging option %q specified without sockets enabled", tag)
}
default:
return fmt.Errorf("invalid 'tag_with' setting %q", tag)
}
p.cfg.tagging[tag] = true
}
// Convert collection properties
p.cfg.features = make(map[string]bool, len(p.Properties))
for _, prop := range p.Properties {
switch prop {
case "cpu", "limits", "memory", "mmap":
case "sockets":
if len(p.SocketProtocols) == 0 {
p.SocketProtocols = []string{"all"}
}
protos := make(map[string]bool, len(p.SocketProtocols))
for _, proto := range p.SocketProtocols {
switch proto {
case "all":
if len(protos) > 0 || len(p.SocketProtocols) > 1 {
return errors.New("additional 'socket_protocol' settings besides 'all' are not allowed")
}
case "tcp4", "tcp6", "udp4", "udp6", "unix":
default:
return fmt.Errorf("invalid 'socket_protocol' setting %q", proto)
}
if protos[proto] {
return fmt.Errorf("duplicate %q in 'socket_protocol' setting", proto)
}
protos[proto] = true
p.cfg.socketProtos = append(p.cfg.socketProtos, proto)
}
default:
return fmt.Errorf("invalid 'properties' setting %q", prop)
}
p.cfg.features[prop] = true
}
// Check if we got any new-style configuration options and determine
// operation mode.
p.oldMode = len(p.Filter) == 0
if p.oldMode {
// Keep the old settings for compatibility
for _, u := range p.SupervisorUnit {
if !choice.Contains(u, p.SupervisorUnits) {
p.SupervisorUnits = append(p.SupervisorUnits, u)
}
}
// Check filtering
switch {
case len(p.SupervisorUnits) > 0, p.SystemdUnit != "", p.WinService != "",
p.CGroup != "", p.PidFile != "", p.Exe != "", p.Pattern != "",
p.User != "":
// Do nothing as those are valid settings
default:
return errors.New("require filter option but none set")
}
// Instantiate the finder
switch p.PidFinder {
case "", "pgrep":
p.PidFinder = "pgrep"
finder, err := newPgrepFinder()
if err != nil {
return fmt.Errorf("creating pgrep finder failed: %w", err)
}
p.finder = finder
case "native":
// gopsutil relies on pgrep when looking up children on darwin
// see https://github.com/shirou/gopsutil/blob/v3.23.10/process/process_darwin.go#L235
requiresChildren := len(p.SupervisorUnits) > 0 && p.Pattern != ""
if requiresChildren && runtime.GOOS == "darwin" {
return errors.New("configuration requires 'pgrep' finder on your OS")
}
p.finder = &NativeFinder{}
case "test":
p.Log.Warn("running in test mode")
default:
return fmt.Errorf("unknown pid_finder %q", p.PidFinder)
}
} else {
// Check for mixed mode
switch {
case p.PidFile != "", p.Exe != "", p.Pattern != "", p.User != "",
p.SystemdUnit != "", len(p.SupervisorUnit) > 0,
len(p.SupervisorUnits) > 0, p.CGroup != "", p.WinService != "":
return errors.New("cannot operate in mixed mode with filters and old-style config")
}
// New-style operations
for i := range p.Filter {
p.Filter[i].Log = p.Log
if err := p.Filter[i].init(); err != nil {
return fmt.Errorf("initializing filter %d failed: %w", i, err)
}
}
}
// Initialize the running process cache
p.processes = make(map[pid]process)
return nil
}
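
For orientation, a hypothetical sketch of wiring the plugin up programmatically with telegraf's `testutil` helpers (a TOML configuration with the same keys is the normal way to do this; the pattern and the `createProcess` hook value are assumptions):

```go
package procstat

import (
    "testing"

    "github.com/influxdata/telegraf/testutil"
)

// Hypothetical sketch: old-style (filter-less) operation selecting processes
// by full-commandline pattern with the native finder.
func TestProcstatGatherSketch(t *testing.T) {
    plugin := &Procstat{
        Pattern:       "telegraf", // assumed pattern
        PidFinder:     "native",
        Properties:    []string{"cpu", "memory"},
        TagWith:       []string{"pid"},
        Log:           testutil.Logger{},
        createProcess: newProc, // package-internal hook, normally set at plugin registration
    }
    if err := plugin.Init(); err != nil {
        t.Fatal(err)
    }
    var acc testutil.Accumulator
    if err := plugin.Gather(&acc); err != nil {
        t.Fatal(err)
    }
    t.Logf("collected %d metrics", acc.NMetrics())
}
```
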
func (p *Procstat) Gather(acc telegraf.Accumulator) error {
if p.oldMode {
return p.gatherOld(acc)
}
return p.gatherNew(acc)
}
func (p *Procstat) gatherOld(acc telegraf.Accumulator) error {
now := time.Now()
results, err := p.findPids()
if err != nil {
// Add lookup error-metric
fields := map[string]interface{}{
"pid_count": 0,
"running": 0,
"result_code": 1,
}
tags := map[string]string{
"pid_finder": p.PidFinder,
"result": "lookup_error",
}
for _, pidTag := range results {
for key, value := range pidTag.Tags {
tags[key] = value
}
}
acc.AddFields("procstat_lookup", fields, tags, now)
return err
}
var count int
running := make(map[pid]bool)
for _, r := range results {
if len(r.PIDs) < 1 && len(p.SupervisorUnits) > 0 {
continue
}
count += len(r.PIDs)
for _, pid := range r.PIDs {
// Check if the process is still running
proc, err := p.createProcess(pid)
if err != nil {
// No problem; process may have ended after we found it or it
// might be delivered from a non-checking source like a PID file
// of a dead process.
continue
}
// Use the cached processes as we need the existing instances
// to compute delta-metrics (e.g. cpu-usage).
if cached, found := p.processes[pid]; found {
proc = cached
} else {
// We've found a process that was not recorded before so add it
// to the list of processes
//nolint:errcheck // Assumption: if a process has no name, it probably does not exist
if name, _ := proc.Name(); name == "" {
continue
}
// Add initial tags
for k, v := range r.Tags {
proc.setTag(k, v)
}
if p.ProcessName != "" {
proc.setTag("process_name", p.ProcessName)
}
p.processes[pid] = proc
}
running[pid] = true
metrics, err := proc.metrics(p.Prefix, &p.cfg, now)
if err != nil {
// Continue after logging an error as there might still be
// metrics available
acc.AddError(err)
}
for _, m := range metrics {
acc.AddMetric(m)
}
}
}
// Cleanup processes that are not running anymore
for pid := range p.processes {
if !running[pid] {
delete(p.processes, pid)
}
}
// Add lookup statistics-metric
fields := map[string]interface{}{
"pid_count": count,
"running": len(running),
"result_code": 0,
}
tags := map[string]string{
"pid_finder": p.PidFinder,
"result": "success",
}
for _, pidTag := range results {
for key, value := range pidTag.Tags {
tags[key] = value
}
}
if len(p.SupervisorUnits) > 0 {
tags["supervisor_unit"] = strings.Join(p.SupervisorUnits, ";")
}
acc.AddFields("procstat_lookup", fields, tags, now)
return nil
}
func (p *Procstat) gatherNew(acc telegraf.Accumulator) error {
now := time.Now()
running := make(map[pid]bool)
for _, f := range p.Filter {
groups, err := f.applyFilter()
if err != nil {
// Add lookup error-metric
acc.AddFields(
"procstat_lookup",
map[string]interface{}{
"pid_count": 0,
"running": 0,
"result_code": 1,
},
map[string]string{
"filter": f.Name,
"result": "lookup_error",
},
now,
)
acc.AddError(fmt.Errorf("applying filter %q failed: %w", f.Name, err))
continue
}
var count int
for _, g := range groups {
count += len(g.processes)
level := strconv.Itoa(g.level)
for _, gp := range g.processes {
// Skip over non-running processes
if running, err := gp.IsRunning(); err != nil || !running {
continue
}
// Use the cached processes as we need the existing instances
// to compute delta-metrics (e.g. cpu-usage).
pid := pid(gp.Pid)
process, found := p.processes[pid]
if !found {
//nolint:errcheck // Assumption: if a process has no name, it probably does not exist
if name, _ := gp.Name(); name == "" {
continue
}
// We've found a process that was not recorded before so add it
// to the list of processes
tags := make(map[string]string, len(g.tags)+1)
for k, v := range g.tags {
tags[k] = v
}
if p.ProcessName != "" {
tags["process_name"] = p.ProcessName
}
tags["filter"] = f.Name
if p.cfg.tagging["level"] {
tags["level"] = level
}
process = &proc{
Process: gp,
hasCPUTimes: false,
tags: tags,
}
p.processes[pid] = process
}
running[pid] = true
metrics, err := process.metrics(p.Prefix, &p.cfg, now)
if err != nil {
// Continue after logging an error as there might still be
// metrics available
acc.AddError(err)
}
for _, m := range metrics {
acc.AddMetric(m)
}
}
if p.cfg.tagging["level"] {
// Add lookup statistics-metric
acc.AddFields(
"procstat_lookup",
map[string]interface{}{
"pid_count": len(g.processes),
"running": len(running),
"result_code": 0,
"level": g.level,
},
map[string]string{
"filter": f.Name,
"result": "success",
},
now,
)
}
}
// Add lookup statistics-metric
acc.AddFields(
"procstat_lookup",
map[string]interface{}{
"pid_count": count,
"running": len(running),
"result_code": 0,
},
map[string]string{
"filter": f.Name,
"result": "success",
},
now,
)
}
// Cleanup processes that are not running anymore across all filters/groups
for pid := range p.processes {
if !running[pid] {
delete(p.processes, pid)
}
}
return nil
}
// Get matching PIDs and their initial tags
func (p *Procstat) findPids() ([]pidsTags, error) {
switch {
case len(p.SupervisorUnits) > 0:
return p.findSupervisorUnits()
case p.SystemdUnit != "":
return p.systemdUnitPIDs()
case p.WinService != "":
pids, err := p.winServicePIDs()
if err != nil {
return nil, err
}
tags := map[string]string{"win_service": p.WinService}
return []pidsTags{{pids, tags}}, nil
case p.CGroup != "":
return p.cgroupPIDs()
case p.PidFile != "":
pids, err := p.finder.pidFile(p.PidFile)
if err != nil {
return nil, err
}
tags := map[string]string{"pidfile": p.PidFile}
return []pidsTags{{pids, tags}}, nil
case p.Exe != "":
pids, err := p.finder.pattern(p.Exe)
if err != nil {
return nil, err
}
tags := map[string]string{"exe": p.Exe}
return []pidsTags{{pids, tags}}, nil
case p.Pattern != "":
pids, err := p.finder.fullPattern(p.Pattern)
if err != nil {
return nil, err
}
tags := map[string]string{"pattern": p.Pattern}
return []pidsTags{{pids, tags}}, nil
case p.User != "":
pids, err := p.finder.uid(p.User)
if err != nil {
return nil, err
}
tags := map[string]string{"user": p.User}
return []pidsTags{{pids, tags}}, nil
}
return nil, errors.New("no filter option set")
}
func (p *Procstat) findSupervisorUnits() ([]pidsTags, error) {
groups, groupsTags, err := p.supervisorPIDs()
if err != nil {
return nil, fmt.Errorf("getting supervisor PIDs failed: %w", err)
}
// For each unit's main PID, resolve the process and collect its child processes
pidTags := make([]pidsTags, 0, len(groups))
for _, group := range groups {
grppid := groupsTags[group]["pid"]
if grppid == "" {
pidTags = append(pidTags, pidsTags{nil, groupsTags[group]})
continue
}
processID, err := strconv.ParseInt(grppid, 10, 32)
if err != nil {
return nil, fmt.Errorf("converting PID %q failed: %w", grppid, err)
}
// Get all children of the supervisor unit
pids, err := p.finder.children(pid(processID))
if err != nil {
return nil, fmt.Errorf("getting children for %d failed: %w", processID, err)
}
tags := map[string]string{"pattern": p.Pattern, "parent_pid": grppid}
// Handle situations where the PID does not exist
if len(pids) == 0 {
continue
}
// Merge tags map
for k, v := range groupsTags[group] {
_, ok := tags[k]
if !ok {
tags[k] = v
}
}
// Remove duplicate pid tags
delete(tags, "pid")
pidTags = append(pidTags, pidsTags{pids, tags})
}
return pidTags, nil
}
func (p *Procstat) supervisorPIDs() ([]string, map[string]map[string]string, error) {
out, err := execCommand("supervisorctl", "status", strings.Join(p.SupervisorUnits, " ")).Output()
if err != nil {
if !strings.Contains(err.Error(), "exit status 3") {
return nil, nil, err
}
}
lines := strings.Split(string(out), "\n")
// Get the PID, running status, running time and boot time of the main process:
// pid 11779, uptime 17:41:16
// Exited too quickly (process log may have details)
mainPids := make(map[string]map[string]string)
for _, line := range lines {
if line == "" {
continue
}
kv := strings.Fields(line)
if len(kv) < 2 {
// Not a key-value pair
continue
}
name := kv[0]
statusMap := map[string]string{
"supervisor_unit": name,
"status": kv[1],
}
switch kv[1] {
case "FATAL", "EXITED", "BACKOFF", "STOPPING":
statusMap["error"] = strings.Join(kv[2:], " ")
case "RUNNING":
statusMap["pid"] = strings.ReplaceAll(kv[3], ",", "")
statusMap["uptimes"] = kv[5]
case "STOPPED", "UNKNOWN", "STARTING":
// No additional info
}
mainPids[name] = statusMap
}
return p.SupervisorUnits, mainPids, nil
}
func (p *Procstat) systemdUnitPIDs() ([]pidsTags, error) {
if p.IncludeSystemdChildren {
p.CGroup = "systemd/system.slice/" + p.SystemdUnit
return p.cgroupPIDs()
}
var pidTags []pidsTags
pids, err := p.simpleSystemdUnitPIDs()
if err != nil {
return nil, err
}
tags := map[string]string{"systemd_unit": p.SystemdUnit}
pidTags = append(pidTags, pidsTags{pids, tags})
return pidTags, nil
}
func (p *Procstat) simpleSystemdUnitPIDs() ([]pid, error) {
out, err := execCommand("systemctl", "show", p.SystemdUnit).Output()
if err != nil {
return nil, err
}
lines := bytes.Split(out, []byte{'\n'})
pids := make([]pid, 0, len(lines))
for _, line := range lines {
kv := bytes.SplitN(line, []byte{'='}, 2)
if len(kv) != 2 {
continue
}
if !bytes.Equal(kv[0], []byte("MainPID")) {
continue
}
if len(kv[1]) == 0 || bytes.Equal(kv[1], []byte("0")) {
return nil, nil
}
processID, err := strconv.ParseInt(string(kv[1]), 10, 32)
if err != nil {
return nil, fmt.Errorf("invalid pid %q", kv[1])
}
pids = append(pids, pid(processID))
}
return pids, nil
}
func (p *Procstat) cgroupPIDs() ([]pidsTags, error) {
procsPath := p.CGroup
if procsPath[0] != '/' {
procsPath = "/sys/fs/cgroup/" + procsPath
}
items, err := filepath.Glob(procsPath)
if err != nil {
return nil, fmt.Errorf("glob failed: %w", err)
}
pidTags := make([]pidsTags, 0, len(items))
for _, item := range items {
pids, err := singleCgroupPIDs(item)
if err != nil {
return nil, err
}
tags := map[string]string{"cgroup": p.CGroup, "cgroup_full": item}
pidTags = append(pidTags, pidsTags{pids, tags})
}
return pidTags, nil
}
func singleCgroupPIDs(path string) ([]pid, error) {
ok, err := isDir(path)
if err != nil {
return nil, err
}
if !ok {
return nil, fmt.Errorf("not a directory %s", path)
}
procsPath := filepath.Join(path, "cgroup.procs")
out, err := os.ReadFile(procsPath)
if err != nil {
return nil, err
}
lines := bytes.Split(out, []byte{'\n'})
pids := make([]pid, 0, len(lines))
for _, pidBS := range lines {
if len(pidBS) == 0 {
continue
}
processID, err := strconv.ParseInt(string(pidBS), 10, 32)
if err != nil {
return nil, fmt.Errorf("invalid pid %q", pidBS)
}
pids = append(pids, pid(processID))
}
return pids, nil
}
func isDir(path string) (bool, error) {
result, err := os.Stat(path)
if err != nil {
return false, err
}
return result.IsDir(), nil
}
func (p *Procstat) winServicePIDs() ([]pid, error) {
var pids []pid
processID, err := queryPidWithWinServiceName(p.WinService)
if err != nil {
return pids, err
}
pids = append(pids, pid(processID))
return pids, nil
}
func init() {
inputs.Add("procstat", func() telegraf.Input {
return &Procstat{
Properties: []string{"cpu", "memory", "mmap"},
createProcess: newProc,
}
})
}
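The mixed-mode check in `Init` means a single plugin instance is driven either by the classic selectors (`pid_file`, `exe`, `pattern`, `user`, ...) handled by `gatherOld`, or by `[[inputs.procstat.filter]]` sections handled by `gatherNew`, never both. A minimal sketch of the two styles, with illustrative values only:

```toml
# Classic selector style (old mode, served by gatherOld)
[[inputs.procstat]]
  exe = "nginx"

# New-style filter sections (served by gatherNew); combining these with the
# classic selectors in the same instance fails Init with the mixed-mode error
[[inputs.procstat]]
  [[inputs.procstat.filter]]
    name = "nginx"
    process_names = ["nginx"]
```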


@ -0,0 +1,676 @@
package procstat
import (
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"testing"
"time"
gopsprocess "github.com/shirou/gopsutil/v4/process"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
)
func init() {
execCommand = mockExecCommand
}
func mockExecCommand(arg0 string, args ...string) *exec.Cmd {
args = append([]string{"-test.run=TestMockExecCommand", "--", arg0}, args...)
cmd := exec.Command(os.Args[0], args...)
cmd.Stderr = os.Stderr
return cmd
}
func TestMockExecCommand(_ *testing.T) {
var cmd []string //nolint:prealloc // Pre-allocating this slice would break the algorithm
for _, arg := range os.Args {
if arg == "--" {
cmd = make([]string, 0)
continue
}
if cmd == nil {
continue
}
cmd = append(cmd, arg)
}
if cmd == nil {
return
}
cmdline := strings.Join(cmd, " ")
if cmdline == "systemctl show TestGather_systemdUnitPIDs" {
fmt.Printf(`PIDFile=
GuessMainPID=yes
MainPID=11408
ControlPID=0
ExecMainPID=11408
`)
//nolint:revive // error code is important for this "test"
os.Exit(0)
}
if cmdline == "supervisorctl status TestGather_supervisorUnitPIDs" {
fmt.Printf(`TestGather_supervisorUnitPIDs RUNNING pid 7311, uptime 0:00:19
`)
//nolint:revive // error code is important for this "test"
os.Exit(0)
}
if cmdline == "supervisorctl status TestGather_STARTINGsupervisorUnitPIDs TestGather_FATALsupervisorUnitPIDs" {
fmt.Printf(`TestGather_FATALsupervisorUnitPIDs FATAL Exited too quickly (process log may have details)
TestGather_STARTINGsupervisorUnitPIDs STARTING`)
//nolint:revive // error code is important for this "test"
os.Exit(0)
}
fmt.Printf("command not found\n")
//nolint:revive // error code is important for this "test"
os.Exit(1)
}
type testPgrep struct {
pids []pid
err error
}
func newTestFinder(pids []pid) pidFinder {
return &testPgrep{
pids: pids,
err: nil,
}
}
func (pg *testPgrep) pidFile(_ string) ([]pid, error) {
return pg.pids, pg.err
}
func (pg *testPgrep) pattern(_ string) ([]pid, error) {
return pg.pids, pg.err
}
func (pg *testPgrep) uid(_ string) ([]pid, error) {
return pg.pids, pg.err
}
func (pg *testPgrep) fullPattern(_ string) ([]pid, error) {
return pg.pids, pg.err
}
func (pg *testPgrep) children(_ pid) ([]pid, error) {
pids := []pid{7311, 8111, 8112}
return pids, pg.err
}
type testProc struct {
procID pid
tags map[string]string
}
func newTestProc(pid pid) (process, error) {
proc := &testProc{
procID: pid,
tags: make(map[string]string),
}
return proc, nil
}
func (p *testProc) pid() pid {
return p.procID
}
func (*testProc) Name() (string, error) {
return "test_proc", nil
}
func (p *testProc) setTag(k, v string) {
p.tags[k] = v
}
func (*testProc) MemoryMaps(bool) (*[]gopsprocess.MemoryMapsStat, error) {
stats := make([]gopsprocess.MemoryMapsStat, 0)
return &stats, nil
}
func (p *testProc) metrics(prefix string, cfg *collectionConfig, t time.Time) ([]telegraf.Metric, error) {
if prefix != "" {
prefix += "_"
}
fields := map[string]interface{}{
prefix + "num_fds": int32(0),
prefix + "num_threads": int32(0),
prefix + "voluntary_context_switches": int64(0),
prefix + "involuntary_context_switches": int64(0),
prefix + "minor_faults": uint64(0),
prefix + "major_faults": uint64(0),
prefix + "child_major_faults": uint64(0),
prefix + "child_minor_faults": uint64(0),
prefix + "read_bytes": uint64(0),
prefix + "read_count": uint64(0),
prefix + "write_bytes": uint64(0),
prefix + "write_count": uint64(0),
prefix + "created_at": int64(0),
}
if cfg.features["cpu"] {
fields[prefix+"cpu_time_user"] = float64(0)
fields[prefix+"cpu_time_system"] = float64(0)
fields[prefix+"cpu_time_iowait"] = float64(0)
fields[prefix+"cpu_usage"] = float64(0)
}
if cfg.features["memory"] {
fields[prefix+"memory_rss"] = uint64(0)
fields[prefix+"memory_vms"] = uint64(0)
fields[prefix+"memory_usage"] = float32(0)
}
tags := map[string]string{
"process_name": "test_proc",
}
for k, v := range p.tags {
tags[k] = v
}
// Add the tags as requested by the user
if cfg.tagging["cmdline"] {
tags["cmdline"] = "test_proc"
} else {
fields[prefix+"cmdline"] = "test_proc"
}
if cfg.tagging["pid"] {
tags["pid"] = strconv.Itoa(int(p.procID))
} else {
fields["pid"] = int32(p.procID)
}
if cfg.tagging["ppid"] {
tags["ppid"] = "0"
} else {
fields[prefix+"ppid"] = int32(0)
}
if cfg.tagging["status"] {
tags["status"] = "running"
} else {
fields[prefix+"status"] = "running"
}
if cfg.tagging["user"] {
tags["user"] = "testuser"
} else {
fields[prefix+"user"] = "testuser"
}
return []telegraf.Metric{metric.New("procstat", tags, fields, t)}, nil
}
var processID = pid(42)
var exe = "foo"
func TestInitInvalidFinder(t *testing.T) {
plugin := Procstat{
PidFinder: "foo",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
createProcess: newTestProc,
}
require.Error(t, plugin.Init())
}
func TestInitRequiresChildDarwin(t *testing.T) {
if runtime.GOOS != "darwin" {
t.Skip("Skipping test on non-darwin platform")
}
p := Procstat{
Pattern: "somepattern",
SupervisorUnits: []string{"a_unit"},
PidFinder: "native",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
}
require.ErrorContains(t, p.Init(), "requires 'pgrep' finder")
}
func TestInitMissingPidMethod(t *testing.T) {
p := Procstat{
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
createProcess: newTestProc,
}
require.ErrorContains(t, p.Init(), "require filter option but none set")
}
func TestGather_CreateProcessErrorOk(t *testing.T) {
expected := []telegraf.Metric{
testutil.MustMetric(
"procstat_lookup",
map[string]string{
"exe": "foo",
"pid_finder": "test",
"result": "success",
},
map[string]interface{}{
"pid_count": int64(1),
"result_code": int64(0),
"running": int64(0),
},
time.Unix(0, 0),
telegraf.Untyped,
),
}
p := Procstat{
Exe: exe,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: func(pid) (process, error) {
return nil, errors.New("createProcess error")
},
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
}
func TestGather_ProcessName(t *testing.T) {
expected := []telegraf.Metric{
testutil.MustMetric(
"procstat",
map[string]string{
"exe": "foo",
"process_name": "custom_name",
},
map[string]interface{}{
"child_major_faults": uint64(0),
"child_minor_faults": uint64(0),
"cmdline": "test_proc",
"cpu_time_iowait": float64(0),
"cpu_time_system": float64(0),
"cpu_time_user": float64(0),
"cpu_usage": float64(0),
"created_at": int64(0),
"involuntary_context_switches": int64(0),
"major_faults": uint64(0),
"memory_rss": uint64(0),
"memory_usage": float32(0),
"memory_vms": uint64(0),
"minor_faults": uint64(0),
"num_fds": int32(0),
"num_threads": int32(0),
"pid": int32(42),
"ppid": int32(0),
"read_bytes": uint64(0),
"read_count": uint64(0),
"status": "running",
"user": "testuser",
"voluntary_context_switches": int64(0),
"write_bytes": uint64(0),
"write_count": uint64(0),
},
time.Unix(0, 0),
telegraf.Untyped,
),
testutil.MustMetric(
"procstat_lookup",
map[string]string{
"exe": "foo",
"pid_finder": "test",
"result": "success",
},
map[string]interface{}{
"pid_count": int64(1),
"result_code": int64(0),
"running": int64(1),
},
time.Unix(0, 0),
telegraf.Untyped,
),
}
p := Procstat{
Exe: exe,
ProcessName: "custom_name",
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newTestProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.Equal(t, "custom_name", acc.TagValue("procstat", "process_name"))
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
}
func TestGather_NoProcessNameUsesReal(t *testing.T) {
processID := pid(os.Getpid())
p := Procstat{
Exe: exe,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newTestProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.True(t, acc.HasTag("procstat", "process_name"))
}
func TestGather_NoPidTag(t *testing.T) {
p := Procstat{
Exe: exe,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newTestProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.True(t, acc.HasInt64Field("procstat", "pid"))
require.False(t, acc.HasTag("procstat", "pid"))
}
func TestGather_PidTag(t *testing.T) {
p := Procstat{
Exe: exe,
PidTag: true,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newTestProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.Equal(t, "42", acc.TagValue("procstat", "pid"))
require.False(t, acc.HasInt32Field("procstat", "pid"))
}
func TestGather_Prefix(t *testing.T) {
p := Procstat{
Exe: exe,
Prefix: "custom_prefix",
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newTestProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.True(t, acc.HasInt64Field("procstat", "custom_prefix_num_fds"))
}
func TestGather_Exe(t *testing.T) {
p := Procstat{
Exe: exe,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newTestProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.Equal(t, exe, acc.TagValue("procstat", "exe"))
}
func TestGather_User(t *testing.T) {
user := "ada"
p := Procstat{
User: user,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newTestProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.Equal(t, user, acc.TagValue("procstat", "user"))
}
func TestGather_Pattern(t *testing.T) {
pattern := "foo"
p := Procstat{
Pattern: pattern,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newTestProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.Equal(t, pattern, acc.TagValue("procstat", "pattern"))
}
func TestGather_PidFile(t *testing.T) {
pidfile := "/path/to/pidfile"
p := Procstat{
PidFile: pidfile,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newTestProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.Equal(t, pidfile, acc.TagValue("procstat", "pidfile"))
}
func TestGather_PercentFirstPass(t *testing.T) {
processID := pid(os.Getpid())
p := Procstat{
Pattern: "foo",
PidTag: true,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.True(t, acc.HasFloatField("procstat", "cpu_time_user"))
require.False(t, acc.HasFloatField("procstat", "cpu_usage"))
}
func TestGather_PercentSecondPass(t *testing.T) {
processID := pid(os.Getpid())
p := Procstat{
Pattern: "foo",
PidTag: true,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.NoError(t, p.Gather(&acc))
require.True(t, acc.HasFloatField("procstat", "cpu_time_user"))
require.True(t, acc.HasFloatField("procstat", "cpu_usage"))
}
func TestGather_systemdUnitPIDs(t *testing.T) {
p := Procstat{
SystemdUnit: "TestGather_systemdUnitPIDs",
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
}
require.NoError(t, p.Init())
pidsTags, err := p.findPids()
require.NoError(t, err)
for _, pidsTag := range pidsTags {
require.Equal(t, []pid{11408}, pidsTag.PIDs)
require.Equal(t, "TestGather_systemdUnitPIDs", pidsTag.Tags["systemd_unit"])
}
}
func TestGather_cgroupPIDs(t *testing.T) {
// no cgroups in windows
if runtime.GOOS == "windows" {
t.Skip("no cgroups in windows")
}
td := t.TempDir()
err := os.WriteFile(filepath.Join(td, "cgroup.procs"), []byte("1234\n5678\n"), 0640)
require.NoError(t, err)
p := Procstat{
CGroup: td,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
}
require.NoError(t, p.Init())
pidsTags, err := p.findPids()
require.NoError(t, err)
for _, pidsTag := range pidsTags {
require.Equal(t, []pid{1234, 5678}, pidsTag.PIDs)
require.Equal(t, td, pidsTag.Tags["cgroup"])
}
}
func TestProcstatLookupMetric(t *testing.T) {
p := Procstat{
Exe: "-Gsys",
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{543}),
createProcess: newProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
require.NotEmpty(t, acc.GetTelegrafMetrics())
}
func TestGather_SameTimestamps(t *testing.T) {
pidfile := "/path/to/pidfile"
p := Procstat{
PidFile: pidfile,
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
createProcess: newTestProc,
}
require.NoError(t, p.Init())
var acc testutil.Accumulator
require.NoError(t, p.Gather(&acc))
procstat, _ := acc.Get("procstat")
procstatLookup, _ := acc.Get("procstat_lookup")
require.Equal(t, procstat.Time, procstatLookup.Time)
}
func TestGather_supervisorUnitPIDs(t *testing.T) {
p := Procstat{
SupervisorUnits: []string{"TestGather_supervisorUnitPIDs"},
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
}
require.NoError(t, p.Init())
pidsTags, err := p.findPids()
require.NoError(t, err)
for _, pidsTag := range pidsTags {
require.Equal(t, []pid{7311, 8111, 8112}, pidsTag.PIDs)
require.Equal(t, "TestGather_supervisorUnitPIDs", pidsTag.Tags["supervisor_unit"])
}
}
func TestGather_MoresupervisorUnitPIDs(t *testing.T) {
p := Procstat{
SupervisorUnits: []string{"TestGather_STARTINGsupervisorUnitPIDs", "TestGather_FATALsupervisorUnitPIDs"},
PidFinder: "test",
Properties: []string{"cpu", "memory", "mmap"},
Log: testutil.Logger{},
finder: newTestFinder([]pid{processID}),
}
require.NoError(t, p.Init())
pidsTags, err := p.findPids()
require.NoError(t, err)
for _, pidsTag := range pidsTags {
require.Empty(t, pidsTag.PIDs)
switch pidsTag.Tags["supervisor_unit"] {
case "TestGather_STARTINGsupervisorUnitPIDs":
require.Equal(t, "STARTING", pidsTag.Tags["status"])
case "TestGather_FATALsupervisorUnitPIDs":
require.Equal(t, "FATAL", pidsTag.Tags["status"])
require.Equal(t, "Exited too quickly (process log may have details)", pidsTag.Tags["error"])
default:
t.Fatalf("unexpected value for tag 'supervisor_unit': %q", pidsTag.Tags["supervisor_unit"])
}
}
}


@ -0,0 +1,106 @@
# Monitor process cpu and memory usage
[[inputs.procstat]]
## PID file to monitor process
pid_file = "/var/run/nginx.pid"
## executable name (ie, pgrep <exe>)
# exe = "nginx"
## pattern as argument for pgrep (ie, pgrep -f <pattern>)
# pattern = "nginx"
## user as argument for pgrep (ie, pgrep -u <user>)
# user = "nginx"
## Systemd unit name, supports globs when include_systemd_children is set to true
# systemd_unit = "nginx.service"
# include_systemd_children = false
## CGroup name or path, supports globs
# cgroup = "systemd/system.slice/nginx.service"
## Supervisor service names for supervisorctl management
# supervisor_units = ["webserver", "proxy"]
## Windows service name
# win_service = ""
## override for process_name
## This is optional; default is sourced from /proc/<pid>/status
# process_name = "bar"
## Field name prefix
# prefix = ""
## Mode to use when calculating CPU usage. Can be one of 'solaris' or 'irix'.
# mode = "irix"
## Add the given information as tags instead of fields
## This allows creating unique metrics/series when collecting processes with
## otherwise identical tags. However, please be careful as this can easily
## result in a large number of series, especially with short-lived processes,
## creating high cardinality at the output.
## Available options are:
## cmdline -- full commandline
## pid -- ID of the process
## ppid -- ID of the process' parent
## status -- state of the process
## user -- username owning the process
## socket only options:
## protocol -- protocol type of the process socket
## state -- state of the process socket
## src -- source address of the process socket (non-unix sockets)
## src_port -- source port of the process socket (non-unix sockets)
## dest -- destination address of the process socket (non-unix sockets)
## dest_port -- destination port of the process socket (non-unix sockets)
## name -- name of the process socket (unix sockets only)
## Available for procstat_lookup:
## level -- level of the process filtering
# tag_with = []
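## For example (illustrative only), tagging each series with the PID and the
## owning user instead of reporting them as fields:
# tag_with = ["pid", "user"]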
## Properties to collect
## Available options are
## cpu -- CPU usage statistics
## limits -- set resource limits
## memory -- memory usage statistics
## mmap -- mapped memory usage statistics (caution: can cause high load)
## sockets -- socket statistics for protocols in 'socket_protocols'
# properties = ["cpu", "limits", "memory", "mmap"]
## Protocol filter for the sockets property
## Available options are
## all -- all of the protocols below
## tcp4 -- TCP socket statistics for IPv4
## tcp6 -- TCP socket statistics for IPv6
## udp4 -- UDP socket statistics for IPv4
## udp6 -- UDP socket statistics for IPv6
## unix -- Unix socket statistics
# socket_protocols = ["all"]
## Method to use when finding process IDs. Can be one of 'pgrep' or
## 'native'. The pgrep finder calls the pgrep executable in the PATH while
## the native finder performs the search directly in a manner dependent on the
## platform. Default is 'pgrep'
# pid_finder = "pgrep"
## New-style filtering configuration (multiple filter sections are allowed)
# [[inputs.procstat.filter]]
# ## Name of the filter added as 'filter' tag
# name = "shell"
#
# ## Service filters, only one is allowed
# ## Systemd unit names (wildcards are supported)
# # systemd_units = []
# ## CGroup name or path (wildcards are supported)
# # cgroups = []
# ## Supervisor service names for supervisorctl management
# # supervisor_units = []
# ## Windows service names
# # win_service = []
#
# ## Process filters, multiple are allowed
# ## Regular expressions to use for matching against the full command
# # patterns = ['.*']
# ## List of users owning the process (wildcards are supported)
# # users = ['*']
# ## List of executable paths of the process (wildcards are supported)
# # executables = ['*']
# ## List of process names (wildcards are supported)
# # process_names = ['*']
# ## Recursion depth for determining children of the matched processes
# ## A negative value means all children with infinite depth
# # recursion_depth = 0
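#
# ## A concrete illustration (example values only): monitor nginx.service
# ## and all of its children under the filter name "nginx"
# # [[inputs.procstat.filter]]
# #   name = "nginx"
# #   systemd_units = ["nginx.service"]
# #   recursion_depth = -1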


@ -0,0 +1,166 @@
package procstat
import (
"bytes"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
gopsprocess "github.com/shirou/gopsutil/v4/process"
"github.com/influxdata/telegraf"
)
type processFinder struct {
errPidFiles map[string]bool
log telegraf.Logger
}
func newProcessFinder(log telegraf.Logger) *processFinder {
return &processFinder{
errPidFiles: make(map[string]bool),
log: log,
}
}
func (f *processFinder) findByPidFiles(paths []string) ([]processGroup, error) {
groups := make([]processGroup, 0, len(paths))
for _, path := range paths {
buf, err := os.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("failed to read pidfile %q: %w", path, err)
}
pid, err := strconv.ParseInt(strings.TrimSpace(string(buf)), 10, 32)
if err != nil {
return nil, fmt.Errorf("failed to parse PID in file %q: %w", path, err)
}
p, err := gopsprocess.NewProcess(int32(pid))
if err != nil && !f.errPidFiles[path] {
f.log.Errorf("failed to find process for PID %d of file %q: %v", pid, path, err)
f.errPidFiles[path] = true
}
groups = append(groups, processGroup{
processes: []*gopsprocess.Process{p},
tags: map[string]string{"pidfile": path},
})
}
return groups, nil
}
func findByCgroups(cgroups []string) ([]processGroup, error) {
groups := make([]processGroup, 0, len(cgroups))
for _, cgroup := range cgroups {
path := cgroup
if !filepath.IsAbs(cgroup) {
path = filepath.Join("/sys", "fs", "cgroup", cgroup)
}
files, err := filepath.Glob(path)
if err != nil {
return nil, fmt.Errorf("failed to determine files for cgroup %q: %w", cgroup, err)
}
for _, fpath := range files {
if f, err := os.Stat(fpath); err != nil {
return nil, fmt.Errorf("accessing %q failed: %w", fpath, err)
} else if !f.IsDir() {
return nil, fmt.Errorf("%q is not a directory", fpath)
}
fn := filepath.Join(fpath, "cgroup.procs")
buf, err := os.ReadFile(fn)
if err != nil {
return nil, err
}
lines := bytes.Split(buf, []byte{'\n'})
procs := make([]*gopsprocess.Process, 0, len(lines))
for _, l := range lines {
l := strings.TrimSpace(string(l))
if len(l) == 0 {
continue
}
pid, err := strconv.ParseInt(l, 10, 32)
if err != nil {
return nil, fmt.Errorf("failed to parse PID %q in file %q", l, fpath)
}
p, err := gopsprocess.NewProcess(int32(pid))
if err != nil {
return nil, fmt.Errorf("failed to find process for PID %d of %q: %w", pid, fpath, err)
}
procs = append(procs, p)
}
groups = append(groups, processGroup{
processes: procs,
tags: map[string]string{"cgroup": cgroup, "cgroup_full": fpath}})
}
}
return groups, nil
}
func findBySupervisorUnits(units string) ([]processGroup, error) {
buf, err := execCommand("supervisorctl", "status", units).Output()
if err != nil && !strings.Contains(err.Error(), "exit status 3") {
// Exit status 3 means at least one process is in one of the "STOPPED" states
return nil, fmt.Errorf("failed to execute 'supervisorctl': %w", err)
}
lines := strings.Split(string(buf), "\n")
// Get the PID, running status, running time and boot time of the main process:
// pid 11779, uptime 17:41:16
// Exited too quickly (process log may have details)
groups := make([]processGroup, 0, len(lines))
for _, line := range lines {
if line == "" {
continue
}
kv := strings.Fields(line)
if len(kv) < 2 {
// Not a key-value pair
continue
}
name, status := kv[0], kv[1]
tags := map[string]string{
"supervisor_unit": name,
"status": status,
}
var procs []*gopsprocess.Process
switch status {
case "FATAL", "EXITED", "BACKOFF", "STOPPING":
tags["error"] = strings.Join(kv[2:], " ")
case "RUNNING":
tags["uptimes"] = kv[5]
rawpid := strings.ReplaceAll(kv[3], ",", "")
grouppid, err := strconv.ParseInt(rawpid, 10, 32)
if err != nil {
return nil, fmt.Errorf("failed to parse group PID %q: %w", rawpid, err)
}
p, err := gopsprocess.NewProcess(int32(grouppid))
if err != nil {
return nil, fmt.Errorf("failed to find process for PID %d of unit %q: %w", grouppid, name, err)
}
// Get all children of the supervisor unit
procs, err = p.Children()
if err != nil {
return nil, fmt.Errorf("failed to get children for PID %d of unit %q: %w", grouppid, name, err)
}
tags["parent_pid"] = rawpid
case "STOPPED", "UNKNOWN", "STARTING":
// No additional info
}
groups = append(groups, processGroup{
processes: procs,
tags: tags,
})
}
return groups, nil
}