Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
e393c3af3f
commit
4978089aab
4963 changed files with 677545 additions and 0 deletions
300
plugins/inputs/procstat/README.md
Normal file
300
plugins/inputs/procstat/README.md
Normal file
|
@ -0,0 +1,300 @@
|
|||
# Procstat Input Plugin
|
||||
|
||||
The procstat plugin can be used to monitor the system resource usage of one or
|
||||
more processes. The procstat_lookup metric displays the query information,
|
||||
specifically the number of PIDs returned on a search.
|
||||
|
||||
Processes can be selected for monitoring using one of several methods:
|
||||
|
||||
- pidfile
|
||||
- exe
|
||||
- pattern
|
||||
- user
|
||||
- systemd_unit
|
||||
- cgroup
|
||||
- supervisor_unit
|
||||
- win_service
|
||||
|
||||
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
|
||||
|
||||
In addition to the plugin-specific configuration settings, plugins support
|
||||
additional global and plugin configuration settings. These settings are used to
|
||||
modify metrics, tags, and fields or create aliases and configure ordering, etc.
|
||||
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||
|
||||
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
|
||||
|
||||
## Configuration
|
||||
|
||||
```toml @sample.conf
|
||||
# Monitor process cpu and memory usage
|
||||
[[inputs.procstat]]
|
||||
## PID file to monitor process
|
||||
pid_file = "/var/run/nginx.pid"
|
||||
## executable name (ie, pgrep <exe>)
|
||||
# exe = "nginx"
|
||||
## pattern as argument for pgrep (ie, pgrep -f <pattern>)
|
||||
# pattern = "nginx"
|
||||
## user as argument for pgrep (ie, pgrep -u <user>)
|
||||
# user = "nginx"
|
||||
## Systemd unit name, supports globs when include_systemd_children is set to true
|
||||
# systemd_unit = "nginx.service"
|
||||
# include_systemd_children = false
|
||||
## CGroup name or path, supports globs
|
||||
# cgroup = "systemd/system.slice/nginx.service"
|
||||
## Supervisor service names of supervisorctl management
|
||||
# supervisor_units = ["webserver", "proxy"]
|
||||
|
||||
## Windows service name
|
||||
# win_service = ""
|
||||
|
||||
## override for process_name
|
||||
## This is optional; default is sourced from /proc/<pid>/status
|
||||
# process_name = "bar"
|
||||
|
||||
## Field name prefix
|
||||
# prefix = ""
|
||||
|
||||
## Mode to use when calculating CPU usage. Can be one of 'solaris' or 'irix'.
|
||||
# mode = "irix"
|
||||
|
||||
## Add the given information tag instead of a field
|
||||
## This allows creating unique metrics/series when collecting processes with
|
||||
## otherwise identical tags. However, please be careful as this can easily
|
||||
## result in a large number of series, especially with short-lived processes,
|
||||
## creating high cardinality at the output.
|
||||
## Available options are:
|
||||
## cmdline -- full commandline
|
||||
## pid -- ID of the process
|
||||
## ppid -- ID of the process' parent
|
||||
## status -- state of the process
|
||||
## user -- username owning the process
|
||||
## socket only options:
|
||||
## protocol -- protocol type of the process socket
|
||||
## state -- state of the process socket
|
||||
## src -- source address of the process socket (non-unix sockets)
|
||||
## src_port -- source port of the process socket (non-unix sockets)
|
||||
## dest -- destination address of the process socket (non-unix sockets)
|
||||
## dest_port -- destination port of the process socket (non-unix sockets)
|
||||
## name -- name of the process socket (unix sockets only)
|
||||
## Available for procstat_lookup:
|
||||
## level -- level of the process filtering
|
||||
# tag_with = []
|
||||
|
||||
## Properties to collect
|
||||
## Available options are
|
||||
## cpu -- CPU usage statistics
|
||||
## limits -- set resource limits
|
||||
## memory -- memory usage statistics
|
||||
## mmap -- mapped memory usage statistics (caution: can cause high load)
|
||||
## sockets -- socket statistics for protocols in 'socket_protocols'
|
||||
# properties = ["cpu", "limits", "memory", "mmap"]
|
||||
|
||||
## Protocol filter for the sockets property
|
||||
## Available options are
|
||||
## all -- all of the protocols below
|
||||
## tcp4 -- TCP socket statistics for IPv4
|
||||
## tcp6 -- TCP socket statistics for IPv6
|
||||
## udp4 -- UDP socket statistics for IPv4
|
||||
## udp6 -- UDP socket statistics for IPv6
|
||||
## unix -- Unix socket statistics
|
||||
# socket_protocols = ["all"]
|
||||
|
||||
## Method to use when finding process IDs. Can be one of 'pgrep', or
|
||||
## 'native'. The pgrep finder calls the pgrep executable in the PATH while
|
||||
## the native finder performs the search directly in a manner dependent on the
|
||||
## platform. Default is 'pgrep'
|
||||
# pid_finder = "pgrep"
|
||||
|
||||
## New-style filtering configuration (multiple filter sections are allowed)
|
||||
# [[inputs.procstat.filter]]
|
||||
# ## Name of the filter added as 'filter' tag
|
||||
# name = "shell"
|
||||
#
|
||||
# ## Service filters, only one is allowed
|
||||
# ## Systemd unit names (wildcards are supported)
|
||||
# # systemd_units = []
|
||||
# ## CGroup name or path (wildcards are supported)
|
||||
# # cgroups = []
|
||||
# ## Supervisor service names of supervisorctl management
|
||||
# # supervisor_units = []
|
||||
# ## Windows service names
|
||||
# # win_services = []
|
||||
#
|
||||
# ## Process filters, multiple are allowed
|
||||
# ## Regular expressions to use for matching against the full command
|
||||
# # patterns = ['.*']
|
||||
# ## List of users owning the process (wildcards are supported)
|
||||
# # users = ['*']
|
||||
# ## List of executable paths of the process (wildcards are supported)
|
||||
# # executables = ['*']
|
||||
# ## List of process names (wildcards are supported)
|
||||
# # process_names = ['*']
|
||||
# ## Recursion depth for determining children of the matched processes
|
||||
# ## A negative value means all children with infinite depth
|
||||
# # recursion_depth = 0
|
||||
```
|
||||
|
||||
### Windows support
|
||||
|
||||
Preliminary support for Windows has been added, however you may prefer using
|
||||
the `win_perf_counters` input plugin as a more mature alternative.
|
||||
|
||||
### Darwin specifics
|
||||
|
||||
If you use this plugin with `supervisor_units` *and* `pattern` on Darwin, you
|
||||
**have to** use the `pgrep` finder as the underlying library relies on `pgrep`.
|
||||
|
||||
### Permissions
|
||||
|
||||
Some files or directories may require elevated permissions. As such a user may
|
||||
need to provide telegraf with higher levels of permissions to access and produce
|
||||
metrics.
|
||||
|
||||
## Metrics
|
||||
|
||||
For descriptions of these tags and fields, consider reading one of the
|
||||
following:
|
||||
|
||||
- [Linux Kernel /proc Filesystem][kernel /proc]
|
||||
- [proc manpage][manpage]
|
||||
|
||||
[kernel /proc]: https://www.kernel.org/doc/html/latest/filesystems/proc.html
|
||||
[manpage]: https://man7.org/linux/man-pages/man5/proc.5.html
|
||||
|
||||
Below is an example set of tags and fields:
|
||||
|
||||
- procstat
|
||||
- tags:
|
||||
- pid (if requested)
|
||||
- cmdline (if requested)
|
||||
- process_name
|
||||
- pidfile (when defined)
|
||||
- exe (when defined)
|
||||
- pattern (when defined)
|
||||
- user (when selected)
|
||||
- systemd_unit (when defined)
|
||||
- cgroup (when defined)
|
||||
- cgroup_full (when cgroup or systemd_unit is used with glob)
|
||||
- supervisor_unit (when defined)
|
||||
- win_service (when defined)
|
||||
- parent_pid (for child processes)
|
||||
- child_level (for child processes)
|
||||
- fields:
|
||||
- child_major_faults (int)
|
||||
- child_minor_faults (int)
|
||||
- created_at (int) [epoch in nanoseconds]
|
||||
- cpu_time (int)
|
||||
- cpu_time_iowait (float) (zero for all OSes except Linux)
|
||||
- cpu_time_system (float)
|
||||
- cpu_time_user (float)
|
||||
- cpu_usage (float)
|
||||
- disk_read_bytes (int, Linux only, *telegraf* may need to be run as **root**)
|
||||
- disk_write_bytes (int, Linux only, *telegraf* may need to be run as **root**)
|
||||
- involuntary_context_switches (int)
|
||||
- major_faults (int)
|
||||
- memory_anonymous (int)
|
||||
- memory_private_clean (int)
|
||||
- memory_private_dirty (int)
|
||||
- memory_pss (int)
|
||||
- memory_referenced (int)
|
||||
- memory_rss (int)
|
||||
- memory_shared_clean (int)
|
||||
- memory_shared_dirty (int)
|
||||
- memory_size (int)
|
||||
- memory_swap (int)
|
||||
- memory_usage (float)
|
||||
- memory_vms (int)
|
||||
- minor_faults (int)
|
||||
- nice_priority (int)
|
||||
- num_fds (int, *telegraf* may need to be run as **root**)
|
||||
- num_threads (int)
|
||||
- pid (int)
|
||||
- ppid (int)
|
||||
- status (string)
|
||||
- read_bytes (int, *telegraf* may need to be run as **root**)
|
||||
- read_count (int, *telegraf* may need to be run as **root**)
|
||||
- realtime_priority (int)
|
||||
- rlimit_cpu_time_hard (int)
|
||||
- rlimit_cpu_time_soft (int)
|
||||
- rlimit_file_locks_hard (int)
|
||||
- rlimit_file_locks_soft (int)
|
||||
- rlimit_memory_data_hard (int)
|
||||
- rlimit_memory_data_soft (int)
|
||||
- rlimit_memory_locked_hard (int)
|
||||
- rlimit_memory_locked_soft (int)
|
||||
- rlimit_memory_rss_hard (int)
|
||||
- rlimit_memory_rss_soft (int)
|
||||
- rlimit_memory_stack_hard (int)
|
||||
- rlimit_memory_stack_soft (int)
|
||||
- rlimit_memory_vms_hard (int)
|
||||
- rlimit_memory_vms_soft (int)
|
||||
- rlimit_nice_priority_hard (int)
|
||||
- rlimit_nice_priority_soft (int)
|
||||
- rlimit_num_fds_hard (int)
|
||||
- rlimit_num_fds_soft (int)
|
||||
- rlimit_realtime_priority_hard (int)
|
||||
- rlimit_realtime_priority_soft (int)
|
||||
- rlimit_signals_pending_hard (int)
|
||||
- rlimit_signals_pending_soft (int)
|
||||
- signals_pending (int)
|
||||
- voluntary_context_switches (int)
|
||||
- write_bytes (int, *telegraf* may need to be run as **root**)
|
||||
- write_count (int, *telegraf* may need to be run as **root**)
|
||||
- procstat_lookup
|
||||
- tags:
|
||||
- exe
|
||||
- pid_finder
|
||||
- pid_file
|
||||
- pattern
|
||||
- prefix
|
||||
- user
|
||||
- systemd_unit
|
||||
- cgroup
|
||||
- supervisor_unit
|
||||
- win_service
|
||||
- result
|
||||
- fields:
|
||||
- pid_count (int)
|
||||
- running (int)
|
||||
- result_code (int, success = 0, lookup_error = 1)
|
||||
- procstat_socket (if configured, Linux only)
|
||||
- tags:
|
||||
- pid (if requested)
|
||||
- protocol (if requested)
|
||||
- cmdline (if requested)
|
||||
- process_name
|
||||
- pidfile (when defined)
|
||||
- exe (when defined)
|
||||
- pattern (when defined)
|
||||
- user (when selected)
|
||||
- systemd_unit (when defined)
|
||||
- cgroup (when defined)
|
||||
- cgroup_full (when cgroup or systemd_unit is used with glob)
|
||||
- supervisor_unit (when defined)
|
||||
- win_service (when defined)
|
||||
- fields:
|
||||
- protocol
|
||||
- state
|
||||
- pid
|
||||
- src
|
||||
- src_port (tcp and udp sockets only)
|
||||
- dest (tcp and udp sockets only)
|
||||
- dest_port (tcp and udp sockets only)
|
||||
- bytes_received (tcp sockets only)
|
||||
- bytes_sent (tcp sockets only)
|
||||
- lost (tcp sockets only)
|
||||
- retransmits (tcp sockets only)
|
||||
- rx_queue
|
||||
- tx_queue
|
||||
- inode (unix sockets only)
|
||||
|
||||
*NOTE: Resource limit > 2147483647 will be reported as 2147483647.*
|
||||
|
||||
## Example Output
|
||||
|
||||
```text
|
||||
procstat_lookup,host=prash-laptop,pattern=influxd,pid_finder=pgrep,result=success pid_count=1i,running=1i,result_code=0i 1582089700000000000
|
||||
procstat,host=prash-laptop,pattern=influxd,process_name=influxd,user=root involuntary_context_switches=151496i,child_minor_faults=1061i,child_major_faults=8i,cpu_time_user=2564.81,pid=32025i,major_faults=8609i,created_at=1580107536000000000i,voluntary_context_switches=1058996i,cpu_time_system=616.98,memory_swap=0i,memory_locked=0i,memory_usage=1.7797634601593018,num_threads=18i,cpu_time_iowait=0,memory_rss=148643840i,memory_vms=1435688960i,memory_data=0i,memory_stack=0i,minor_faults=1856550i 1582089700000000000
|
||||
procstat_socket,host=prash-laptop,process_name=browser,protocol=tcp4 bytes_received=826987i,bytes_sent=32869i,dest="192.168.0.2",dest_port=443i,lost=0i,pid=32025i,retransmits=0i,rx_queue=0i,src="192.168.0.1",src_port=52106i,state="established",tx_queue=0i 1582089700000000000
|
||||
```
|
234
plugins/inputs/procstat/filter.go
Normal file
234
plugins/inputs/procstat/filter.go
Normal file
|
@ -0,0 +1,234 @@
|
|||
package procstat
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
gopsprocess "github.com/shirou/gopsutil/v4/process"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
telegraf_filter "github.com/influxdata/telegraf/filter"
|
||||
)
|
||||
|
||||
// filter holds the configuration and the compiled state of one filter
// section. The exported fields are populated from TOML; the unexported
// fields are derived from them in init().
type filter struct {
	// Name identifies the filter; init() rejects an empty name.
	Name string `toml:"name"`
	// Service-level selectors. init() returns an error if more than one of
	// PidFiles, SystemdUnits, SupervisorUnits, WinService and CGroups is set.
	PidFiles        []string `toml:"pid_files"`
	SystemdUnits    []string `toml:"systemd_units"`
	SupervisorUnits []string `toml:"supervisor_units"`
	WinService      []string `toml:"win_services"`
	CGroups         []string `toml:"cgroups"`
	// Process-level constraints applied to the processes found by the
	// service-level selector (or to all processes if none is set).
	Patterns     []string `toml:"patterns"`     // regular expressions matched against the command line
	Users        []string `toml:"users"`        // compiled into filterUser
	Executables  []string `toml:"executables"`  // compiled into filterExecutable
	ProcessNames []string `toml:"process_names"` // compiled into filterProcessName
	// RecursionDepth limits how many levels of children are resolved in
	// applyFilter(); a negative value resolves children until none are left.
	RecursionDepth int             `toml:"recursion_depth"`
	Log            telegraf.Logger `toml:"-"`

	// State prepared by init()
	filterSupervisorUnit string           // SupervisorUnits joined by spaces and trimmed
	filterCmds           []*regexp.Regexp // compiled Patterns
	filterUser           telegraf_filter.Filter
	filterExecutable     telegraf_filter.Filter
	filterProcessName    telegraf_filter.Filter
	finder               *processFinder
}
|
||||
|
||||
func (f *filter) init() error {
|
||||
if f.Name == "" {
|
||||
return errors.New("filter must be named")
|
||||
}
|
||||
|
||||
// Check for only one service selector being active
|
||||
var active []string
|
||||
if len(f.PidFiles) > 0 {
|
||||
active = append(active, "pid_files")
|
||||
}
|
||||
if len(f.CGroups) > 0 {
|
||||
active = append(active, "cgroups")
|
||||
}
|
||||
if len(f.SystemdUnits) > 0 {
|
||||
active = append(active, "systemd_units")
|
||||
}
|
||||
if len(f.SupervisorUnits) > 0 {
|
||||
active = append(active, "supervisor_units")
|
||||
}
|
||||
if len(f.WinService) > 0 {
|
||||
active = append(active, "win_services")
|
||||
}
|
||||
if len(active) > 1 {
|
||||
return fmt.Errorf("cannot select multiple services %q", strings.Join(active, ", "))
|
||||
}
|
||||
|
||||
// Prepare the filters
|
||||
f.filterCmds = make([]*regexp.Regexp, 0, len(f.Patterns))
|
||||
for _, p := range f.Patterns {
|
||||
re, err := regexp.Compile(p)
|
||||
if err != nil {
|
||||
return fmt.Errorf("compiling pattern %q of filter %q failed: %w", p, f.Name, err)
|
||||
}
|
||||
f.filterCmds = append(f.filterCmds, re)
|
||||
}
|
||||
|
||||
f.filterSupervisorUnit = strings.TrimSpace(strings.Join(f.SupervisorUnits, " "))
|
||||
|
||||
var err error
|
||||
if f.filterUser, err = telegraf_filter.Compile(f.Users); err != nil {
|
||||
return fmt.Errorf("compiling users filter for %q failed: %w", f.Name, err)
|
||||
}
|
||||
if f.filterExecutable, err = telegraf_filter.Compile(f.Executables); err != nil {
|
||||
return fmt.Errorf("compiling executables filter for %q failed: %w", f.Name, err)
|
||||
}
|
||||
if f.filterProcessName, err = telegraf_filter.Compile(f.ProcessNames); err != nil {
|
||||
return fmt.Errorf("compiling process-names filter for %q failed: %w", f.Name, err)
|
||||
}
|
||||
|
||||
// Setup the process finder
|
||||
f.finder = newProcessFinder(f.Log)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *filter) applyFilter() ([]processGroup, error) {
|
||||
// Determine processes on service level. if there is no constraint on the
|
||||
// services, use all processes for matching.
|
||||
var groups []processGroup
|
||||
switch {
|
||||
case len(f.PidFiles) > 0:
|
||||
g, err := f.finder.findByPidFiles(f.PidFiles)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
groups = append(groups, g...)
|
||||
case len(f.CGroups) > 0:
|
||||
g, err := findByCgroups(f.CGroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
groups = append(groups, g...)
|
||||
case len(f.SystemdUnits) > 0:
|
||||
g, err := findBySystemdUnits(f.SystemdUnits)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
groups = append(groups, g...)
|
||||
case f.filterSupervisorUnit != "":
|
||||
g, err := findBySupervisorUnits(f.filterSupervisorUnit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
groups = append(groups, g...)
|
||||
case len(f.WinService) > 0:
|
||||
g, err := findByWindowsServices(f.WinService)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
groups = append(groups, g...)
|
||||
default:
|
||||
procs, err := gopsprocess.Processes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
groups = append(groups, processGroup{processes: procs, tags: make(map[string]string)})
|
||||
}
|
||||
|
||||
// Filter by additional properties such as users, patterns etc
|
||||
result := make([]processGroup, 0, len(groups))
|
||||
for _, g := range groups {
|
||||
var matched []*gopsprocess.Process
|
||||
for _, p := range g.processes {
|
||||
// Users
|
||||
if f.filterUser != nil {
|
||||
if username, err := p.Username(); err != nil || !f.filterUser.Match(username) {
|
||||
// Errors can happen if we don't have permissions or the process no longer exists
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Executables
|
||||
if f.filterExecutable != nil {
|
||||
if exe, err := p.Exe(); err != nil || !f.filterExecutable.Match(exe) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Process names
|
||||
if f.filterProcessName != nil {
|
||||
if name, err := p.Name(); err != nil || !f.filterProcessName.Match(name) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Patterns
|
||||
if len(f.filterCmds) > 0 {
|
||||
cmd, err := p.Cmdline()
|
||||
if err != nil {
|
||||
// This can happen if we don't have permissions or the process no longer exists
|
||||
continue
|
||||
}
|
||||
var found bool
|
||||
for _, re := range f.filterCmds {
|
||||
if re.MatchString(cmd) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
matched = append(matched, p)
|
||||
}
|
||||
result = append(result, processGroup{processes: matched, tags: g.tags})
|
||||
}
|
||||
|
||||
// Resolve children down to the requested depth
|
||||
previous := result
|
||||
for depth := 0; depth < f.RecursionDepth || f.RecursionDepth < 0; depth++ {
|
||||
children := make([]processGroup, 0, len(previous))
|
||||
for _, group := range previous {
|
||||
for _, p := range group.processes {
|
||||
c, err := getChildren(p)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get children of process %d: %w", p.Pid, err)
|
||||
}
|
||||
if len(c) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
tags := make(map[string]string, len(group.tags)+1)
|
||||
for k, v := range group.tags {
|
||||
tags[k] = v
|
||||
}
|
||||
tags["parent_pid"] = strconv.FormatInt(int64(p.Pid), 10)
|
||||
|
||||
children = append(children, processGroup{
|
||||
processes: c,
|
||||
tags: tags,
|
||||
level: depth + 1,
|
||||
})
|
||||
}
|
||||
}
|
||||
if len(children) == 0 {
|
||||
break
|
||||
}
|
||||
result = append(result, children...)
|
||||
previous = children
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func getChildren(p *gopsprocess.Process) ([]*gopsprocess.Process, error) {
|
||||
children, err := p.Children()
|
||||
// Check for cases that do not really mean error but rather means that there
|
||||
// is no match.
|
||||
switch {
|
||||
case err == nil,
|
||||
errors.Is(err, gopsprocess.ErrorNoChildren),
|
||||
strings.Contains(err.Error(), "exit status 1"):
|
||||
return children, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unable to get children of process %d: %w", p.Pid, err)
|
||||
}
|
131
plugins/inputs/procstat/native_finder.go
Normal file
131
plugins/inputs/procstat/native_finder.go
Normal file
|
@ -0,0 +1,131 @@
|
|||
package procstat
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
gopsprocess "github.com/shirou/gopsutil/v4/process"
|
||||
)
|
||||
|
||||
// NativeFinder looks up process IDs using the gopsutil library. The type
// carries no state; all lookups query the running system when called.
type NativeFinder struct{}
|
||||
|
||||
// Uid will return all pids for the given user
|
||||
func (*NativeFinder) uid(user string) ([]pid, error) {
|
||||
var dst []pid
|
||||
procs, err := gopsprocess.Processes()
|
||||
if err != nil {
|
||||
return dst, err
|
||||
}
|
||||
for _, p := range procs {
|
||||
username, err := p.Username()
|
||||
if err != nil {
|
||||
// skip, this can be caused by the pid no longer exists, or you don't have permissions to access it
|
||||
continue
|
||||
}
|
||||
if username == user {
|
||||
dst = append(dst, pid(p.Pid))
|
||||
}
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
// PidFile returns the pid from the pid file given.
|
||||
func (*NativeFinder) pidFile(path string) ([]pid, error) {
|
||||
var pids []pid
|
||||
pidString, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return pids, fmt.Errorf("failed to read pidfile %q: %w", path, err)
|
||||
}
|
||||
processID, err := strconv.ParseInt(strings.TrimSpace(string(pidString)), 10, 32)
|
||||
if err != nil {
|
||||
return pids, err
|
||||
}
|
||||
pids = append(pids, pid(processID))
|
||||
return pids, nil
|
||||
}
|
||||
|
||||
// FullPattern matches on the command line when the process was executed
|
||||
func (*NativeFinder) fullPattern(pattern string) ([]pid, error) {
|
||||
var pids []pid
|
||||
regxPattern, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return pids, err
|
||||
}
|
||||
procs, err := fastProcessList()
|
||||
if err != nil {
|
||||
return pids, err
|
||||
}
|
||||
for _, p := range procs {
|
||||
cmd, err := p.Cmdline()
|
||||
if err != nil {
|
||||
// skip, this can be caused by the pid no longer exists, or you don't have permissions to access it
|
||||
continue
|
||||
}
|
||||
if regxPattern.MatchString(cmd) {
|
||||
pids = append(pids, pid(p.Pid))
|
||||
}
|
||||
}
|
||||
return pids, err
|
||||
}
|
||||
|
||||
// Children matches children pids on the command line when the process was executed
|
||||
func (*NativeFinder) children(processID pid) ([]pid, error) {
|
||||
// Get all running processes
|
||||
p, err := gopsprocess.NewProcess(int32(processID))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("getting process %d failed: %w", processID, err)
|
||||
}
|
||||
|
||||
// Get all children of the current process
|
||||
children, err := p.Children()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get children of process %d: %w", p.Pid, err)
|
||||
}
|
||||
pids := make([]pid, 0, len(children))
|
||||
for _, child := range children {
|
||||
pids = append(pids, pid(child.Pid))
|
||||
}
|
||||
|
||||
return pids, err
|
||||
}
|
||||
|
||||
func fastProcessList() ([]*gopsprocess.Process, error) {
|
||||
pids, err := gopsprocess.Pids()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
result := make([]*gopsprocess.Process, 0, len(pids))
|
||||
for _, pid := range pids {
|
||||
result = append(result, &gopsprocess.Process{Pid: pid})
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Pattern matches on the process name
|
||||
func (*NativeFinder) pattern(pattern string) ([]pid, error) {
|
||||
var pids []pid
|
||||
regxPattern, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return pids, err
|
||||
}
|
||||
procs, err := fastProcessList()
|
||||
if err != nil {
|
||||
return pids, err
|
||||
}
|
||||
for _, p := range procs {
|
||||
name, err := processName(p)
|
||||
if err != nil {
|
||||
// skip, this can be caused by the pid no longer exists, or you don't have permissions to access it
|
||||
continue
|
||||
}
|
||||
if regxPattern.MatchString(name) {
|
||||
pids = append(pids, pid(p.Pid))
|
||||
}
|
||||
}
|
||||
return pids, err
|
||||
}
|
98
plugins/inputs/procstat/native_finder_test.go
Normal file
98
plugins/inputs/procstat/native_finder_test.go
Normal file
|
@ -0,0 +1,98 @@
|
|||
package procstat
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/user"
|
||||
"runtime"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func BenchmarkPattern(b *testing.B) {
|
||||
finder := &NativeFinder{}
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err := finder.pattern(".*")
|
||||
require.NoError(b, err)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFullPattern(b *testing.B) {
|
||||
finder := &NativeFinder{}
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err := finder.fullPattern(".*")
|
||||
require.NoError(b, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChildPattern(t *testing.T) {
|
||||
if runtime.GOOS == "windows" || runtime.GOOS == "darwin" {
|
||||
t.Skip("Skipping test on unsupported platform")
|
||||
}
|
||||
|
||||
// Get our own process name
|
||||
parentName, err := os.Executable()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Spawn two child processes and get their PIDs
|
||||
expected := make([]pid, 0, 2)
|
||||
ctx, cancel := context.WithCancel(t.Context())
|
||||
defer cancel()
|
||||
|
||||
// First process
|
||||
cmd1 := exec.CommandContext(ctx, "/bin/sh")
|
||||
require.NoError(t, cmd1.Start(), "starting first command failed")
|
||||
expected = append(expected, pid(cmd1.Process.Pid))
|
||||
|
||||
// Second process
|
||||
cmd2 := exec.CommandContext(ctx, "/bin/sh")
|
||||
require.NoError(t, cmd2.Start(), "starting first command failed")
|
||||
expected = append(expected, pid(cmd2.Process.Pid))
|
||||
|
||||
// Use the plugin to find the children
|
||||
finder := &NativeFinder{}
|
||||
parent, err := finder.pattern(parentName)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, parent, 1)
|
||||
children, err := finder.children(parent[0])
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, expected, children)
|
||||
}
|
||||
|
||||
func TestGather_RealPatternIntegration(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping integration test in short mode")
|
||||
}
|
||||
pg := &NativeFinder{}
|
||||
pids, err := pg.pattern(`procstat`)
|
||||
require.NoError(t, err)
|
||||
require.NotEmpty(t, pids)
|
||||
}
|
||||
|
||||
func TestGather_RealFullPatternIntegration(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping integration test in short mode")
|
||||
}
|
||||
if runtime.GOOS != "windows" {
|
||||
t.Skip("Skipping integration test on Non-Windows OS")
|
||||
}
|
||||
pg := &NativeFinder{}
|
||||
pids, err := pg.fullPattern(`%procstat%`)
|
||||
require.NoError(t, err)
|
||||
require.NotEmpty(t, pids)
|
||||
}
|
||||
|
||||
func TestGather_RealUserIntegration(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping integration test in short mode")
|
||||
}
|
||||
currentUser, err := user.Current()
|
||||
require.NoError(t, err)
|
||||
|
||||
pg := &NativeFinder{}
|
||||
pids, err := pg.uid(currentUser.Username)
|
||||
require.NoError(t, err)
|
||||
require.NotEmpty(t, pids)
|
||||
}
|
381
plugins/inputs/procstat/os_linux.go
Normal file
381
plugins/inputs/procstat/os_linux.go
Normal file
|
@ -0,0 +1,381 @@
|
|||
//go:build linux
|
||||
|
||||
package procstat
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/prometheus/procfs"
|
||||
gopsnet "github.com/shirou/gopsutil/v4/net"
|
||||
gopsprocess "github.com/shirou/gopsutil/v4/process"
|
||||
"github.com/vishvananda/netlink"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
)
|
||||
|
||||
func processName(p *gopsprocess.Process) (string, error) {
|
||||
return p.Exe()
|
||||
}
|
||||
|
||||
// queryPidWithWinServiceName always fails in this Linux build: it returns
// a zero PID together with an error stating that the win_service option is
// not supported. The service name argument is ignored.
func queryPidWithWinServiceName(_ string) (uint32, error) {
	var noPid uint32
	return noPid, errors.New("os not supporting win_service option")
}
|
||||
|
||||
func collectMemmap(proc process, prefix string, fields map[string]any) {
|
||||
memMapStats, err := proc.MemoryMaps(true)
|
||||
if err == nil && len(*memMapStats) == 1 {
|
||||
memMap := (*memMapStats)[0]
|
||||
fields[prefix+"memory_size"] = memMap.Size
|
||||
fields[prefix+"memory_pss"] = memMap.Pss
|
||||
fields[prefix+"memory_shared_clean"] = memMap.SharedClean
|
||||
fields[prefix+"memory_shared_dirty"] = memMap.SharedDirty
|
||||
fields[prefix+"memory_private_clean"] = memMap.PrivateClean
|
||||
fields[prefix+"memory_private_dirty"] = memMap.PrivateDirty
|
||||
fields[prefix+"memory_referenced"] = memMap.Referenced
|
||||
fields[prefix+"memory_anonymous"] = memMap.Anonymous
|
||||
fields[prefix+"memory_swap"] = memMap.Swap
|
||||
}
|
||||
}
|
||||
|
||||
func findBySystemdUnits(units []string) ([]processGroup, error) {
|
||||
ctx := context.Background()
|
||||
conn, err := dbus.NewSystemConnectionContext(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to connect to systemd: %w", err)
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
sdunits, err := conn.ListUnitsByPatternsContext(ctx, []string{"enabled", "disabled", "static"}, units)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list units: %w", err)
|
||||
}
|
||||
|
||||
groups := make([]processGroup, 0, len(sdunits))
|
||||
for _, u := range sdunits {
|
||||
prop, err := conn.GetUnitTypePropertyContext(ctx, u.Name, "Service", "MainPID")
|
||||
if err != nil {
|
||||
// This unit might not be a service or similar
|
||||
continue
|
||||
}
|
||||
raw := prop.Value.Value()
|
||||
pid, ok := raw.(uint32)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("failed to parse PID %v of unit %q: invalid type %T", raw, u, raw)
|
||||
}
|
||||
p, err := gopsprocess.NewProcess(int32(pid))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find process for PID %d of unit %q: %w", pid, u, err)
|
||||
}
|
||||
groups = append(groups, processGroup{
|
||||
processes: []*gopsprocess.Process{p},
|
||||
tags: map[string]string{"systemd_unit": u.Name},
|
||||
})
|
||||
}
|
||||
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
func findByWindowsServices(_ []string) ([]processGroup, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func collectTotalReadWrite(proc process) (r, w uint64, err error) {
|
||||
path := internal.GetProcPath()
|
||||
fs, err := procfs.NewFS(path)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
p, err := fs.Proc(int(proc.pid()))
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
stat, err := p.IO()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
return stat.RChar, stat.WChar, nil
|
||||
}
|
||||
|
||||
/* Socket statistics functions */
|
||||
func socketStateName(s uint8) string {
|
||||
switch s {
|
||||
case unix.BPF_TCP_ESTABLISHED:
|
||||
return "established"
|
||||
case unix.BPF_TCP_SYN_SENT:
|
||||
return "syn-sent"
|
||||
case unix.BPF_TCP_SYN_RECV:
|
||||
return "syn-recv"
|
||||
case unix.BPF_TCP_FIN_WAIT1:
|
||||
return "fin-wait1"
|
||||
case unix.BPF_TCP_FIN_WAIT2:
|
||||
return "fin-wait2"
|
||||
case unix.BPF_TCP_TIME_WAIT:
|
||||
return "time-wait"
|
||||
case unix.BPF_TCP_CLOSE:
|
||||
return "closed"
|
||||
case unix.BPF_TCP_CLOSE_WAIT:
|
||||
return "close-wait"
|
||||
case unix.BPF_TCP_LAST_ACK:
|
||||
return "last-ack"
|
||||
case unix.BPF_TCP_LISTEN:
|
||||
return "listen"
|
||||
case unix.BPF_TCP_CLOSING:
|
||||
return "closing"
|
||||
case unix.BPF_TCP_NEW_SYN_RECV:
|
||||
return "sync-recv"
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// socketTypeName translates a numeric socket type, expressed with the
// syscall.SOCK_* constants, into the lower-case name used in the metrics.
// Types without a mapping are reported as "unknown".
func socketTypeName(t uint8) string {
	name := "unknown"

	switch t {
	case syscall.SOCK_STREAM:
		name = "stream"
	case syscall.SOCK_DGRAM:
		name = "dgram"
	case syscall.SOCK_RAW:
		name = "raw"
	case syscall.SOCK_RDM:
		name = "rdm"
	case syscall.SOCK_SEQPACKET:
		name = "seqpacket"
	case syscall.SOCK_DCCP:
		name = "dccp"
	case syscall.SOCK_PACKET:
		name = "packet"
	}

	return name
}
|
||||
|
||||
func mapFdToInode(pid int32, fd uint32) (uint32, error) {
|
||||
root := internal.GetProcPath()
|
||||
fn := fmt.Sprintf("%s/%d/fd/%d", root, pid, fd)
|
||||
link, err := os.Readlink(fn)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("reading link failed: %w", err)
|
||||
}
|
||||
target := strings.TrimPrefix(link, "socket:[")
|
||||
target = strings.TrimSuffix(target, "]")
|
||||
inode, err := strconv.ParseUint(target, 10, 32)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("parsing link %q: %w", link, err)
|
||||
}
|
||||
|
||||
return uint32(inode), nil
|
||||
}
|
||||
|
||||
func statsTCP(conns []gopsnet.ConnectionStat, family uint8) ([]map[string]interface{}, error) {
|
||||
if len(conns) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// For TCP we need the inode for each connection to relate the connection
|
||||
// statistics to the actual process socket. Therefore, map the
|
||||
// file-descriptors to inodes using the /proc/<pid>/fd entries.
|
||||
inodes := make(map[uint32]gopsnet.ConnectionStat, len(conns))
|
||||
for _, c := range conns {
|
||||
inode, err := mapFdToInode(c.Pid, c.Fd)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("mapping fd %d of pid %d failed: %w", c.Fd, c.Pid, err)
|
||||
}
|
||||
inodes[inode] = c
|
||||
}
|
||||
|
||||
// Get the TCP socket statistics from the netlink socket.
|
||||
responses, err := netlink.SocketDiagTCPInfo(family)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("connecting to diag socket failed: %w", err)
|
||||
}
|
||||
|
||||
// Filter the responses via the inodes belonging to the process
|
||||
fieldslist := make([]map[string]interface{}, 0)
|
||||
for _, r := range responses {
|
||||
c, found := inodes[r.InetDiagMsg.INode]
|
||||
if !found {
|
||||
// The inode does not belong to the process.
|
||||
continue
|
||||
}
|
||||
|
||||
var proto string
|
||||
switch r.InetDiagMsg.Family {
|
||||
case syscall.AF_INET:
|
||||
proto = "tcp4"
|
||||
case syscall.AF_INET6:
|
||||
proto = "tcp6"
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"protocol": proto,
|
||||
"state": socketStateName(r.InetDiagMsg.State),
|
||||
"pid": c.Pid,
|
||||
"src": r.InetDiagMsg.ID.Source.String(),
|
||||
"src_port": r.InetDiagMsg.ID.SourcePort,
|
||||
"dest": r.InetDiagMsg.ID.Destination.String(),
|
||||
"dest_port": r.InetDiagMsg.ID.DestinationPort,
|
||||
"bytes_received": r.TCPInfo.Bytes_received,
|
||||
"bytes_sent": r.TCPInfo.Bytes_sent,
|
||||
"lost": r.TCPInfo.Lost,
|
||||
"retransmits": r.TCPInfo.Retransmits,
|
||||
"rx_queue": r.InetDiagMsg.RQueue,
|
||||
"tx_queue": r.InetDiagMsg.WQueue,
|
||||
}
|
||||
fieldslist = append(fieldslist, fields)
|
||||
}
|
||||
|
||||
return fieldslist, nil
|
||||
}
|
||||
|
||||
func statsUDP(conns []gopsnet.ConnectionStat, family uint8) ([]map[string]interface{}, error) {
|
||||
if len(conns) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// For UDP we need the inode for each connection to relate the connection
|
||||
// statistics to the actual process socket. Therefore, map the
|
||||
// file-descriptors to inodes using the /proc/<pid>/fd entries.
|
||||
inodes := make(map[uint32]gopsnet.ConnectionStat, len(conns))
|
||||
for _, c := range conns {
|
||||
inode, err := mapFdToInode(c.Pid, c.Fd)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("mapping fd %d of pid %d failed: %w", c.Fd, c.Pid, err)
|
||||
}
|
||||
inodes[inode] = c
|
||||
}
|
||||
|
||||
// Get the UDP socket statistics from the netlink socket.
|
||||
responses, err := netlink.SocketDiagUDPInfo(family)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("connecting to diag socket failed: %w", err)
|
||||
}
|
||||
|
||||
// Filter the responses via the inodes belonging to the process
|
||||
fieldslist := make([]map[string]interface{}, 0)
|
||||
for _, r := range responses {
|
||||
c, found := inodes[r.InetDiagMsg.INode]
|
||||
if !found {
|
||||
// The inode does not belong to the process.
|
||||
continue
|
||||
}
|
||||
|
||||
var proto string
|
||||
switch r.InetDiagMsg.Family {
|
||||
case syscall.AF_INET:
|
||||
proto = "udp4"
|
||||
case syscall.AF_INET6:
|
||||
proto = "udp6"
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"protocol": proto,
|
||||
"state": socketStateName(r.InetDiagMsg.State),
|
||||
"pid": c.Pid,
|
||||
"src": r.InetDiagMsg.ID.Source.String(),
|
||||
"src_port": r.InetDiagMsg.ID.SourcePort,
|
||||
"dest": r.InetDiagMsg.ID.Destination.String(),
|
||||
"dest_port": r.InetDiagMsg.ID.DestinationPort,
|
||||
"rx_queue": r.InetDiagMsg.RQueue,
|
||||
"tx_queue": r.InetDiagMsg.WQueue,
|
||||
}
|
||||
fieldslist = append(fieldslist, fields)
|
||||
}
|
||||
|
||||
return fieldslist, nil
|
||||
}
|
||||
|
||||
func statsUnix(conns []gopsnet.ConnectionStat) ([]map[string]interface{}, error) {
|
||||
if len(conns) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// We need to read the inode for each connection to relate the connection
|
||||
// statistics to the actual process socket. Therefore, map the
|
||||
// file-descriptors to inodes using the /proc/<pid>/fd entries.
|
||||
inodes := make(map[uint32]gopsnet.ConnectionStat, len(conns))
|
||||
for _, c := range conns {
|
||||
inode, err := mapFdToInode(c.Pid, c.Fd)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("mapping fd %d of pid %d failed: %w", c.Fd, c.Pid, err)
|
||||
}
|
||||
inodes[inode] = c
|
||||
}
|
||||
|
||||
// Get the UDP socket statistics from the netlink socket.
|
||||
responses, err := netlink.UnixSocketDiagInfo()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("connecting to diag socket failed: %w", err)
|
||||
}
|
||||
|
||||
// Filter the responses via the inodes belonging to the process
|
||||
fieldslist := make([]map[string]interface{}, 0)
|
||||
for _, r := range responses {
|
||||
// Check if the inode belongs to the process and skip otherwise
|
||||
c, found := inodes[r.DiagMsg.INode]
|
||||
if !found {
|
||||
continue
|
||||
}
|
||||
|
||||
name := c.Laddr.IP
|
||||
if name == "" {
|
||||
name = fmt.Sprintf("inode-%d", r.DiagMsg.INode)
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"protocol": "unix",
|
||||
"type": "stream",
|
||||
"state": socketStateName(r.DiagMsg.State),
|
||||
"pid": c.Pid,
|
||||
"name": name,
|
||||
"rx_queue": r.Queue.RQueue,
|
||||
"tx_queue": r.Queue.WQueue,
|
||||
"inode": r.DiagMsg.INode,
|
||||
}
|
||||
if r.Peer != nil {
|
||||
fields["peer"] = *r.Peer
|
||||
}
|
||||
fieldslist = append(fieldslist, fields)
|
||||
}
|
||||
|
||||
// Diagnosis only works for stream sockets, so add all non-stream sockets
|
||||
// of the process without further data
|
||||
for inode, c := range inodes {
|
||||
if c.Type == syscall.SOCK_STREAM {
|
||||
continue
|
||||
}
|
||||
|
||||
name := c.Laddr.IP
|
||||
if name == "" {
|
||||
name = fmt.Sprintf("inode-%d", inode)
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"protocol": "unix",
|
||||
"type": socketTypeName(uint8(c.Type)),
|
||||
"state": "close",
|
||||
"pid": c.Pid,
|
||||
"name": name,
|
||||
"rx_queue": uint32(0),
|
||||
"tx_queue": uint32(0),
|
||||
"inode": inode,
|
||||
}
|
||||
fieldslist = append(fieldslist, fields)
|
||||
}
|
||||
|
||||
return fieldslist, nil
|
||||
}
|
103
plugins/inputs/procstat/os_others.go
Normal file
103
plugins/inputs/procstat/os_others.go
Normal file
|
@ -0,0 +1,103 @@
|
|||
//go:build !linux && !windows
|
||||
|
||||
package procstat
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"syscall"
|
||||
|
||||
gopsnet "github.com/shirou/gopsutil/v4/net"
|
||||
gopsprocess "github.com/shirou/gopsutil/v4/process"
|
||||
)
|
||||
|
||||
func processName(p *gopsprocess.Process) (string, error) {
|
||||
return p.Exe()
|
||||
}
|
||||
|
||||
// queryPidWithWinServiceName is the placeholder for builds that are neither
// Linux nor Windows. The service name is ignored and the call always fails,
// as the win_service option is not available here.
func queryPidWithWinServiceName(_ string) (uint32, error) {
	var noPID uint32
	return noPID, errors.New("os not supporting win_service option")
}
|
||||
|
||||
// collectMemmap is a no-op in this build (the file is constrained to
// !linux && !windows); it ignores all arguments and adds no fields.
func collectMemmap(process, string, map[string]any) {}
|
||||
|
||||
func findBySystemdUnits([]string) ([]processGroup, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func findByWindowsServices([]string) ([]processGroup, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func collectTotalReadWrite(process) (r, w uint64, err error) {
|
||||
return 0, 0, errors.ErrUnsupported
|
||||
}
|
||||
|
||||
func statsTCP(conns []gopsnet.ConnectionStat, _ uint8) ([]map[string]interface{}, error) {
|
||||
if len(conns) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Filter the responses via the inodes belonging to the process
|
||||
fieldslist := make([]map[string]interface{}, 0, len(conns))
|
||||
for _, c := range conns {
|
||||
var proto string
|
||||
switch c.Family {
|
||||
case syscall.AF_INET:
|
||||
proto = "tcp4"
|
||||
case syscall.AF_INET6:
|
||||
proto = "tcp6"
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"protocol": proto,
|
||||
"state": c.Status,
|
||||
"pid": c.Pid,
|
||||
"src": c.Laddr.IP,
|
||||
"src_port": c.Laddr.Port,
|
||||
"dest": c.Raddr.IP,
|
||||
"dest_port": c.Raddr.Port,
|
||||
}
|
||||
fieldslist = append(fieldslist, fields)
|
||||
}
|
||||
|
||||
return fieldslist, nil
|
||||
}
|
||||
|
||||
func statsUDP(conns []gopsnet.ConnectionStat, _ uint8) ([]map[string]interface{}, error) {
|
||||
if len(conns) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Filter the responses via the inodes belonging to the process
|
||||
fieldslist := make([]map[string]interface{}, 0, len(conns))
|
||||
for _, c := range conns {
|
||||
var proto string
|
||||
switch c.Family {
|
||||
case syscall.AF_INET:
|
||||
proto = "udp4"
|
||||
case syscall.AF_INET6:
|
||||
proto = "udp6"
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"protocol": proto,
|
||||
"state": c.Status,
|
||||
"pid": c.Pid,
|
||||
"src": c.Laddr.IP,
|
||||
"src_port": c.Laddr.Port,
|
||||
"dest": c.Raddr.IP,
|
||||
"dest_port": c.Raddr.Port,
|
||||
}
|
||||
fieldslist = append(fieldslist, fields)
|
||||
}
|
||||
|
||||
return fieldslist, nil
|
||||
}
|
||||
|
||||
func statsUnix([]gopsnet.ConnectionStat) ([]map[string]interface{}, error) {
|
||||
return nil, errors.ErrUnsupported
|
||||
}
|
160
plugins/inputs/procstat/os_windows.go
Normal file
160
plugins/inputs/procstat/os_windows.go
Normal file
|
@ -0,0 +1,160 @@
|
|||
//go:build windows
|
||||
|
||||
package procstat
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
gopsnet "github.com/shirou/gopsutil/v4/net"
|
||||
gopsprocess "github.com/shirou/gopsutil/v4/process"
|
||||
"golang.org/x/sys/windows"
|
||||
"golang.org/x/sys/windows/svc/mgr"
|
||||
)
|
||||
|
||||
func processName(p *gopsprocess.Process) (string, error) {
|
||||
return p.Name()
|
||||
}
|
||||
|
||||
func getService(name string) (*mgr.Service, error) {
|
||||
m, err := mgr.Connect()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer m.Disconnect()
|
||||
|
||||
srv, err := m.OpenService(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return srv, nil
|
||||
}
|
||||
|
||||
func queryPidWithWinServiceName(winServiceName string) (uint32, error) {
|
||||
srv, err := getService(winServiceName)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var p *windows.SERVICE_STATUS_PROCESS
|
||||
var bytesNeeded uint32
|
||||
var buf []byte
|
||||
|
||||
err = windows.QueryServiceStatusEx(srv.Handle, windows.SC_STATUS_PROCESS_INFO, nil, 0, &bytesNeeded)
|
||||
if !errors.Is(err, windows.ERROR_INSUFFICIENT_BUFFER) {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
buf = make([]byte, bytesNeeded)
|
||||
p = (*windows.SERVICE_STATUS_PROCESS)(unsafe.Pointer(&buf[0])) //nolint:gosec // G103: Valid use of unsafe call to create SERVICE_STATUS_PROCESS
|
||||
if err := windows.QueryServiceStatusEx(srv.Handle, windows.SC_STATUS_PROCESS_INFO, &buf[0], uint32(len(buf)), &bytesNeeded); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return p.ProcessId, nil
|
||||
}
|
||||
|
||||
// collectMemmap is a no-op in the Windows build; it ignores all arguments and
// adds no fields.
func collectMemmap(process, string, map[string]any) {}
|
||||
|
||||
func findBySystemdUnits([]string) ([]processGroup, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func findByWindowsServices(services []string) ([]processGroup, error) {
|
||||
groups := make([]processGroup, 0, len(services))
|
||||
for _, service := range services {
|
||||
pid, err := queryPidWithWinServiceName(service)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query PID of service %q: %w", service, err)
|
||||
}
|
||||
|
||||
p, err := gopsprocess.NewProcess(int32(pid))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find process for PID %d of service %q: %w", pid, service, err)
|
||||
}
|
||||
|
||||
groups = append(groups, processGroup{
|
||||
processes: []*gopsprocess.Process{p},
|
||||
tags: map[string]string{"win_service": service},
|
||||
})
|
||||
}
|
||||
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
func collectTotalReadWrite(process) (r, w uint64, err error) {
|
||||
return 0, 0, errors.ErrUnsupported
|
||||
}
|
||||
|
||||
func statsTCP(conns []gopsnet.ConnectionStat, _ uint8) ([]map[string]interface{}, error) {
|
||||
if len(conns) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Filter the responses via the inodes belonging to the process
|
||||
fieldslist := make([]map[string]interface{}, 0, len(conns))
|
||||
for _, c := range conns {
|
||||
var proto string
|
||||
switch c.Family {
|
||||
case syscall.AF_INET:
|
||||
proto = "tcp4"
|
||||
case syscall.AF_INET6:
|
||||
proto = "tcp6"
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"protocol": proto,
|
||||
"state": c.Status,
|
||||
"pid": c.Pid,
|
||||
"src": c.Laddr.IP,
|
||||
"src_port": c.Laddr.Port,
|
||||
"dest": c.Raddr.IP,
|
||||
"dest_port": c.Raddr.Port,
|
||||
}
|
||||
fieldslist = append(fieldslist, fields)
|
||||
}
|
||||
|
||||
return fieldslist, nil
|
||||
}
|
||||
|
||||
func statsUDP(conns []gopsnet.ConnectionStat, _ uint8) ([]map[string]interface{}, error) {
|
||||
if len(conns) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Filter the responses via the inodes belonging to the process
|
||||
fieldslist := make([]map[string]interface{}, 0, len(conns))
|
||||
for _, c := range conns {
|
||||
var proto string
|
||||
switch c.Family {
|
||||
case syscall.AF_INET:
|
||||
proto = "udp4"
|
||||
case syscall.AF_INET6:
|
||||
proto = "udp6"
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"protocol": proto,
|
||||
"state": c.Status,
|
||||
"pid": c.Pid,
|
||||
"src": c.Laddr.IP,
|
||||
"src_port": c.Laddr.Port,
|
||||
"dest": c.Raddr.IP,
|
||||
"dest_port": c.Raddr.Port,
|
||||
}
|
||||
fieldslist = append(fieldslist, fields)
|
||||
}
|
||||
|
||||
return fieldslist, nil
|
||||
}
|
||||
|
||||
func statsUnix([]gopsnet.ConnectionStat) ([]map[string]interface{}, error) {
|
||||
return nil, nil
|
||||
}
|
85
plugins/inputs/procstat/pgrep.go
Normal file
85
plugins/inputs/procstat/pgrep.go
Normal file
|
@ -0,0 +1,85 @@
|
|||
package procstat
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
)
|
||||
|
||||
// pgrep is the pidFinder implementation that executes the pgrep binary to
// look up process IDs.
type pgrep struct {
	// path is the location of the pgrep executable that gets executed.
	path string
}
|
||||
|
||||
func newPgrepFinder() (pidFinder, error) {
|
||||
path, err := exec.LookPath("pgrep")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not find pgrep binary: %w", err)
|
||||
}
|
||||
return &pgrep{path}, nil
|
||||
}
|
||||
|
||||
func (*pgrep) pidFile(path string) ([]pid, error) {
|
||||
var pids []pid
|
||||
pidString, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return pids, fmt.Errorf("failed to read pidfile %q: %w",
|
||||
path, err)
|
||||
}
|
||||
processID, err := strconv.ParseInt(strings.TrimSpace(string(pidString)), 10, 32)
|
||||
if err != nil {
|
||||
return pids, err
|
||||
}
|
||||
pids = append(pids, pid(processID))
|
||||
return pids, nil
|
||||
}
|
||||
|
||||
func (pg *pgrep) pattern(pattern string) ([]pid, error) {
|
||||
args := []string{pattern}
|
||||
return pg.find(args)
|
||||
}
|
||||
|
||||
func (pg *pgrep) uid(user string) ([]pid, error) {
|
||||
args := []string{"-u", user}
|
||||
return pg.find(args)
|
||||
}
|
||||
|
||||
func (pg *pgrep) fullPattern(pattern string) ([]pid, error) {
|
||||
args := []string{"-f", pattern}
|
||||
return pg.find(args)
|
||||
}
|
||||
|
||||
func (pg *pgrep) children(pid pid) ([]pid, error) {
|
||||
args := []string{"-P", strconv.FormatInt(int64(pid), 10)}
|
||||
return pg.find(args)
|
||||
}
|
||||
|
||||
func (pg *pgrep) find(args []string) ([]pid, error) {
|
||||
// Execute pgrep with the given arguments
|
||||
buf, err := exec.Command(pg.path, args...).Output()
|
||||
if err != nil {
|
||||
// Exit code 1 means "no processes found" so we should not return
|
||||
// an error in this case.
|
||||
if status, _ := internal.ExitStatus(err); status == 1 {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, fmt.Errorf("error running %q: %w", pg.path, err)
|
||||
}
|
||||
out := string(buf)
|
||||
|
||||
// Parse the command output to extract the PIDs
|
||||
fields := strings.Fields(out)
|
||||
pids := make([]pid, 0, len(fields))
|
||||
for _, field := range fields {
|
||||
processID, err := strconv.ParseInt(field, 10, 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pids = append(pids, pid(processID))
|
||||
}
|
||||
return pids, nil
|
||||
}
|
380
plugins/inputs/procstat/process.go
Normal file
380
plugins/inputs/procstat/process.go
Normal file
|
@ -0,0 +1,380 @@
|
|||
package procstat
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
gopsnet "github.com/shirou/gopsutil/v4/net"
|
||||
gopsprocess "github.com/shirou/gopsutil/v4/process"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/metric"
|
||||
)
|
||||
|
||||
type process interface {
|
||||
Name() (string, error)
|
||||
MemoryMaps(bool) (*[]gopsprocess.MemoryMapsStat, error)
|
||||
pid() pid
|
||||
setTag(string, string)
|
||||
metrics(string, *collectionConfig, time.Time) ([]telegraf.Metric, error)
|
||||
}
|
||||
|
||||
type pidFinder interface {
|
||||
pidFile(path string) ([]pid, error)
|
||||
pattern(pattern string) ([]pid, error)
|
||||
uid(user string) ([]pid, error)
|
||||
fullPattern(path string) ([]pid, error)
|
||||
children(pid pid) ([]pid, error)
|
||||
}
|
||||
|
||||
type proc struct {
|
||||
hasCPUTimes bool
|
||||
tags map[string]string
|
||||
*gopsprocess.Process
|
||||
}
|
||||
|
||||
func newProc(pid pid) (process, error) {
|
||||
p, err := gopsprocess.NewProcess(int32(pid))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
proc := &proc{
|
||||
Process: p,
|
||||
hasCPUTimes: false,
|
||||
tags: make(map[string]string),
|
||||
}
|
||||
return proc, nil
|
||||
}
|
||||
|
||||
func (p *proc) pid() pid {
|
||||
return pid(p.Process.Pid)
|
||||
}
|
||||
|
||||
func (p *proc) setTag(k, v string) {
|
||||
p.tags[k] = v
|
||||
}
|
||||
|
||||
func (p *proc) percent(_ time.Duration) (float64, error) {
|
||||
cpuPerc, err := p.Process.Percent(time.Duration(0))
|
||||
if !p.hasCPUTimes && err == nil {
|
||||
p.hasCPUTimes = true
|
||||
return 0, errors.New("must call Percent twice to compute percent cpu")
|
||||
}
|
||||
return cpuPerc, err
|
||||
}
|
||||
|
||||
// Add metrics a single process
|
||||
func (p *proc) metrics(prefix string, cfg *collectionConfig, t time.Time) ([]telegraf.Metric, error) {
|
||||
if prefix != "" {
|
||||
prefix += "_"
|
||||
}
|
||||
|
||||
fields := make(map[string]interface{})
|
||||
numThreads, err := p.NumThreads()
|
||||
if err == nil {
|
||||
fields[prefix+"num_threads"] = numThreads
|
||||
}
|
||||
|
||||
fds, err := p.NumFDs()
|
||||
if err == nil {
|
||||
fields[prefix+"num_fds"] = fds
|
||||
}
|
||||
|
||||
ctx, err := p.NumCtxSwitches()
|
||||
if err == nil {
|
||||
fields[prefix+"voluntary_context_switches"] = ctx.Voluntary
|
||||
fields[prefix+"involuntary_context_switches"] = ctx.Involuntary
|
||||
}
|
||||
|
||||
faults, err := p.PageFaults()
|
||||
if err == nil {
|
||||
fields[prefix+"minor_faults"] = faults.MinorFaults
|
||||
fields[prefix+"major_faults"] = faults.MajorFaults
|
||||
fields[prefix+"child_minor_faults"] = faults.ChildMinorFaults
|
||||
fields[prefix+"child_major_faults"] = faults.ChildMajorFaults
|
||||
}
|
||||
|
||||
io, err := p.IOCounters()
|
||||
if err == nil {
|
||||
fields[prefix+"read_count"] = io.ReadCount
|
||||
fields[prefix+"write_count"] = io.WriteCount
|
||||
fields[prefix+"read_bytes"] = io.ReadBytes
|
||||
fields[prefix+"write_bytes"] = io.WriteBytes
|
||||
}
|
||||
|
||||
// Linux fixup for gopsutils exposing the disk-only-IO instead of the total
|
||||
// I/O as for example on Windows
|
||||
if rc, wc, err := collectTotalReadWrite(p); err == nil {
|
||||
fields[prefix+"read_bytes"] = rc
|
||||
fields[prefix+"write_bytes"] = wc
|
||||
fields[prefix+"disk_read_bytes"] = io.ReadBytes
|
||||
fields[prefix+"disk_write_bytes"] = io.WriteBytes
|
||||
}
|
||||
|
||||
createdAt, err := p.CreateTime() // returns epoch in ms
|
||||
if err == nil {
|
||||
fields[prefix+"created_at"] = createdAt * 1000000 // ms to ns
|
||||
}
|
||||
|
||||
if cfg.features["cpu"] {
|
||||
cpuTime, err := p.Times()
|
||||
if err == nil {
|
||||
fields[prefix+"cpu_time_user"] = cpuTime.User
|
||||
fields[prefix+"cpu_time_system"] = cpuTime.System
|
||||
fields[prefix+"cpu_time_iowait"] = cpuTime.Iowait // only reported on Linux
|
||||
}
|
||||
|
||||
cpuPerc, err := p.percent(time.Duration(0))
|
||||
if err == nil {
|
||||
if cfg.solarisMode {
|
||||
fields[prefix+"cpu_usage"] = cpuPerc / float64(runtime.NumCPU())
|
||||
} else {
|
||||
fields[prefix+"cpu_usage"] = cpuPerc
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This only returns values for RSS and VMS
|
||||
if cfg.features["memory"] {
|
||||
mem, err := p.MemoryInfo()
|
||||
if err == nil {
|
||||
fields[prefix+"memory_rss"] = mem.RSS
|
||||
fields[prefix+"memory_vms"] = mem.VMS
|
||||
}
|
||||
|
||||
memPerc, err := p.MemoryPercent()
|
||||
if err == nil {
|
||||
fields[prefix+"memory_usage"] = memPerc
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.features["mmap"] {
|
||||
collectMemmap(p, prefix, fields)
|
||||
}
|
||||
|
||||
if cfg.features["limits"] {
|
||||
rlims, err := p.RlimitUsage(true)
|
||||
if err == nil {
|
||||
for _, rlim := range rlims {
|
||||
var name string
|
||||
switch rlim.Resource {
|
||||
case gopsprocess.RLIMIT_CPU:
|
||||
name = "cpu_time"
|
||||
case gopsprocess.RLIMIT_DATA:
|
||||
name = "memory_data"
|
||||
case gopsprocess.RLIMIT_STACK:
|
||||
name = "memory_stack"
|
||||
case gopsprocess.RLIMIT_RSS:
|
||||
name = "memory_rss"
|
||||
case gopsprocess.RLIMIT_NOFILE:
|
||||
name = "num_fds"
|
||||
case gopsprocess.RLIMIT_MEMLOCK:
|
||||
name = "memory_locked"
|
||||
case gopsprocess.RLIMIT_AS:
|
||||
name = "memory_vms"
|
||||
case gopsprocess.RLIMIT_LOCKS:
|
||||
name = "file_locks"
|
||||
case gopsprocess.RLIMIT_SIGPENDING:
|
||||
name = "signals_pending"
|
||||
case gopsprocess.RLIMIT_NICE:
|
||||
name = "nice_priority"
|
||||
case gopsprocess.RLIMIT_RTPRIO:
|
||||
name = "realtime_priority"
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
fields[prefix+"rlimit_"+name+"_soft"] = rlim.Soft
|
||||
fields[prefix+"rlimit_"+name+"_hard"] = rlim.Hard
|
||||
if name != "file_locks" { // gopsutil doesn't currently track the used file locks count
|
||||
fields[prefix+name] = rlim.Used
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add the tags as requested by the user
|
||||
cmdline, err := p.Cmdline()
|
||||
if err == nil {
|
||||
if cfg.tagging["cmdline"] {
|
||||
p.tags["cmdline"] = cmdline
|
||||
} else {
|
||||
fields[prefix+"cmdline"] = cmdline
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.tagging["pid"] {
|
||||
p.tags["pid"] = strconv.Itoa(int(p.Pid))
|
||||
} else {
|
||||
fields["pid"] = p.Pid
|
||||
}
|
||||
|
||||
ppid, err := p.Ppid()
|
||||
if err == nil {
|
||||
if cfg.tagging["ppid"] {
|
||||
p.tags["ppid"] = strconv.Itoa(int(ppid))
|
||||
} else {
|
||||
fields[prefix+"ppid"] = ppid
|
||||
}
|
||||
}
|
||||
|
||||
status, err := p.Status()
|
||||
if err == nil {
|
||||
if cfg.tagging["status"] {
|
||||
p.tags["status"] = status[0]
|
||||
} else {
|
||||
fields[prefix+"status"] = status[0]
|
||||
}
|
||||
}
|
||||
|
||||
user, err := p.Username()
|
||||
if err == nil {
|
||||
if cfg.tagging["user"] {
|
||||
p.tags["user"] = user
|
||||
} else {
|
||||
fields[prefix+"user"] = user
|
||||
}
|
||||
}
|
||||
|
||||
if _, exists := p.tags["process_name"]; !exists {
|
||||
name, err := p.Name()
|
||||
if err == nil {
|
||||
p.tags["process_name"] = name
|
||||
}
|
||||
}
|
||||
|
||||
metrics := []telegraf.Metric{metric.New("procstat", p.tags, fields, t)}
|
||||
|
||||
// Collect the socket statistics if requested
|
||||
if cfg.features["sockets"] {
|
||||
for _, protocol := range cfg.socketProtos {
|
||||
// Get the requested connections for the PID
|
||||
var fieldlist []map[string]interface{}
|
||||
switch protocol {
|
||||
case "all":
|
||||
conns, err := gopsnet.ConnectionsPid(protocol, p.Pid)
|
||||
if err != nil {
|
||||
return metrics, fmt.Errorf("cannot get connections for %q of PID %d", protocol, p.Pid)
|
||||
}
|
||||
var connsTCPv4, connsTCPv6, connsUDPv4, connsUDPv6, connsUnix []gopsnet.ConnectionStat
|
||||
for _, c := range conns {
|
||||
switch {
|
||||
case c.Family == syscall.AF_INET && c.Type == syscall.SOCK_STREAM:
|
||||
connsTCPv4 = append(connsTCPv4, c)
|
||||
case c.Family == syscall.AF_INET6 && c.Type == syscall.SOCK_STREAM:
|
||||
connsTCPv6 = append(connsTCPv6, c)
|
||||
case c.Family == syscall.AF_INET && c.Type == syscall.SOCK_DGRAM:
|
||||
connsUDPv4 = append(connsUDPv4, c)
|
||||
case c.Family == syscall.AF_INET6 && c.Type == syscall.SOCK_DGRAM:
|
||||
connsUDPv6 = append(connsUDPv6, c)
|
||||
case c.Family == syscall.AF_UNIX:
|
||||
connsUnix = append(connsUnix, c)
|
||||
}
|
||||
}
|
||||
fl, err := statsTCP(connsTCPv4, syscall.AF_INET)
|
||||
if err != nil {
|
||||
return metrics, fmt.Errorf("cannot get statistics for \"tcp4\" of PID %d", p.Pid)
|
||||
}
|
||||
fieldlist = append(fieldlist, fl...)
|
||||
|
||||
fl, err = statsTCP(connsTCPv6, syscall.AF_INET6)
|
||||
if err != nil {
|
||||
return metrics, fmt.Errorf("cannot get statistics for \"tcp6\" of PID %d", p.Pid)
|
||||
}
|
||||
fieldlist = append(fieldlist, fl...)
|
||||
|
||||
fl, err = statsUDP(connsUDPv4, syscall.AF_INET)
|
||||
if err != nil {
|
||||
return metrics, fmt.Errorf("cannot get statistics for \"udp4\" of PID %d", p.Pid)
|
||||
}
|
||||
fieldlist = append(fieldlist, fl...)
|
||||
|
||||
fl, err = statsUDP(connsUDPv6, syscall.AF_INET6)
|
||||
if err != nil {
|
||||
return metrics, fmt.Errorf("cannot get statistics for \"udp6\" of PID %d", p.Pid)
|
||||
}
|
||||
fieldlist = append(fieldlist, fl...)
|
||||
|
||||
fl, err = statsUnix(connsUnix)
|
||||
if err != nil {
|
||||
return metrics, fmt.Errorf("cannot get statistics for \"unix\" of PID %d", p.Pid)
|
||||
}
|
||||
fieldlist = append(fieldlist, fl...)
|
||||
case "tcp4", "tcp6":
|
||||
family := uint8(syscall.AF_INET)
|
||||
if protocol == "tcp6" {
|
||||
family = syscall.AF_INET6
|
||||
}
|
||||
conns, err := gopsnet.ConnectionsPid(protocol, p.Pid)
|
||||
if err != nil {
|
||||
return metrics, fmt.Errorf("cannot get connections for %q of PID %d", protocol, p.Pid)
|
||||
}
|
||||
if fieldlist, err = statsTCP(conns, family); err != nil {
|
||||
return metrics, fmt.Errorf("cannot get statistics for %q of PID %d", protocol, p.Pid)
|
||||
}
|
||||
case "udp4", "udp6":
|
||||
family := uint8(syscall.AF_INET)
|
||||
if protocol == "udp6" {
|
||||
family = syscall.AF_INET6
|
||||
}
|
||||
conns, err := gopsnet.ConnectionsPid(protocol, p.Pid)
|
||||
if err != nil {
|
||||
return metrics, fmt.Errorf("cannot get connections for %q of PID %d", protocol, p.Pid)
|
||||
}
|
||||
if fieldlist, err = statsUDP(conns, family); err != nil {
|
||||
return metrics, fmt.Errorf("cannot get statistics for %q of PID %d", protocol, p.Pid)
|
||||
}
|
||||
case "unix":
|
||||
conns, err := gopsnet.ConnectionsPid(protocol, p.Pid)
|
||||
if err != nil {
|
||||
return metrics, fmt.Errorf("cannot get connections for %q of PID %d", protocol, p.Pid)
|
||||
}
|
||||
if fieldlist, err = statsUnix(conns); err != nil {
|
||||
return metrics, fmt.Errorf("cannot get statistics for %q of PID %d", protocol, p.Pid)
|
||||
}
|
||||
}
|
||||
|
||||
for _, fields := range fieldlist {
|
||||
if cfg.tagging["protocol"] {
|
||||
p.tags["protocol"] = fields["protocol"].(string)
|
||||
delete(fields, "protocol")
|
||||
}
|
||||
if cfg.tagging["state"] {
|
||||
p.tags["state"] = fields["state"].(string)
|
||||
delete(fields, "state")
|
||||
}
|
||||
if cfg.tagging["src"] && fields["src"] != nil {
|
||||
p.tags["src"] = fields["src"].(string)
|
||||
delete(fields, "src")
|
||||
}
|
||||
if cfg.tagging["src_port"] && fields["src_port"] != nil {
|
||||
port := uint64(fields["src_port"].(uint16))
|
||||
p.tags["src_port"] = strconv.FormatUint(port, 10)
|
||||
delete(fields, "src_port")
|
||||
}
|
||||
if cfg.tagging["dest"] && fields["dest"] != nil {
|
||||
p.tags["dest"] = fields["dest"].(string)
|
||||
delete(fields, "dest")
|
||||
}
|
||||
if cfg.tagging["dest_port"] && fields["dest_port"] != nil {
|
||||
port := uint64(fields["dest_port"].(uint16))
|
||||
p.tags["dest_port"] = strconv.FormatUint(port, 10)
|
||||
delete(fields, "dest_port")
|
||||
}
|
||||
if cfg.tagging["name"] && fields["name"] != nil {
|
||||
p.tags["name"] = fields["name"].(string)
|
||||
delete(fields, "name")
|
||||
}
|
||||
|
||||
metrics = append(metrics, metric.New("procstat_socket", p.tags, fields, t))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
|
711
plugins/inputs/procstat/procstat.go
Normal file
711
plugins/inputs/procstat/procstat.go
Normal file
|
@ -0,0 +1,711 @@
|
|||
//go:generate ../../../tools/readme_config_includer/generator
|
||||
package procstat
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
_ "embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
gopsprocess "github.com/shirou/gopsutil/v4/process"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/internal/choice"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
//go:embed sample.conf
|
||||
var sampleConfig string
|
||||
|
||||
// execCommand is the function used to create external commands. It defaults
// to exec.Command and only exists as a variable so tests can replace it with
// a mock.
var execCommand = exec.Command
|
||||
|
||||
// pid is the process-ID type used throughout the plugin; it is converted to
// int32 when creating gopsutil process handles.
type pid int32
|
||||
|
||||
type Procstat struct {
|
||||
PidFinder string `toml:"pid_finder"`
|
||||
PidFile string `toml:"pid_file"`
|
||||
Exe string `toml:"exe"`
|
||||
Pattern string `toml:"pattern"`
|
||||
Prefix string `toml:"prefix"`
|
||||
CmdLineTag bool `toml:"cmdline_tag" deprecated:"1.29.0;1.40.0;use 'tag_with' instead"`
|
||||
ProcessName string `toml:"process_name"`
|
||||
User string `toml:"user"`
|
||||
SystemdUnit string `toml:"systemd_unit"`
|
||||
SupervisorUnit []string `toml:"supervisor_unit" deprecated:"1.29.0;1.40.0;use 'supervisor_units' instead"`
|
||||
SupervisorUnits []string `toml:"supervisor_units"`
|
||||
IncludeSystemdChildren bool `toml:"include_systemd_children"`
|
||||
CGroup string `toml:"cgroup"`
|
||||
PidTag bool `toml:"pid_tag" deprecated:"1.29.0;1.40.0;use 'tag_with' instead"`
|
||||
WinService string `toml:"win_service"`
|
||||
Mode string `toml:"mode"`
|
||||
Properties []string `toml:"properties"`
|
||||
SocketProtocols []string `toml:"socket_protocols"`
|
||||
TagWith []string `toml:"tag_with"`
|
||||
Filter []filter `toml:"filter"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
|
||||
finder pidFinder
|
||||
processes map[pid]process
|
||||
cfg collectionConfig
|
||||
oldMode bool
|
||||
|
||||
createProcess func(pid) (process, error)
|
||||
}
|
||||
|
||||
// collectionConfig is the pre-processed configuration handed to each process
// when its metrics are collected.
type collectionConfig struct {
	// solarisMode is set if the 'mode' option equals "solaris" (case-insensitive)
	solarisMode bool
	// tagging holds the validated 'tag_with' entries as a set
	tagging map[string]bool
	// features holds the validated 'properties' entries as a set
	features map[string]bool
	// socketProtos lists the validated 'socket_protocols' entries
	socketProtos []string
}
|
||||
|
||||
type pidsTags struct {
|
||||
PIDs []pid
|
||||
Tags map[string]string
|
||||
}
|
||||
|
||||
type processGroup struct {
|
||||
processes []*gopsprocess.Process
|
||||
tags map[string]string
|
||||
level int
|
||||
}
|
||||
|
||||
func (*Procstat) SampleConfig() string {
|
||||
return sampleConfig
|
||||
}
|
||||
|
||||
func (p *Procstat) Init() error {
|
||||
// Keep the old settings for compatibility
|
||||
if p.PidTag && !choice.Contains("pid", p.TagWith) {
|
||||
p.TagWith = append(p.TagWith, "pid")
|
||||
}
|
||||
if p.CmdLineTag && !choice.Contains("cmdline", p.TagWith) {
|
||||
p.TagWith = append(p.TagWith, "cmdline")
|
||||
}
|
||||
|
||||
// Configure metric collection features
|
||||
p.cfg.solarisMode = strings.EqualFold(p.Mode, "solaris")
|
||||
|
||||
// Convert tagging settings
|
||||
p.cfg.tagging = make(map[string]bool, len(p.TagWith))
|
||||
for _, tag := range p.TagWith {
|
||||
switch tag {
|
||||
case "cmdline", "pid", "ppid", "status", "user", "child_level", "parent_pid", "level":
|
||||
case "protocol", "state", "src", "src_port", "dest", "dest_port", "name": // socket only
|
||||
if !slices.Contains(p.Properties, "sockets") {
|
||||
return fmt.Errorf("socket tagging option %q specified without sockets enabled", tag)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("invalid 'tag_with' setting %q", tag)
|
||||
}
|
||||
p.cfg.tagging[tag] = true
|
||||
}
|
||||
|
||||
// Convert collection properties
|
||||
p.cfg.features = make(map[string]bool, len(p.Properties))
|
||||
for _, prop := range p.Properties {
|
||||
switch prop {
|
||||
case "cpu", "limits", "memory", "mmap":
|
||||
case "sockets":
|
||||
if len(p.SocketProtocols) == 0 {
|
||||
p.SocketProtocols = []string{"all"}
|
||||
}
|
||||
protos := make(map[string]bool, len(p.SocketProtocols))
|
||||
for _, proto := range p.SocketProtocols {
|
||||
switch proto {
|
||||
case "all":
|
||||
if len(protos) > 0 || len(p.SocketProtocols) > 1 {
|
||||
return errors.New("additional 'socket_protocol' settings besides 'all' are not allowed")
|
||||
}
|
||||
case "tcp4", "tcp6", "udp4", "udp6", "unix":
|
||||
default:
|
||||
return fmt.Errorf("invalid 'socket_protocol' setting %q", proto)
|
||||
}
|
||||
if protos[proto] {
|
||||
return fmt.Errorf("duplicate %q in 'socket_protocol' setting", proto)
|
||||
}
|
||||
protos[proto] = true
|
||||
p.cfg.socketProtos = append(p.cfg.socketProtos, proto)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("invalid 'properties' setting %q", prop)
|
||||
}
|
||||
p.cfg.features[prop] = true
|
||||
}
|
||||
|
||||
// Check if we got any new-style configuration options and determine
|
||||
// operation mode.
|
||||
p.oldMode = len(p.Filter) == 0
|
||||
if p.oldMode {
|
||||
// Keep the old settings for compatibility
|
||||
for _, u := range p.SupervisorUnit {
|
||||
if !choice.Contains(u, p.SupervisorUnits) {
|
||||
p.SupervisorUnits = append(p.SupervisorUnits, u)
|
||||
}
|
||||
}
|
||||
|
||||
// Check filtering
|
||||
switch {
|
||||
case len(p.SupervisorUnits) > 0, p.SystemdUnit != "", p.WinService != "",
|
||||
p.CGroup != "", p.PidFile != "", p.Exe != "", p.Pattern != "",
|
||||
p.User != "":
|
||||
// Do nothing as those are valid settings
|
||||
default:
|
||||
return errors.New("require filter option but none set")
|
||||
}
|
||||
|
||||
// Instantiate the finder
|
||||
switch p.PidFinder {
|
||||
case "", "pgrep":
|
||||
p.PidFinder = "pgrep"
|
||||
finder, err := newPgrepFinder()
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating pgrep finder failed: %w", err)
|
||||
}
|
||||
p.finder = finder
|
||||
case "native":
|
||||
// gopsutil relies on pgrep when looking up children on darwin
|
||||
// see https://github.com/shirou/gopsutil/blob/v3.23.10/process/process_darwin.go#L235
|
||||
requiresChildren := len(p.SupervisorUnits) > 0 && p.Pattern != ""
|
||||
if requiresChildren && runtime.GOOS == "darwin" {
|
||||
return errors.New("configuration requires 'pgrep' finder on your OS")
|
||||
}
|
||||
p.finder = &NativeFinder{}
|
||||
case "test":
|
||||
p.Log.Warn("running in test mode")
|
||||
default:
|
||||
return fmt.Errorf("unknown pid_finder %q", p.PidFinder)
|
||||
}
|
||||
} else {
|
||||
// Check for mixed mode
|
||||
switch {
|
||||
case p.PidFile != "", p.Exe != "", p.Pattern != "", p.User != "",
|
||||
p.SystemdUnit != "", len(p.SupervisorUnit) > 0,
|
||||
len(p.SupervisorUnits) > 0, p.CGroup != "", p.WinService != "":
|
||||
return errors.New("cannot operate in mixed mode with filters and old-style config")
|
||||
}
|
||||
|
||||
// New-style operations
|
||||
for i := range p.Filter {
|
||||
p.Filter[i].Log = p.Log
|
||||
if err := p.Filter[i].init(); err != nil {
|
||||
return fmt.Errorf("initializing filter %d failed: %w", i, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the running process cache
|
||||
p.processes = make(map[pid]process)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Procstat) Gather(acc telegraf.Accumulator) error {
|
||||
if p.oldMode {
|
||||
return p.gatherOld(acc)
|
||||
}
|
||||
|
||||
return p.gatherNew(acc)
|
||||
}
|
||||
|
||||
func (p *Procstat) gatherOld(acc telegraf.Accumulator) error {
|
||||
now := time.Now()
|
||||
results, err := p.findPids()
|
||||
if err != nil {
|
||||
// Add lookup error-metric
|
||||
fields := map[string]interface{}{
|
||||
"pid_count": 0,
|
||||
"running": 0,
|
||||
"result_code": 1,
|
||||
}
|
||||
tags := map[string]string{
|
||||
"pid_finder": p.PidFinder,
|
||||
"result": "lookup_error",
|
||||
}
|
||||
for _, pidTag := range results {
|
||||
for key, value := range pidTag.Tags {
|
||||
tags[key] = value
|
||||
}
|
||||
}
|
||||
acc.AddFields("procstat_lookup", fields, tags, now)
|
||||
return err
|
||||
}
|
||||
|
||||
var count int
|
||||
running := make(map[pid]bool)
|
||||
for _, r := range results {
|
||||
if len(r.PIDs) < 1 && len(p.SupervisorUnits) > 0 {
|
||||
continue
|
||||
}
|
||||
count += len(r.PIDs)
|
||||
for _, pid := range r.PIDs {
|
||||
// Check if the process is still running
|
||||
proc, err := p.createProcess(pid)
|
||||
if err != nil {
|
||||
// No problem; process may have ended after we found it or it
|
||||
// might be delivered from a non-checking source like a PID file
|
||||
// of a dead process.
|
||||
continue
|
||||
}
|
||||
|
||||
// Use the cached processes as we need the existing instances
|
||||
// to compute delta-metrics (e.g. cpu-usage).
|
||||
if cached, found := p.processes[pid]; found {
|
||||
proc = cached
|
||||
} else {
|
||||
// We've found a process that was not recorded before so add it
|
||||
// to the list of processes
|
||||
|
||||
//nolint:errcheck // Assumption: if a process has no name, it probably does not exist
|
||||
if name, _ := proc.Name(); name == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Add initial tags
|
||||
for k, v := range r.Tags {
|
||||
proc.setTag(k, v)
|
||||
}
|
||||
|
||||
if p.ProcessName != "" {
|
||||
proc.setTag("process_name", p.ProcessName)
|
||||
}
|
||||
p.processes[pid] = proc
|
||||
}
|
||||
running[pid] = true
|
||||
metrics, err := proc.metrics(p.Prefix, &p.cfg, now)
|
||||
if err != nil {
|
||||
// Continue after logging an error as there might still be
|
||||
// metrics available
|
||||
acc.AddError(err)
|
||||
}
|
||||
for _, m := range metrics {
|
||||
acc.AddMetric(m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup processes that are not running anymore
|
||||
for pid := range p.processes {
|
||||
if !running[pid] {
|
||||
delete(p.processes, pid)
|
||||
}
|
||||
}
|
||||
|
||||
// Add lookup statistics-metric
|
||||
fields := map[string]interface{}{
|
||||
"pid_count": count,
|
||||
"running": len(running),
|
||||
"result_code": 0,
|
||||
}
|
||||
tags := map[string]string{
|
||||
"pid_finder": p.PidFinder,
|
||||
"result": "success",
|
||||
}
|
||||
for _, pidTag := range results {
|
||||
for key, value := range pidTag.Tags {
|
||||
tags[key] = value
|
||||
}
|
||||
}
|
||||
if len(p.SupervisorUnits) > 0 {
|
||||
tags["supervisor_unit"] = strings.Join(p.SupervisorUnits, ";")
|
||||
}
|
||||
acc.AddFields("procstat_lookup", fields, tags, now)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Procstat) gatherNew(acc telegraf.Accumulator) error {
|
||||
now := time.Now()
|
||||
running := make(map[pid]bool)
|
||||
for _, f := range p.Filter {
|
||||
groups, err := f.applyFilter()
|
||||
if err != nil {
|
||||
// Add lookup error-metric
|
||||
acc.AddFields(
|
||||
"procstat_lookup",
|
||||
map[string]interface{}{
|
||||
"pid_count": 0,
|
||||
"running": 0,
|
||||
"result_code": 1,
|
||||
},
|
||||
map[string]string{
|
||||
"filter": f.Name,
|
||||
"result": "lookup_error",
|
||||
},
|
||||
now,
|
||||
)
|
||||
acc.AddError(fmt.Errorf("applying filter %q failed: %w", f.Name, err))
|
||||
continue
|
||||
}
|
||||
|
||||
var count int
|
||||
for _, g := range groups {
|
||||
count += len(g.processes)
|
||||
level := strconv.Itoa(g.level)
|
||||
for _, gp := range g.processes {
|
||||
// Skip over non-running processes
|
||||
if running, err := gp.IsRunning(); err != nil || !running {
|
||||
continue
|
||||
}
|
||||
|
||||
// Use the cached processes as we need the existing instances
|
||||
// to compute delta-metrics (e.g. cpu-usage).
|
||||
pid := pid(gp.Pid)
|
||||
process, found := p.processes[pid]
|
||||
if !found {
|
||||
//nolint:errcheck // Assumption: if a process has no name, it probably does not exist
|
||||
if name, _ := gp.Name(); name == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// We've found a process that was not recorded before so add it
|
||||
// to the list of processes
|
||||
tags := make(map[string]string, len(g.tags)+1)
|
||||
for k, v := range g.tags {
|
||||
tags[k] = v
|
||||
}
|
||||
if p.ProcessName != "" {
|
||||
process.setTag("process_name", p.ProcessName)
|
||||
}
|
||||
tags["filter"] = f.Name
|
||||
if p.cfg.tagging["level"] {
|
||||
tags["level"] = level
|
||||
}
|
||||
|
||||
process = &proc{
|
||||
Process: gp,
|
||||
hasCPUTimes: false,
|
||||
tags: tags,
|
||||
}
|
||||
p.processes[pid] = process
|
||||
}
|
||||
running[pid] = true
|
||||
metrics, err := process.metrics(p.Prefix, &p.cfg, now)
|
||||
if err != nil {
|
||||
// Continue after logging an error as there might still be
|
||||
// metrics available
|
||||
acc.AddError(err)
|
||||
}
|
||||
for _, m := range metrics {
|
||||
acc.AddMetric(m)
|
||||
}
|
||||
}
|
||||
if p.cfg.tagging["level"] {
|
||||
// Add lookup statistics-metric
|
||||
acc.AddFields(
|
||||
"procstat_lookup",
|
||||
map[string]interface{}{
|
||||
"pid_count": len(g.processes),
|
||||
"running": len(running),
|
||||
"result_code": 0,
|
||||
"level": g.level,
|
||||
},
|
||||
map[string]string{
|
||||
"filter": f.Name,
|
||||
"result": "success",
|
||||
},
|
||||
now,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Add lookup statistics-metric
|
||||
acc.AddFields(
|
||||
"procstat_lookup",
|
||||
map[string]interface{}{
|
||||
"pid_count": count,
|
||||
"running": len(running),
|
||||
"result_code": 0,
|
||||
},
|
||||
map[string]string{
|
||||
"filter": f.Name,
|
||||
"result": "success",
|
||||
},
|
||||
now,
|
||||
)
|
||||
}
|
||||
|
||||
// Cleanup processes that are not running anymore across all filters/groups
|
||||
for pid := range p.processes {
|
||||
if !running[pid] {
|
||||
delete(p.processes, pid)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get matching PIDs and their initial tags
|
||||
func (p *Procstat) findPids() ([]pidsTags, error) {
|
||||
switch {
|
||||
case len(p.SupervisorUnits) > 0:
|
||||
return p.findSupervisorUnits()
|
||||
case p.SystemdUnit != "":
|
||||
return p.systemdUnitPIDs()
|
||||
case p.WinService != "":
|
||||
pids, err := p.winServicePIDs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tags := map[string]string{"win_service": p.WinService}
|
||||
return []pidsTags{{pids, tags}}, nil
|
||||
case p.CGroup != "":
|
||||
return p.cgroupPIDs()
|
||||
case p.PidFile != "":
|
||||
pids, err := p.finder.pidFile(p.PidFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tags := map[string]string{"pidfile": p.PidFile}
|
||||
return []pidsTags{{pids, tags}}, nil
|
||||
case p.Exe != "":
|
||||
pids, err := p.finder.pattern(p.Exe)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tags := map[string]string{"exe": p.Exe}
|
||||
return []pidsTags{{pids, tags}}, nil
|
||||
case p.Pattern != "":
|
||||
pids, err := p.finder.fullPattern(p.Pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tags := map[string]string{"pattern": p.Pattern}
|
||||
return []pidsTags{{pids, tags}}, nil
|
||||
case p.User != "":
|
||||
pids, err := p.finder.uid(p.User)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tags := map[string]string{"user": p.User}
|
||||
return []pidsTags{{pids, tags}}, nil
|
||||
}
|
||||
return nil, errors.New("no filter option set")
|
||||
}
|
||||
|
||||
func (p *Procstat) findSupervisorUnits() ([]pidsTags, error) {
|
||||
groups, groupsTags, err := p.supervisorPIDs()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("getting supervisor PIDs failed: %w", err)
|
||||
}
|
||||
|
||||
// According to the PID, find the system process number and get the child processes
|
||||
pidTags := make([]pidsTags, 0, len(groups))
|
||||
for _, group := range groups {
|
||||
grppid := groupsTags[group]["pid"]
|
||||
if grppid == "" {
|
||||
pidTags = append(pidTags, pidsTags{nil, groupsTags[group]})
|
||||
continue
|
||||
}
|
||||
|
||||
processID, err := strconv.ParseInt(grppid, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("converting PID %q failed: %w", grppid, err)
|
||||
}
|
||||
|
||||
// Get all children of the supervisor unit
|
||||
pids, err := p.finder.children(pid(processID))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("getting children for %d failed: %w", processID, err)
|
||||
}
|
||||
tags := map[string]string{"pattern": p.Pattern, "parent_pid": p.Pattern}
|
||||
|
||||
// Handle situations where the PID does not exist
|
||||
if len(pids) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Merge tags map
|
||||
for k, v := range groupsTags[group] {
|
||||
_, ok := tags[k]
|
||||
if !ok {
|
||||
tags[k] = v
|
||||
}
|
||||
}
|
||||
// Remove duplicate pid tags
|
||||
delete(tags, "pid")
|
||||
pidTags = append(pidTags, pidsTags{pids, tags})
|
||||
}
|
||||
return pidTags, nil
|
||||
}
|
||||
|
||||
func (p *Procstat) supervisorPIDs() ([]string, map[string]map[string]string, error) {
|
||||
out, err := execCommand("supervisorctl", "status", strings.Join(p.SupervisorUnits, " ")).Output()
|
||||
if err != nil {
|
||||
if !strings.Contains(err.Error(), "exit status 3") {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
lines := strings.Split(string(out), "\n")
|
||||
// Get the PID, running status, running time and boot time of the main process:
|
||||
// pid 11779, uptime 17:41:16
|
||||
// Exited too quickly (process log may have details)
|
||||
mainPids := make(map[string]map[string]string)
|
||||
for _, line := range lines {
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
kv := strings.Fields(line)
|
||||
if len(kv) < 2 {
|
||||
// Not a key-value pair
|
||||
continue
|
||||
}
|
||||
name := kv[0]
|
||||
|
||||
statusMap := map[string]string{
|
||||
"supervisor_unit": name,
|
||||
"status": kv[1],
|
||||
}
|
||||
|
||||
switch kv[1] {
|
||||
case "FATAL", "EXITED", "BACKOFF", "STOPPING":
|
||||
statusMap["error"] = strings.Join(kv[2:], " ")
|
||||
case "RUNNING":
|
||||
statusMap["pid"] = strings.ReplaceAll(kv[3], ",", "")
|
||||
statusMap["uptimes"] = kv[5]
|
||||
case "STOPPED", "UNKNOWN", "STARTING":
|
||||
// No additional info
|
||||
}
|
||||
mainPids[name] = statusMap
|
||||
}
|
||||
|
||||
return p.SupervisorUnits, mainPids, nil
|
||||
}
|
||||
|
||||
func (p *Procstat) systemdUnitPIDs() ([]pidsTags, error) {
|
||||
if p.IncludeSystemdChildren {
|
||||
p.CGroup = "systemd/system.slice/" + p.SystemdUnit
|
||||
return p.cgroupPIDs()
|
||||
}
|
||||
|
||||
var pidTags []pidsTags
|
||||
pids, err := p.simpleSystemdUnitPIDs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tags := map[string]string{"systemd_unit": p.SystemdUnit}
|
||||
pidTags = append(pidTags, pidsTags{pids, tags})
|
||||
return pidTags, nil
|
||||
}
|
||||
|
||||
func (p *Procstat) simpleSystemdUnitPIDs() ([]pid, error) {
|
||||
out, err := execCommand("systemctl", "show", p.SystemdUnit).Output()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
lines := bytes.Split(out, []byte{'\n'})
|
||||
pids := make([]pid, 0, len(lines))
|
||||
for _, line := range lines {
|
||||
kv := bytes.SplitN(line, []byte{'='}, 2)
|
||||
if len(kv) != 2 {
|
||||
continue
|
||||
}
|
||||
if !bytes.Equal(kv[0], []byte("MainPID")) {
|
||||
continue
|
||||
}
|
||||
if len(kv[1]) == 0 || bytes.Equal(kv[1], []byte("0")) {
|
||||
return nil, nil
|
||||
}
|
||||
processID, err := strconv.ParseInt(string(kv[1]), 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid pid %q", kv[1])
|
||||
}
|
||||
pids = append(pids, pid(processID))
|
||||
}
|
||||
|
||||
return pids, nil
|
||||
}
|
||||
|
||||
func (p *Procstat) cgroupPIDs() ([]pidsTags, error) {
|
||||
procsPath := p.CGroup
|
||||
if procsPath[0] != '/' {
|
||||
procsPath = "/sys/fs/cgroup/" + procsPath
|
||||
}
|
||||
|
||||
items, err := filepath.Glob(procsPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("glob failed: %w", err)
|
||||
}
|
||||
|
||||
pidTags := make([]pidsTags, 0, len(items))
|
||||
for _, item := range items {
|
||||
pids, err := singleCgroupPIDs(item)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tags := map[string]string{"cgroup": p.CGroup, "cgroup_full": item}
|
||||
pidTags = append(pidTags, pidsTags{pids, tags})
|
||||
}
|
||||
|
||||
return pidTags, nil
|
||||
}
|
||||
|
||||
func singleCgroupPIDs(path string) ([]pid, error) {
|
||||
ok, err := isDir(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("not a directory %s", path)
|
||||
}
|
||||
procsPath := filepath.Join(path, "cgroup.procs")
|
||||
out, err := os.ReadFile(procsPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
lines := bytes.Split(out, []byte{'\n'})
|
||||
pids := make([]pid, 0, len(lines))
|
||||
for _, pidBS := range lines {
|
||||
if len(pidBS) == 0 {
|
||||
continue
|
||||
}
|
||||
processID, err := strconv.ParseInt(string(pidBS), 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid pid %q", pidBS)
|
||||
}
|
||||
pids = append(pids, pid(processID))
|
||||
}
|
||||
|
||||
return pids, nil
|
||||
}
|
||||
|
||||
// isDir reports whether path refers to a directory. The error of the
// underlying os.Stat call is returned unchanged, e.g. if path does not exist.
func isDir(path string) (bool, error) {
	info, err := os.Stat(path)
	if err == nil {
		return info.IsDir(), nil
	}
	return false, err
}
|
||||
|
||||
func (p *Procstat) winServicePIDs() ([]pid, error) {
|
||||
var pids []pid
|
||||
|
||||
processID, err := queryPidWithWinServiceName(p.WinService)
|
||||
if err != nil {
|
||||
return pids, err
|
||||
}
|
||||
|
||||
pids = append(pids, pid(processID))
|
||||
|
||||
return pids, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("procstat", func() telegraf.Input {
|
||||
return &Procstat{
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
createProcess: newProc,
|
||||
}
|
||||
})
|
||||
}
|
676
plugins/inputs/procstat/procstat_test.go
Normal file
676
plugins/inputs/procstat/procstat_test.go
Normal file
|
@ -0,0 +1,676 @@
|
|||
package procstat
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
gopsprocess "github.com/shirou/gopsutil/v4/process"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/metric"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
func init() {
|
||||
execCommand = mockExecCommand
|
||||
}
|
||||
// mockExecCommand replaces exec.Command in tests. Instead of running the
// requested command it re-executes the test binary itself, restricted to
// TestMockExecCommand, and passes the original command line after a "--"
// separator. The standard error of the child is forwarded.
func mockExecCommand(arg0 string, args ...string) *exec.Cmd {
	helperArgs := make([]string, 0, len(args)+3)
	helperArgs = append(helperArgs, "-test.run=TestMockExecCommand", "--", arg0)
	helperArgs = append(helperArgs, args...)

	helper := exec.Command(os.Args[0], helperArgs...)
	helper.Stderr = os.Stderr
	return helper
}
|
||||
// TestMockExecCommand is not a real test but the child-process side of
// mockExecCommand. If the command line of the test binary contains a "--"
// separator, everything after the last separator is interpreted as the
// command to fake: the canned output of a known command is printed and the
// process exits with status zero, unknown commands exit with status one.
// Without a separator the function returns immediately.
func TestMockExecCommand(_ *testing.T) {
	separator := -1
	for i, arg := range os.Args {
		if arg == "--" {
			separator = i
		}
	}
	if separator < 0 {
		// Regular test run, nothing to fake
		return
	}
	cmdline := strings.Join(os.Args[separator+1:], " ")

	switch cmdline {
	case "systemctl show TestGather_systemdUnitPIDs":
		fmt.Printf(`PIDFile=
GuessMainPID=yes
MainPID=11408
ControlPID=0
ExecMainPID=11408
`)
		//nolint:revive // error code is important for this "test"
		os.Exit(0)
	case "supervisorctl status TestGather_supervisorUnitPIDs":
		fmt.Printf(`TestGather_supervisorUnitPIDs RUNNING pid 7311, uptime 0:00:19
`)
		//nolint:revive // error code is important for this "test"
		os.Exit(0)
	case "supervisorctl status TestGather_STARTINGsupervisorUnitPIDs TestGather_FATALsupervisorUnitPIDs":
		fmt.Printf(`TestGather_FATALsupervisorUnitPIDs FATAL Exited too quickly (process log may have details)
TestGather_STARTINGsupervisorUnitPIDs STARTING`)
		//nolint:revive // error code is important for this "test"
		os.Exit(0)
	}

	fmt.Printf("command not found\n")
	//nolint:revive // error code is important for this "test"
	os.Exit(1)
}
|
||||
|
||||
type testPgrep struct {
|
||||
pids []pid
|
||||
err error
|
||||
}
|
||||
|
||||
func newTestFinder(pids []pid) pidFinder {
|
||||
return &testPgrep{
|
||||
pids: pids,
|
||||
err: nil,
|
||||
}
|
||||
}
|
||||
|
||||
func (pg *testPgrep) pidFile(_ string) ([]pid, error) {
|
||||
return pg.pids, pg.err
|
||||
}
|
||||
|
||||
func (pg *testPgrep) pattern(_ string) ([]pid, error) {
|
||||
return pg.pids, pg.err
|
||||
}
|
||||
|
||||
func (pg *testPgrep) uid(_ string) ([]pid, error) {
|
||||
return pg.pids, pg.err
|
||||
}
|
||||
|
||||
func (pg *testPgrep) fullPattern(_ string) ([]pid, error) {
|
||||
return pg.pids, pg.err
|
||||
}
|
||||
|
||||
func (pg *testPgrep) children(_ pid) ([]pid, error) {
|
||||
pids := []pid{7311, 8111, 8112}
|
||||
return pids, pg.err
|
||||
}
|
||||
|
||||
type testProc struct {
|
||||
procID pid
|
||||
tags map[string]string
|
||||
}
|
||||
|
||||
func newTestProc(pid pid) (process, error) {
|
||||
proc := &testProc{
|
||||
procID: pid,
|
||||
tags: make(map[string]string),
|
||||
}
|
||||
return proc, nil
|
||||
}
|
||||
|
||||
func (p *testProc) pid() pid {
|
||||
return p.procID
|
||||
}
|
||||
|
||||
func (*testProc) Name() (string, error) {
|
||||
return "test_proc", nil
|
||||
}
|
||||
|
||||
func (p *testProc) setTag(k, v string) {
|
||||
p.tags[k] = v
|
||||
}
|
||||
|
||||
func (*testProc) MemoryMaps(bool) (*[]gopsprocess.MemoryMapsStat, error) {
|
||||
stats := make([]gopsprocess.MemoryMapsStat, 0)
|
||||
return &stats, nil
|
||||
}
|
||||
|
||||
func (p *testProc) metrics(prefix string, cfg *collectionConfig, t time.Time) ([]telegraf.Metric, error) {
|
||||
if prefix != "" {
|
||||
prefix += "_"
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
prefix + "num_fds": int32(0),
|
||||
prefix + "num_threads": int32(0),
|
||||
prefix + "voluntary_context_switches": int64(0),
|
||||
prefix + "involuntary_context_switches": int64(0),
|
||||
prefix + "minor_faults": uint64(0),
|
||||
prefix + "major_faults": uint64(0),
|
||||
prefix + "child_major_faults": uint64(0),
|
||||
prefix + "child_minor_faults": uint64(0),
|
||||
prefix + "read_bytes": uint64(0),
|
||||
prefix + "read_count": uint64(0),
|
||||
prefix + "write_bytes": uint64(0),
|
||||
prefix + "write_count": uint64(0),
|
||||
prefix + "created_at": int64(0),
|
||||
}
|
||||
if cfg.features["cpu"] {
|
||||
fields[prefix+"cpu_time_user"] = float64(0)
|
||||
fields[prefix+"cpu_time_system"] = float64(0)
|
||||
fields[prefix+"cpu_time_iowait"] = float64(0)
|
||||
fields[prefix+"cpu_usage"] = float64(0)
|
||||
}
|
||||
if cfg.features["memory"] {
|
||||
fields[prefix+"memory_rss"] = uint64(0)
|
||||
fields[prefix+"memory_vms"] = uint64(0)
|
||||
fields[prefix+"memory_usage"] = float32(0)
|
||||
}
|
||||
|
||||
tags := map[string]string{
|
||||
"process_name": "test_proc",
|
||||
}
|
||||
for k, v := range p.tags {
|
||||
tags[k] = v
|
||||
}
|
||||
|
||||
// Add the tags as requested by the user
|
||||
if cfg.tagging["cmdline"] {
|
||||
tags["cmdline"] = "test_proc"
|
||||
} else {
|
||||
fields[prefix+"cmdline"] = "test_proc"
|
||||
}
|
||||
|
||||
if cfg.tagging["pid"] {
|
||||
tags["pid"] = strconv.Itoa(int(p.procID))
|
||||
} else {
|
||||
fields["pid"] = int32(p.procID)
|
||||
}
|
||||
|
||||
if cfg.tagging["ppid"] {
|
||||
tags["ppid"] = "0"
|
||||
} else {
|
||||
fields[prefix+"ppid"] = int32(0)
|
||||
}
|
||||
|
||||
if cfg.tagging["status"] {
|
||||
tags["status"] = "running"
|
||||
} else {
|
||||
fields[prefix+"status"] = "running"
|
||||
}
|
||||
|
||||
if cfg.tagging["user"] {
|
||||
tags["user"] = "testuser"
|
||||
} else {
|
||||
fields[prefix+"user"] = "testuser"
|
||||
}
|
||||
|
||||
return []telegraf.Metric{metric.New("procstat", tags, fields, t)}, nil
|
||||
}
|
||||
|
||||
var processID = pid(42)
|
||||
var exe = "foo"
|
||||
|
||||
func TestInitInvalidFinder(t *testing.T) {
|
||||
plugin := Procstat{
|
||||
PidFinder: "foo",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.Error(t, plugin.Init())
|
||||
}
|
||||
|
||||
func TestInitRequiresChildDarwin(t *testing.T) {
|
||||
if runtime.GOOS != "darwin" {
|
||||
t.Skip("Skipping test on non-darwin platform")
|
||||
}
|
||||
|
||||
p := Procstat{
|
||||
Pattern: "somepattern",
|
||||
SupervisorUnits: []string{"a_unit"},
|
||||
PidFinder: "native",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
}
|
||||
require.ErrorContains(t, p.Init(), "requires 'pgrep' finder")
|
||||
}
|
||||
|
||||
func TestInitMissingPidMethod(t *testing.T) {
|
||||
p := Procstat{
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.ErrorContains(t, p.Init(), "require filter option but none set")
|
||||
}
|
||||
|
||||
func TestGather_CreateProcessErrorOk(t *testing.T) {
|
||||
expected := []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"procstat_lookup",
|
||||
map[string]string{
|
||||
"exe": "foo",
|
||||
"pid_finder": "test",
|
||||
"result": "success",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"pid_count": int64(1),
|
||||
"result_code": int64(0),
|
||||
"running": int64(0),
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
telegraf.Untyped,
|
||||
),
|
||||
}
|
||||
|
||||
p := Procstat{
|
||||
Exe: exe,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: func(pid) (process, error) {
|
||||
return nil, errors.New("createProcess error")
|
||||
},
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
|
||||
}
|
||||
|
||||
func TestGather_ProcessName(t *testing.T) {
|
||||
expected := []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"procstat",
|
||||
map[string]string{
|
||||
"exe": "foo",
|
||||
"process_name": "custom_name",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"child_major_faults": uint64(0),
|
||||
"child_minor_faults": uint64(0),
|
||||
"cmdline": "test_proc",
|
||||
"cpu_time_iowait": float64(0),
|
||||
"cpu_time_system": float64(0),
|
||||
"cpu_time_user": float64(0),
|
||||
"cpu_usage": float64(0),
|
||||
"created_at": int64(0),
|
||||
"involuntary_context_switches": int64(0),
|
||||
"major_faults": uint64(0),
|
||||
"memory_rss": uint64(0),
|
||||
"memory_usage": float32(0),
|
||||
"memory_vms": uint64(0),
|
||||
"minor_faults": uint64(0),
|
||||
"num_fds": int32(0),
|
||||
"num_threads": int32(0),
|
||||
"pid": int32(42),
|
||||
"ppid": int32(0),
|
||||
"read_bytes": uint64(0),
|
||||
"read_count": uint64(0),
|
||||
"status": "running",
|
||||
"user": "testuser",
|
||||
"voluntary_context_switches": int64(0),
|
||||
"write_bytes": uint64(0),
|
||||
"write_count": uint64(0),
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
telegraf.Untyped,
|
||||
),
|
||||
testutil.MustMetric(
|
||||
"procstat_lookup",
|
||||
map[string]string{
|
||||
"exe": "foo",
|
||||
"pid_finder": "test",
|
||||
"result": "success",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"pid_count": int64(1),
|
||||
"result_code": int64(0),
|
||||
"running": int64(1),
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
telegraf.Untyped,
|
||||
),
|
||||
}
|
||||
|
||||
p := Procstat{
|
||||
Exe: exe,
|
||||
ProcessName: "custom_name",
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
require.Equal(t, "custom_name", acc.TagValue("procstat", "process_name"))
|
||||
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
|
||||
}
|
||||
|
||||
func TestGather_NoProcessNameUsesReal(t *testing.T) {
|
||||
processID := pid(os.Getpid())
|
||||
|
||||
p := Procstat{
|
||||
Exe: exe,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
require.True(t, acc.HasTag("procstat", "process_name"))
|
||||
}
|
||||
|
||||
func TestGather_NoPidTag(t *testing.T) {
|
||||
p := Procstat{
|
||||
Exe: exe,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
require.True(t, acc.HasInt64Field("procstat", "pid"))
|
||||
require.False(t, acc.HasTag("procstat", "pid"))
|
||||
}
|
||||
|
||||
func TestGather_PidTag(t *testing.T) {
|
||||
p := Procstat{
|
||||
Exe: exe,
|
||||
PidTag: true,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
require.Equal(t, "42", acc.TagValue("procstat", "pid"))
|
||||
require.False(t, acc.HasInt32Field("procstat", "pid"))
|
||||
}
|
||||
|
||||
func TestGather_Prefix(t *testing.T) {
|
||||
p := Procstat{
|
||||
Exe: exe,
|
||||
Prefix: "custom_prefix",
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
require.True(t, acc.HasInt64Field("procstat", "custom_prefix_num_fds"))
|
||||
}
|
||||
|
||||
func TestGather_Exe(t *testing.T) {
|
||||
p := Procstat{
|
||||
Exe: exe,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
require.Equal(t, exe, acc.TagValue("procstat", "exe"))
|
||||
}
|
||||
|
||||
func TestGather_User(t *testing.T) {
|
||||
user := "ada"
|
||||
|
||||
p := Procstat{
|
||||
User: user,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
require.Equal(t, user, acc.TagValue("procstat", "user"))
|
||||
}
|
||||
|
||||
func TestGather_Pattern(t *testing.T) {
|
||||
pattern := "foo"
|
||||
|
||||
p := Procstat{
|
||||
Pattern: pattern,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
require.Equal(t, pattern, acc.TagValue("procstat", "pattern"))
|
||||
}
|
||||
|
||||
func TestGather_PidFile(t *testing.T) {
|
||||
pidfile := "/path/to/pidfile"
|
||||
|
||||
p := Procstat{
|
||||
PidFile: pidfile,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
require.Equal(t, pidfile, acc.TagValue("procstat", "pidfile"))
|
||||
}
|
||||
|
||||
func TestGather_PercentFirstPass(t *testing.T) {
|
||||
processID := pid(os.Getpid())
|
||||
|
||||
p := Procstat{
|
||||
Pattern: "foo",
|
||||
PidTag: true,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
require.True(t, acc.HasFloatField("procstat", "cpu_time_user"))
|
||||
require.False(t, acc.HasFloatField("procstat", "cpu_usage"))
|
||||
}
|
||||
|
||||
func TestGather_PercentSecondPass(t *testing.T) {
|
||||
processID := pid(os.Getpid())
|
||||
|
||||
p := Procstat{
|
||||
Pattern: "foo",
|
||||
PidTag: true,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
require.True(t, acc.HasFloatField("procstat", "cpu_time_user"))
|
||||
require.True(t, acc.HasFloatField("procstat", "cpu_usage"))
|
||||
}
|
||||
|
||||
func TestGather_systemdUnitPIDs(t *testing.T) {
|
||||
p := Procstat{
|
||||
SystemdUnit: "TestGather_systemdUnitPIDs",
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
pidsTags, err := p.findPids()
|
||||
require.NoError(t, err)
|
||||
|
||||
for _, pidsTag := range pidsTags {
|
||||
require.Equal(t, []pid{11408}, pidsTag.PIDs)
|
||||
require.Equal(t, "TestGather_systemdUnitPIDs", pidsTag.Tags["systemd_unit"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestGather_cgroupPIDs(t *testing.T) {
|
||||
// no cgroups in windows
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("no cgroups in windows")
|
||||
}
|
||||
td := t.TempDir()
|
||||
err := os.WriteFile(filepath.Join(td, "cgroup.procs"), []byte("1234\n5678\n"), 0640)
|
||||
require.NoError(t, err)
|
||||
|
||||
p := Procstat{
|
||||
CGroup: td,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
pidsTags, err := p.findPids()
|
||||
require.NoError(t, err)
|
||||
for _, pidsTag := range pidsTags {
|
||||
require.Equal(t, []pid{1234, 5678}, pidsTag.PIDs)
|
||||
require.Equal(t, td, pidsTag.Tags["cgroup"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcstatLookupMetric(t *testing.T) {
|
||||
p := Procstat{
|
||||
Exe: "-Gsys",
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{543}),
|
||||
createProcess: newProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
require.NotEmpty(t, acc.GetTelegrafMetrics())
|
||||
}
|
||||
|
||||
func TestGather_SameTimestamps(t *testing.T) {
|
||||
pidfile := "/path/to/pidfile"
|
||||
|
||||
p := Procstat{
|
||||
PidFile: pidfile,
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
createProcess: newTestProc,
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, p.Gather(&acc))
|
||||
|
||||
procstat, _ := acc.Get("procstat")
|
||||
procstatLookup, _ := acc.Get("procstat_lookup")
|
||||
|
||||
require.Equal(t, procstat.Time, procstatLookup.Time)
|
||||
}
|
||||
|
||||
func TestGather_supervisorUnitPIDs(t *testing.T) {
|
||||
p := Procstat{
|
||||
SupervisorUnits: []string{"TestGather_supervisorUnitPIDs"},
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
pidsTags, err := p.findPids()
|
||||
require.NoError(t, err)
|
||||
for _, pidsTag := range pidsTags {
|
||||
require.Equal(t, []pid{7311, 8111, 8112}, pidsTag.PIDs)
|
||||
require.Equal(t, "TestGather_supervisorUnitPIDs", pidsTag.Tags["supervisor_unit"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestGather_MoresupervisorUnitPIDs(t *testing.T) {
|
||||
p := Procstat{
|
||||
SupervisorUnits: []string{"TestGather_STARTINGsupervisorUnitPIDs", "TestGather_FATALsupervisorUnitPIDs"},
|
||||
PidFinder: "test",
|
||||
Properties: []string{"cpu", "memory", "mmap"},
|
||||
Log: testutil.Logger{},
|
||||
finder: newTestFinder([]pid{processID}),
|
||||
}
|
||||
require.NoError(t, p.Init())
|
||||
|
||||
pidsTags, err := p.findPids()
|
||||
require.NoError(t, err)
|
||||
for _, pidsTag := range pidsTags {
|
||||
require.Empty(t, pidsTag.PIDs)
|
||||
switch pidsTag.Tags["supervisor_unit"] {
|
||||
case "TestGather_STARTINGsupervisorUnitPIDs":
|
||||
require.Equal(t, "STARTING", pidsTag.Tags["status"])
|
||||
case "TestGather_FATALsupervisorUnitPIDs":
|
||||
require.Equal(t, "FATAL", pidsTag.Tags["status"])
|
||||
require.Equal(t, "Exited too quickly (process log may have details)", pidsTag.Tags["error"])
|
||||
default:
|
||||
t.Fatalf("unexpected value for tag 'supervisor_unit': %q", pidsTag.Tags["supervisor_unit"])
|
||||
}
|
||||
}
|
||||
}
|
106
plugins/inputs/procstat/sample.conf
Normal file
106
plugins/inputs/procstat/sample.conf
Normal file
|
@ -0,0 +1,106 @@
|
|||
# Monitor process cpu and memory usage
|
||||
[[inputs.procstat]]
|
||||
## PID file to monitor process
|
||||
pid_file = "/var/run/nginx.pid"
|
||||
## executable name (ie, pgrep <exe>)
|
||||
# exe = "nginx"
|
||||
## pattern as argument for pgrep (ie, pgrep -f <pattern>)
|
||||
# pattern = "nginx"
|
||||
## user as argument for pgrep (ie, pgrep -u <user>)
|
||||
# user = "nginx"
|
||||
## Systemd unit name, supports globs when include_systemd_children is set to true
|
||||
# systemd_unit = "nginx.service"
|
||||
# include_systemd_children = false
|
||||
## CGroup name or path, supports globs
|
||||
# cgroup = "systemd/system.slice/nginx.service"
|
||||
## Supervisor service names of supervisorctl management
|
||||
# supervisor_units = ["webserver", "proxy"]
|
||||
|
||||
## Windows service name
|
||||
# win_service = ""
|
||||
|
||||
## override for process_name
|
||||
## This is optional; default is sourced from /proc/<pid>/status
|
||||
# process_name = "bar"
|
||||
|
||||
## Field name prefix
|
||||
# prefix = ""
|
||||
|
||||
## Mode to use when calculating CPU usage. Can be one of 'solaris' or 'irix'.
|
||||
# mode = "irix"
|
||||
|
||||
## Add the given information tag instead of a field
|
||||
## This allows creating unique metrics/series when collecting processes with
|
||||
## otherwise identical tags. However, please be careful as this can easily
|
||||
## result in a large number of series, especially with short-lived processes,
|
||||
## creating high cardinality at the output.
|
||||
## Available options are:
|
||||
## cmdline -- full commandline
|
||||
## pid -- ID of the process
|
||||
## ppid -- ID of the process' parent
|
||||
## status -- state of the process
|
||||
## user -- username owning the process
|
||||
## socket only options:
|
||||
## protocol -- protocol type of the process socket
|
||||
## state -- state of the process socket
|
||||
## src -- source address of the process socket (non-unix sockets)
|
||||
## src_port -- source port of the process socket (non-unix sockets)
|
||||
## dest -- destination address of the process socket (non-unix sockets)
|
||||
## dest_port -- destination port of the process socket (non-unix sockets)
|
||||
## name -- name of the process socket (unix sockets only)
|
||||
## Available for procstat_lookup:
|
||||
## level -- level of the process filtering
|
||||
# tag_with = []
|
||||
|
||||
## Properties to collect
|
||||
## Available options are
|
||||
## cpu -- CPU usage statistics
|
||||
## limits -- set resource limits
|
||||
## memory -- memory usage statistics
|
||||
## mmap -- mapped memory usage statistics (caution: can cause high load)
|
||||
## sockets -- socket statistics for protocols in 'socket_protocols'
|
||||
# properties = ["cpu", "limits", "memory", "mmap"]
|
||||
|
||||
## Protocol filter for the sockets property
|
||||
## Available options are
|
||||
## all -- all of the protocols below
|
||||
## tcp4 -- TCP socket statistics for IPv4
|
||||
## tcp6 -- TCP socket statistics for IPv6
|
||||
## udp4 -- UDP socket statistics for IPv4
|
||||
## udp6 -- UDP socket statistics for IPv6
|
||||
## unix -- Unix socket statistics
|
||||
# socket_protocols = ["all"]
|
||||
|
||||
## Method to use when finding process IDs. Can be one of 'pgrep', or
|
||||
## 'native'. The pgrep finder calls the pgrep executable in the PATH while
|
||||
## the native finder performs the search directly in a manner dependent on the
|
||||
## platform. Default is 'pgrep'
|
||||
# pid_finder = "pgrep"
|
||||
|
||||
## New-style filtering configuration (multiple filter sections are allowed)
|
||||
# [[inputs.procstat.filter]]
|
||||
# ## Name of the filter added as 'filter' tag
|
||||
# name = "shell"
|
||||
#
|
||||
# ## Service filters, only one is allowed
|
||||
# ## Systemd unit names (wildcards are supported)
|
||||
# # systemd_units = []
|
||||
# ## CGroup name or path (wildcards are supported)
|
||||
# # cgroups = []
|
||||
# ## Supervisor service names of supervisorctl management
|
||||
# # supervisor_units = []
|
||||
# ## Windows service names
|
||||
# # win_service = []
|
||||
#
|
||||
# ## Process filters, multiple are allowed
|
||||
# ## Regular expressions to use for matching against the full command
|
||||
# # patterns = ['.*']
|
||||
# ## List of users owning the process (wildcards are supported)
|
||||
# # users = ['*']
|
||||
# ## List of executable paths of the process (wildcards are supported)
|
||||
# # executables = ['*']
|
||||
# ## List of process names (wildcards are supported)
|
||||
# # process_names = ['*']
|
||||
# ## Recursion depth for determining children of the matched processes
|
||||
# ## A negative value means all children with infinite depth
|
||||
# # recursion_depth = 0
|
166
plugins/inputs/procstat/service_finders.go
Normal file
166
plugins/inputs/procstat/service_finders.go
Normal file
|
@ -0,0 +1,166 @@
|
|||
package procstat
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
gopsprocess "github.com/shirou/gopsutil/v4/process"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
)
|
||||
|
||||
type processFinder struct {
|
||||
errPidFiles map[string]bool
|
||||
log telegraf.Logger
|
||||
}
|
||||
|
||||
func newProcessFinder(log telegraf.Logger) *processFinder {
|
||||
return &processFinder{
|
||||
errPidFiles: make(map[string]bool),
|
||||
log: log,
|
||||
}
|
||||
}
|
||||
|
||||
func (f *processFinder) findByPidFiles(paths []string) ([]processGroup, error) {
|
||||
groups := make([]processGroup, 0, len(paths))
|
||||
for _, path := range paths {
|
||||
buf, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read pidfile %q: %w", path, err)
|
||||
}
|
||||
pid, err := strconv.ParseInt(strings.TrimSpace(string(buf)), 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse PID in file %q: %w", path, err)
|
||||
}
|
||||
|
||||
p, err := gopsprocess.NewProcess(int32(pid))
|
||||
if err != nil && !f.errPidFiles[path] {
|
||||
f.log.Errorf("failed to find process for PID %d of file %q: %v", pid, path, err)
|
||||
f.errPidFiles[path] = true
|
||||
}
|
||||
groups = append(groups, processGroup{
|
||||
processes: []*gopsprocess.Process{p},
|
||||
tags: map[string]string{"pidfile": path},
|
||||
})
|
||||
}
|
||||
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
func findByCgroups(cgroups []string) ([]processGroup, error) {
|
||||
groups := make([]processGroup, 0, len(cgroups))
|
||||
for _, cgroup := range cgroups {
|
||||
path := cgroup
|
||||
if !filepath.IsAbs(cgroup) {
|
||||
path = filepath.Join("sys", "fs", "cgroup"+cgroup)
|
||||
}
|
||||
|
||||
files, err := filepath.Glob(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to determine files for cgroup %q: %w", cgroup, err)
|
||||
}
|
||||
|
||||
for _, fpath := range files {
|
||||
if f, err := os.Stat(fpath); err != nil {
|
||||
return nil, fmt.Errorf("accessing %q failed: %w", fpath, err)
|
||||
} else if !f.IsDir() {
|
||||
return nil, fmt.Errorf("%q is not a directory", fpath)
|
||||
}
|
||||
|
||||
fn := filepath.Join(fpath, "cgroup.procs")
|
||||
buf, err := os.ReadFile(fn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
lines := bytes.Split(buf, []byte{'\n'})
|
||||
procs := make([]*gopsprocess.Process, 0, len(lines))
|
||||
for _, l := range lines {
|
||||
l := strings.TrimSpace(string(l))
|
||||
if len(l) == 0 {
|
||||
continue
|
||||
}
|
||||
pid, err := strconv.ParseInt(l, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse PID %q in file %q", l, fpath)
|
||||
}
|
||||
p, err := gopsprocess.NewProcess(int32(pid))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find process for PID %d of %q: %w", pid, fpath, err)
|
||||
}
|
||||
procs = append(procs, p)
|
||||
}
|
||||
|
||||
groups = append(groups, processGroup{
|
||||
processes: procs,
|
||||
tags: map[string]string{"cgroup": cgroup, "cgroup_full": fpath}})
|
||||
}
|
||||
}
|
||||
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
func findBySupervisorUnits(units string) ([]processGroup, error) {
|
||||
buf, err := execCommand("supervisorctl", "status", units, " ").Output()
|
||||
if err != nil && !strings.Contains(err.Error(), "exit status 3") {
|
||||
// Exit 3 means at least on process is in one of the "STOPPED" states
|
||||
return nil, fmt.Errorf("failed to execute 'supervisorctl': %w", err)
|
||||
}
|
||||
lines := strings.Split(string(buf), "\n")
|
||||
|
||||
// Get the PID, running status, running time and boot time of the main process:
|
||||
// pid 11779, uptime 17:41:16
|
||||
// Exited too quickly (process log may have details)
|
||||
groups := make([]processGroup, 0, len(lines))
|
||||
for _, line := range lines {
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
kv := strings.Fields(line)
|
||||
if len(kv) < 2 {
|
||||
// Not a key-value pair
|
||||
continue
|
||||
}
|
||||
name, status := kv[0], kv[1]
|
||||
tags := map[string]string{
|
||||
"supervisor_unit": name,
|
||||
"status": status,
|
||||
}
|
||||
|
||||
var procs []*gopsprocess.Process
|
||||
switch status {
|
||||
case "FATAL", "EXITED", "BACKOFF", "STOPPING":
|
||||
tags["error"] = strings.Join(kv[2:], " ")
|
||||
case "RUNNING":
|
||||
tags["uptimes"] = kv[5]
|
||||
rawpid := strings.ReplaceAll(kv[3], ",", "")
|
||||
grouppid, err := strconv.ParseInt(rawpid, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse group PID %q: %w", rawpid, err)
|
||||
}
|
||||
p, err := gopsprocess.NewProcess(int32(grouppid))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find process for PID %d of unit %q: %w", grouppid, name, err)
|
||||
}
|
||||
// Get all children of the supervisor unit
|
||||
procs, err = p.Children()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get children for PID %d of unit %q: %w", grouppid, name, err)
|
||||
}
|
||||
tags["parent_pid"] = rawpid
|
||||
case "STOPPED", "UNKNOWN", "STARTING":
|
||||
// No additional info
|
||||
}
|
||||
|
||||
groups = append(groups, processGroup{
|
||||
processes: procs,
|
||||
tags: tags,
|
||||
})
|
||||
}
|
||||
|
||||
return groups, nil
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue