Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
e393c3af3f
commit
4978089aab
4963 changed files with 677545 additions and 0 deletions
98
plugins/inputs/processes/README.md
Normal file
98
plugins/inputs/processes/README.md
Normal file
|
@ -0,0 +1,98 @@
|
|||
# Processes Input Plugin
|
||||
|
||||
This plugin gathers info about the total number of processes and groups
|
||||
them by status (zombie, sleeping, running, etc.)
|
||||
|
||||
On linux this plugin requires access to procfs (/proc), on other OSes
|
||||
it requires access to execute `ps`.
|
||||
|
||||
**Supported Platforms**: Linux, FreeBSD, Darwin
|
||||
|
||||
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
|
||||
|
||||
In addition to the plugin-specific configuration settings, plugins support
|
||||
additional global and plugin configuration settings. These settings are used to
|
||||
modify metrics, tags, and fields or create aliases and configure ordering, etc.
|
||||
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||
|
||||
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
|
||||
|
||||
## Configuration
|
||||
|
||||
```toml @sample.conf
|
||||
# Get the number of processes and group them by status
|
||||
# This plugin ONLY supports non-Windows
|
||||
[[inputs.processes]]
|
||||
## Use sudo to run ps command on *BSD systems. Linux systems will read
|
||||
## /proc, so this does not apply there.
|
||||
# use_sudo = false
|
||||
```
|
||||
|
||||
Another possible configuration is to define an alternative path for resolving
|
||||
the /proc location. Using the environment variable `HOST_PROC` the plugin will
|
||||
retrieve process information from the specified location.
|
||||
|
||||
`docker run -v /proc:/rootfs/proc:ro -e HOST_PROC=/rootfs/proc`
|
||||
|
||||
### Using sudo
|
||||
|
||||
Linux systems will read from `/proc`, while BSD systems will use the `ps`
|
||||
command. The `ps` command generally does not require elevated permissions.
|
||||
However, if a user wants to collect system-wide stats, elevated permissions are
|
||||
required. If the user has configured sudo with the ability to run this
|
||||
command, then set the `use_sudo` to true.
|
||||
|
||||
If your account does not already have the ability to run commands with
|
||||
passwordless sudo then updates to the sudoers file are required. Below is an
|
||||
example to allow the required ps commands:
|
||||
|
||||
First, use the `visudo` command to start editing the sudoers file. Then add
|
||||
the following content, where `<username>` is the username of the user that
|
||||
needs this access:
|
||||
|
||||
```text
|
||||
Cmnd_Alias PS = /bin/ps
|
||||
<username> ALL=(root) NOPASSWD: PS
|
||||
Defaults!PS !logfile, !syslog, !pam_session
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
||||
- processes
|
||||
- fields:
|
||||
- blocked (aka disk sleep or uninterruptible sleep)
|
||||
- running
|
||||
- sleeping
|
||||
- stopped
|
||||
- total
|
||||
- zombies
|
||||
- dead
|
||||
- wait (freebsd only)
|
||||
- idle (bsd and Linux 4+ only)
|
||||
- paging (linux only)
|
||||
- parked (linux only)
|
||||
- total_threads (linux only)
|
||||
|
||||
## Process State Mappings
|
||||
|
||||
Different OSes use slightly different state codes for their processes. These
state codes are documented in `man ps`; the table below maps the state codes of
each major OS to the corresponding telegraf metrics:
|
||||
|
||||
```sh
|
||||
Linux FreeBSD Darwin meaning
|
||||
R R R running
|
||||
S S S sleeping
|
||||
Z Z Z zombie
|
||||
X none none dead
|
||||
T T T stopped
|
||||
I I I idle (sleeping for longer than about 20 seconds)
|
||||
D D,L U blocked (waiting in uninterruptible sleep, or locked)
|
||||
W W none paging (linux kernel < 2.6 only), wait (freebsd)
|
||||
```
|
||||
|
||||
## Example Output
|
||||
|
||||
```text
|
||||
processes blocked=8i,running=1i,sleeping=265i,stopped=0i,total=274i,zombies=0i,dead=0i,paging=0i,total_threads=687i 1457478636980905042
|
||||
```
|
11
plugins/inputs/processes/processes.go
Normal file
11
plugins/inputs/processes/processes.go
Normal file
|
@ -0,0 +1,11 @@
|
|||
//go:generate ../../../tools/readme_config_includer/generator
|
||||
package processes
|
||||
|
||||
import _ "embed"
|
||||
|
||||
//go:embed sample.conf
|
||||
var sampleConfig string
|
||||
|
||||
func (*Processes) SampleConfig() string {
|
||||
return sampleConfig
|
||||
}
|
239
plugins/inputs/processes/processes_notwindows.go
Normal file
239
plugins/inputs/processes/processes_notwindows.go
Normal file
|
@ -0,0 +1,239 @@
|
|||
//go:build !windows
|
||||
|
||||
package processes
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
type Processes struct {
|
||||
UseSudo bool `toml:"use_sudo"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
|
||||
execPS func(UseSudo bool) ([]byte, error)
|
||||
readProcFile func(filename string) ([]byte, error)
|
||||
forcePS bool
|
||||
forceProc bool
|
||||
}
|
||||
|
||||
func (p *Processes) Gather(acc telegraf.Accumulator) error {
|
||||
// Get an empty map of metric fields
|
||||
fields := getEmptyFields()
|
||||
|
||||
// Decide if we will use 'ps' to get stats (use procfs otherwise)
|
||||
usePS := true
|
||||
if runtime.GOOS == "linux" {
|
||||
usePS = false
|
||||
}
|
||||
if p.forcePS {
|
||||
usePS = true
|
||||
} else if p.forceProc {
|
||||
usePS = false
|
||||
}
|
||||
|
||||
// Gather stats from 'ps' or procfs
|
||||
if usePS {
|
||||
if err := p.gatherFromPS(fields); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := p.gatherFromProc(fields); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
acc.AddGauge("processes", fields, nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// getEmptyFields returns the metric fields for the current OS, each set to
// int64(0).
func getEmptyFields() map[string]interface{} {
	// Counters reported on every platform.
	names := []string{
		"blocked",
		"zombies",
		"stopped",
		"running",
		"sleeping",
		"total",
		"unknown",
	}

	// Platform-specific counters.
	switch runtime.GOOS {
	case "freebsd":
		names = append(names, "idle", "wait")
	case "darwin", "openbsd":
		names = append(names, "idle")
	case "linux":
		names = append(names, "dead", "paging", "total_threads", "idle")
	}

	fields := make(map[string]interface{}, len(names))
	for _, name := range names {
		fields[name] = int64(0)
	}
	return fields
}
|
||||
|
||||
// exec `ps` to get all process states
|
||||
func (p *Processes) gatherFromPS(fields map[string]interface{}) error {
|
||||
out, err := p.execPS(p.UseSudo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for i, status := range bytes.Fields(out) {
|
||||
if i == 0 && string(status) == "STAT" {
|
||||
// This is a header, skip it
|
||||
continue
|
||||
}
|
||||
switch status[0] {
|
||||
case 'W':
|
||||
fields["wait"] = fields["wait"].(int64) + int64(1)
|
||||
case 'U', 'D', 'L':
|
||||
// Also known as uninterruptible sleep or disk sleep
|
||||
fields["blocked"] = fields["blocked"].(int64) + int64(1)
|
||||
case 'Z':
|
||||
fields["zombies"] = fields["zombies"].(int64) + int64(1)
|
||||
case 'X':
|
||||
fields["dead"] = fields["dead"].(int64) + int64(1)
|
||||
case 'T':
|
||||
fields["stopped"] = fields["stopped"].(int64) + int64(1)
|
||||
case 'R':
|
||||
fields["running"] = fields["running"].(int64) + int64(1)
|
||||
case 'S':
|
||||
fields["sleeping"] = fields["sleeping"].(int64) + int64(1)
|
||||
case 'I':
|
||||
fields["idle"] = fields["idle"].(int64) + int64(1)
|
||||
case '?':
|
||||
fields["unknown"] = fields["unknown"].(int64) + int64(1)
|
||||
default:
|
||||
p.Log.Infof("Unknown state %q from ps", string(status[0]))
|
||||
}
|
||||
fields["total"] = fields["total"].(int64) + int64(1)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// get process states from /proc/(pid)/stat files
|
||||
func (p *Processes) gatherFromProc(fields map[string]interface{}) error {
|
||||
filenames, err := filepath.Glob(internal.GetProcPath() + "/[0-9]*/stat")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, filename := range filenames {
|
||||
data, err := p.readProcFile(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if data == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Parse out data after (<cmd name>)
|
||||
i := bytes.LastIndex(data, []byte(")"))
|
||||
if i == -1 {
|
||||
continue
|
||||
}
|
||||
data = data[i+2:]
|
||||
|
||||
stats := bytes.Fields(data)
|
||||
if len(stats) < 3 {
|
||||
return fmt.Errorf("something is terribly wrong with %s", filename)
|
||||
}
|
||||
switch stats[0][0] {
|
||||
case 'R':
|
||||
fields["running"] = fields["running"].(int64) + int64(1)
|
||||
case 'S':
|
||||
fields["sleeping"] = fields["sleeping"].(int64) + int64(1)
|
||||
case 'D':
|
||||
fields["blocked"] = fields["blocked"].(int64) + int64(1)
|
||||
case 'Z':
|
||||
fields["zombies"] = fields["zombies"].(int64) + int64(1)
|
||||
case 'X':
|
||||
fields["dead"] = fields["dead"].(int64) + int64(1)
|
||||
case 'T', 't':
|
||||
fields["stopped"] = fields["stopped"].(int64) + int64(1)
|
||||
case 'W':
|
||||
fields["paging"] = fields["paging"].(int64) + int64(1)
|
||||
case 'I':
|
||||
fields["idle"] = fields["idle"].(int64) + int64(1)
|
||||
case 'P':
|
||||
if _, ok := fields["parked"]; ok {
|
||||
fields["parked"] = fields["parked"].(int64) + int64(1)
|
||||
}
|
||||
fields["parked"] = int64(1)
|
||||
default:
|
||||
p.Log.Infof("Unknown state %q in file %q", string(stats[0][0]), filename)
|
||||
}
|
||||
fields["total"] = fields["total"].(int64) + int64(1)
|
||||
|
||||
threads, err := strconv.Atoi(string(stats[17]))
|
||||
if err != nil {
|
||||
p.Log.Infof("Error parsing thread count: %s", err.Error())
|
||||
continue
|
||||
}
|
||||
fields["total_threads"] = fields["total_threads"].(int64) + int64(threads)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// readProcFile returns the content of filename.
//
// A file that does not exist, or whose read fails with ESRCH, yields
// (nil, nil) so callers can skip it. Any other error is returned as is.
func readProcFile(filename string) ([]byte, error) {
	content, err := os.ReadFile(filename)
	if err == nil {
		return content, nil
	}

	if os.IsNotExist(err) {
		return nil, nil
	}

	// Reading from /proc/<PID> fails with ESRCH if the process has
	// been terminated between open() and read().
	var pathErr *os.PathError
	if errors.As(err, &pathErr) && errors.Is(pathErr.Err, syscall.ESRCH) {
		return nil, nil
	}

	return nil, err
}
|
||||
|
||||
// execPS runs `ps axo state` and returns its standard output. With useSudo
// set, the command is run through `sudo -n`.
//
// It returns an error if `ps` cannot be found in PATH or if the command
// fails; no output is returned in either case.
func execPS(useSudo bool) ([]byte, error) {
	psPath, err := exec.LookPath("ps")
	if err != nil {
		return nil, err
	}

	name := psPath
	args := []string{"axo", "state"}
	if useSudo {
		name = "sudo"
		args = append([]string{"-n", psPath}, args...)
	}

	output, err := exec.Command(name, args...).Output()
	if err != nil {
		return nil, err
	}
	return output, nil
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("processes", func() telegraf.Input {
|
||||
return &Processes{
|
||||
execPS: execPS,
|
||||
readProcFile: readProcFile,
|
||||
}
|
||||
})
|
||||
}
|
198
plugins/inputs/processes/processes_test.go
Normal file
198
plugins/inputs/processes/processes_test.go
Normal file
|
@ -0,0 +1,198 @@
|
|||
//go:build !windows
|
||||
|
||||
package processes
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
func TestProcesses(t *testing.T) {
|
||||
tester := tester{}
|
||||
processes := &Processes{
|
||||
Log: testutil.Logger{},
|
||||
execPS: testExecPS(
|
||||
"STAT\n Ss \n S \n Z \n R \n S< \n SNs \n Ss+ \n \n \n",
|
||||
),
|
||||
readProcFile: tester.testProcFile,
|
||||
}
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, processes.Gather(&acc))
|
||||
|
||||
require.True(t, acc.HasInt64Field("processes", "running"))
|
||||
require.True(t, acc.HasInt64Field("processes", "sleeping"))
|
||||
require.True(t, acc.HasInt64Field("processes", "stopped"))
|
||||
require.True(t, acc.HasInt64Field("processes", "total"))
|
||||
total, ok := acc.Get("processes")
|
||||
require.True(t, ok)
|
||||
require.Positive(t, total.Fields["total"])
|
||||
}
|
||||
|
||||
func TestFromPS(t *testing.T) {
|
||||
processes := &Processes{
|
||||
Log: testutil.Logger{},
|
||||
execPS: testExecPS("\nSTAT\nD\nI\nL\nR\nR+\nS\nS+\nSNs\nSs\nU\nZ\n"),
|
||||
forcePS: true,
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, processes.Gather(&acc))
|
||||
|
||||
fields := getEmptyFields()
|
||||
fields["blocked"] = int64(3)
|
||||
fields["zombies"] = int64(1)
|
||||
fields["running"] = int64(2)
|
||||
fields["sleeping"] = int64(4)
|
||||
fields["idle"] = int64(1)
|
||||
fields["total"] = int64(11)
|
||||
|
||||
acc.AssertContainsTaggedFields(t, "processes", fields, map[string]string{})
|
||||
}
|
||||
|
||||
func TestFromPSError(t *testing.T) {
|
||||
processes := &Processes{
|
||||
Log: testutil.Logger{},
|
||||
execPS: testExecPSError,
|
||||
forcePS: true,
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.Error(t, processes.Gather(&acc))
|
||||
}
|
||||
|
||||
func TestFromProcFiles(t *testing.T) {
|
||||
if runtime.GOOS != "linux" {
|
||||
t.Skip("This test only runs on linux")
|
||||
}
|
||||
tester := tester{}
|
||||
processes := &Processes{
|
||||
Log: testutil.Logger{},
|
||||
readProcFile: tester.testProcFile,
|
||||
forceProc: true,
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, processes.Gather(&acc))
|
||||
|
||||
fields := getEmptyFields()
|
||||
fields["sleeping"] = tester.calls
|
||||
fields["total_threads"] = tester.calls * 2
|
||||
fields["total"] = tester.calls
|
||||
|
||||
acc.AssertContainsTaggedFields(t, "processes", fields, map[string]string{})
|
||||
}
|
||||
|
||||
func TestFromProcFilesWithSpaceInCmd(t *testing.T) {
|
||||
if runtime.GOOS != "linux" {
|
||||
t.Skip("This test only runs on linux")
|
||||
}
|
||||
tester := tester{}
|
||||
processes := &Processes{
|
||||
Log: testutil.Logger{},
|
||||
readProcFile: tester.testProcFile2,
|
||||
forceProc: true,
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, processes.Gather(&acc))
|
||||
|
||||
fields := getEmptyFields()
|
||||
fields["sleeping"] = tester.calls
|
||||
fields["total_threads"] = tester.calls * 2
|
||||
fields["total"] = tester.calls
|
||||
|
||||
acc.AssertContainsTaggedFields(t, "processes", fields, map[string]string{})
|
||||
}
|
||||
|
||||
// Based on `man 5 proc`, parked processes an be found in a
|
||||
// limited range of Linux versions:
|
||||
//
|
||||
// > P Parked (Linux 3.9 to 3.13 only)
|
||||
//
|
||||
// However, we have had reports of this process state on Ubuntu
|
||||
// Bionic w/ Linux 4.15 (#6270)
|
||||
func TestParkedProcess(t *testing.T) {
|
||||
if runtime.GOOS != "linux" {
|
||||
t.Skip("Parked process test only relevant on linux")
|
||||
}
|
||||
procstat := `88 (watchdog/13) P 2 0 0 0 -1 69238848 0 0 0 0 0 0 0 0 20 0 1 0 20 0 0 18446744073709551615 0 0 0 0 0 0 0 ` +
|
||||
`2147483647 0 1 0 0 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
`
|
||||
plugin := &Processes{
|
||||
Log: testutil.Logger{},
|
||||
readProcFile: func(string) ([]byte, error) {
|
||||
return []byte(procstat), nil
|
||||
},
|
||||
forceProc: true,
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, plugin.Gather(&acc))
|
||||
|
||||
expected := []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"processes",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"blocked": 0,
|
||||
"dead": 0,
|
||||
"idle": 0,
|
||||
"paging": 0,
|
||||
"parked": 1,
|
||||
"running": 0,
|
||||
"sleeping": 0,
|
||||
"stopped": 0,
|
||||
"unknown": 0,
|
||||
"zombies": 0,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
telegraf.Gauge,
|
||||
),
|
||||
}
|
||||
|
||||
options := []cmp.Option{
|
||||
testutil.IgnoreTime(),
|
||||
testutil.IgnoreFields("total", "total_threads"),
|
||||
}
|
||||
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), options...)
|
||||
}
|
||||
|
||||
// testExecPS returns a fake `ps` runner that ignores its argument and always
// succeeds with out as its output.
func testExecPS(out string) func(_ bool) ([]byte, error) {
	fake := func(bool) ([]byte, error) {
		return []byte(out), nil
	}
	return fake
}
|
||||
|
||||
// struct for counting calls to testProcFile
|
||||
type tester struct {
|
||||
calls int64
|
||||
}
|
||||
|
||||
func (t *tester) testProcFile(_ string) ([]byte, error) {
|
||||
t.calls++
|
||||
return []byte(fmt.Sprintf(testProcStat, "S", "2")), nil
|
||||
}
|
||||
|
||||
func (t *tester) testProcFile2(_ string) ([]byte, error) {
|
||||
t.calls++
|
||||
return []byte(fmt.Sprintf(testProcStat2, "S", "2")), nil
|
||||
}
|
||||
|
||||
// testExecPSError is a fake `ps` runner that returns output together with an
// error.
func testExecPSError(_ bool) ([]byte, error) {
	const listing = "\nSTAT\nD\nI\nL\nR\nR+\nS\nS+\nSNs\nSs\nU\nZ\n"
	return []byte(listing), errors.New("error")
}
|
||||
|
||||
// Templates for the content of a stat file. The first verb is the process
// state, the second the thread count.
const (
	// testProcStat uses a command name without spaces.
	testProcStat = "10 (rcuob/0) %s 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 20 0 %s 0 11 0 0 18446744073709551615 0 0 0 0 0 0 0 " +
		"2147483647 0 18446744073709551615 0 0 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"

	// testProcStat2 uses a command name that contains a space.
	testProcStat2 = "10 (rcuob 0) %s 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 20 0 %s 0 11 0 0 18446744073709551615 0 0 0 0 0 0 0 " +
		"2147483647 0 18446744073709551615 0 0 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"
)
|
27
plugins/inputs/processes/processes_windows.go
Normal file
27
plugins/inputs/processes/processes_windows.go
Normal file
|
@ -0,0 +1,27 @@
|
|||
//go:build windows
|
||||
|
||||
package processes
|
||||
|
||||
import (
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
type Processes struct {
|
||||
Log telegraf.Logger
|
||||
}
|
||||
|
||||
func (e *Processes) Init() error {
|
||||
e.Log.Warn("Current platform is not supported")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (*Processes) Gather(telegraf.Accumulator) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("processes", func() telegraf.Input {
|
||||
return &Processes{}
|
||||
})
|
||||
}
|
6
plugins/inputs/processes/sample.conf
Normal file
6
plugins/inputs/processes/sample.conf
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Get the number of processes and group them by status
|
||||
# This plugin ONLY supports non-Windows
|
||||
[[inputs.processes]]
|
||||
## Use sudo to run ps command on *BSD systems. Linux systems will read
|
||||
## /proc, so this does not apply there.
|
||||
# use_sudo = false
|
Loading…
Add table
Add a link
Reference in a new issue