Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
e393c3af3f
commit
4978089aab
4963 changed files with 677545 additions and 0 deletions
367
plugins/inputs/mesos/README.md
Normal file
367
plugins/inputs/mesos/README.md
Normal file
|
@ -0,0 +1,367 @@
|
|||
# Apache Mesos Input Plugin
|
||||
|
||||
This plugin gathers metrics from [Apache Mesos][mesos] instances. For more
|
||||
information, please check the [Mesos Observability Metrics][monitoring] page.
|
||||
|
||||
⭐ Telegraf v0.10.3
|
||||
🏷️ containers
|
||||
💻 all
|
||||
|
||||
[mesos]:https://mesos.apache.org/
|
||||
[monitoring]: http://mesos.apache.org/documentation/latest/monitoring/
|
||||
|
||||
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
|
||||
|
||||
In addition to the plugin-specific configuration settings, plugins support
|
||||
additional global and plugin configuration settings. These settings are used to
|
||||
modify metrics, tags, and field or create aliases and configure ordering, etc.
|
||||
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||
|
||||
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
|
||||
|
||||
## Configuration
|
||||
|
||||
```toml @sample.conf
|
||||
# Telegraf plugin for gathering metrics from N Mesos masters
|
||||
[[inputs.mesos]]
|
||||
## Timeout, in ms.
|
||||
timeout = 100
|
||||
|
||||
## A list of Mesos masters.
|
||||
masters = ["http://localhost:5050"]
|
||||
|
||||
## Master metrics groups to be collected, by default, all enabled.
|
||||
master_collections = [
|
||||
"resources",
|
||||
"master",
|
||||
"system",
|
||||
"agents",
|
||||
"frameworks",
|
||||
"framework_offers",
|
||||
"tasks",
|
||||
"messages",
|
||||
"evqueue",
|
||||
"registrar",
|
||||
"allocator",
|
||||
]
|
||||
|
||||
## A list of Mesos slaves, default is []
|
||||
# slaves = []
|
||||
|
||||
## Slave metrics groups to be collected, by default, all enabled.
|
||||
# slave_collections = [
|
||||
# "resources",
|
||||
# "agent",
|
||||
# "system",
|
||||
# "executors",
|
||||
# "tasks",
|
||||
# "messages",
|
||||
# ]
|
||||
|
||||
## Optional TLS Config
|
||||
# tls_ca = "/etc/telegraf/ca.pem"
|
||||
# tls_cert = "/etc/telegraf/cert.pem"
|
||||
# tls_key = "/etc/telegraf/key.pem"
|
||||
## Use TLS but skip chain & host verification
|
||||
# insecure_skip_verify = false
|
||||
```
|
||||
|
||||
By default this plugin is not configured to gather metrics from mesos. Since a
|
||||
mesos cluster can be deployed in numerous ways it does not provide any default
|
||||
values. You must specify the master/slave nodes this plugin will gather metrics
|
||||
from.
|
||||
|
||||
## Metrics
|
||||
|
||||
Mesos master metric groups
|
||||
|
||||
- resources
|
||||
- master/cpus_percent
|
||||
- master/cpus_used
|
||||
- master/cpus_total
|
||||
- master/cpus_revocable_percent
|
||||
- master/cpus_revocable_total
|
||||
- master/cpus_revocable_used
|
||||
- master/disk_percent
|
||||
- master/disk_used
|
||||
- master/disk_total
|
||||
- master/disk_revocable_percent
|
||||
- master/disk_revocable_total
|
||||
- master/disk_revocable_used
|
||||
- master/gpus_percent
|
||||
- master/gpus_used
|
||||
- master/gpus_total
|
||||
- master/gpus_revocable_percent
|
||||
- master/gpus_revocable_total
|
||||
- master/gpus_revocable_used
|
||||
- master/mem_percent
|
||||
- master/mem_used
|
||||
- master/mem_total
|
||||
- master/mem_revocable_percent
|
||||
- master/mem_revocable_total
|
||||
- master/mem_revocable_used
|
||||
|
||||
- master
|
||||
- master/elected
|
||||
- master/uptime_secs
|
||||
|
||||
- system
|
||||
- system/cpus_total
|
||||
- system/load_15min
|
||||
- system/load_5min
|
||||
- system/load_1min
|
||||
- system/mem_free_bytes
|
||||
- system/mem_total_bytes
|
||||
|
||||
- agents
|
||||
- master/slave_registrations
|
||||
- master/slave_removals
|
||||
- master/slave_reregistrations
|
||||
- master/slave_shutdowns_scheduled
|
||||
- master/slave_shutdowns_canceled
|
||||
- master/slave_shutdowns_completed
|
||||
- master/slaves_active
|
||||
- master/slaves_connected
|
||||
- master/slaves_disconnected
|
||||
- master/slaves_inactive
|
||||
- master/slave_unreachable_canceled
|
||||
- master/slave_unreachable_completed
|
||||
- master/slave_unreachable_scheduled
|
||||
- master/slaves_unreachable
|
||||
|
||||
- frameworks
|
||||
- master/frameworks_active
|
||||
- master/frameworks_connected
|
||||
- master/frameworks_disconnected
|
||||
- master/frameworks_inactive
|
||||
- master/outstanding_offers
|
||||
|
||||
- framework_offers
|
||||
- master/frameworks/subscribed
|
||||
- master/frameworks/calls_total
|
||||
- master/frameworks/calls
|
||||
- master/frameworks/events_total
|
||||
- master/frameworks/events
|
||||
- master/frameworks/operations_total
|
||||
- master/frameworks/operations
|
||||
- master/frameworks/tasks/active
|
||||
- master/frameworks/tasks/terminal
|
||||
- master/frameworks/offers/sent
|
||||
- master/frameworks/offers/accepted
|
||||
- master/frameworks/offers/declined
|
||||
- master/frameworks/offers/rescinded
|
||||
- master/frameworks/roles/suppressed
|
||||
|
||||
- tasks
|
||||
- master/tasks_error
|
||||
- master/tasks_failed
|
||||
- master/tasks_finished
|
||||
- master/tasks_killed
|
||||
- master/tasks_lost
|
||||
- master/tasks_running
|
||||
- master/tasks_staging
|
||||
- master/tasks_starting
|
||||
- master/tasks_dropped
|
||||
- master/tasks_gone
|
||||
- master/tasks_gone_by_operator
|
||||
- master/tasks_killing
|
||||
- master/tasks_unreachable
|
||||
|
||||
- messages
|
||||
- master/invalid_executor_to_framework_messages
|
||||
- master/invalid_framework_to_executor_messages
|
||||
- master/invalid_status_update_acknowledgements
|
||||
- master/invalid_status_updates
|
||||
- master/dropped_messages
|
||||
- master/messages_authenticate
|
||||
- master/messages_deactivate_framework
|
||||
- master/messages_decline_offers
|
||||
- master/messages_executor_to_framework
|
||||
- master/messages_exited_executor
|
||||
- master/messages_framework_to_executor
|
||||
- master/messages_kill_task
|
||||
- master/messages_launch_tasks
|
||||
- master/messages_reconcile_tasks
|
||||
- master/messages_register_framework
|
||||
- master/messages_register_slave
|
||||
- master/messages_reregister_framework
|
||||
- master/messages_reregister_slave
|
||||
- master/messages_resource_request
|
||||
- master/messages_revive_offers
|
||||
- master/messages_status_update
|
||||
- master/messages_status_update_acknowledgement
|
||||
- master/messages_unregister_framework
|
||||
- master/messages_unregister_slave
|
||||
- master/messages_update_slave
|
||||
- master/recovery_slave_removals
|
||||
- master/slave_removals/reason_registered
|
||||
- master/slave_removals/reason_unhealthy
|
||||
- master/slave_removals/reason_unregistered
|
||||
- master/valid_framework_to_executor_messages
|
||||
- master/valid_status_update_acknowledgements
|
||||
- master/valid_status_updates
|
||||
- master/task_lost/source_master/reason_invalid_offers
|
||||
- master/task_lost/source_master/reason_slave_removed
|
||||
- master/task_lost/source_slave/reason_executor_terminated
|
||||
- master/valid_executor_to_framework_messages
|
||||
- master/invalid_operation_status_update_acknowledgements
|
||||
- master/messages_operation_status_update_acknowledgement
|
||||
- master/messages_reconcile_operations
|
||||
- master/messages_suppress_offers
|
||||
- master/valid_operation_status_update_acknowledgements
|
||||
|
||||
- evqueue
|
||||
- master/event_queue_dispatches
|
||||
- master/event_queue_http_requests
|
||||
- master/event_queue_messages
|
||||
- master/operator_event_stream_subscribers
|
||||
|
||||
- registrar
|
||||
- registrar/state_fetch_ms
|
||||
- registrar/state_store_ms
|
||||
- registrar/state_store_ms/max
|
||||
- registrar/state_store_ms/min
|
||||
- registrar/state_store_ms/p50
|
||||
- registrar/state_store_ms/p90
|
||||
- registrar/state_store_ms/p95
|
||||
- registrar/state_store_ms/p99
|
||||
- registrar/state_store_ms/p999
|
||||
- registrar/state_store_ms/p9999
|
||||
- registrar/state_store_ms/count
|
||||
- registrar/log/ensemble_size
|
||||
- registrar/log/recovered
|
||||
- registrar/queued_operations
|
||||
- registrar/registry_size_bytes
|
||||
|
||||
- allocator
|
||||
- allocator/allocation_run_ms
|
||||
- allocator/allocation_run_ms/count
|
||||
- allocator/allocation_run_ms/max
|
||||
- allocator/allocation_run_ms/min
|
||||
- allocator/allocation_run_ms/p50
|
||||
- allocator/allocation_run_ms/p90
|
||||
- allocator/allocation_run_ms/p95
|
||||
- allocator/allocation_run_ms/p99
|
||||
- allocator/allocation_run_ms/p999
|
||||
- allocator/allocation_run_ms/p9999
|
||||
- allocator/allocation_runs
|
||||
- allocator/allocation_run_latency_ms
|
||||
- allocator/allocation_run_latency_ms/count
|
||||
- allocator/allocation_run_latency_ms/max
|
||||
- allocator/allocation_run_latency_ms/min
|
||||
- allocator/allocation_run_latency_ms/p50
|
||||
- allocator/allocation_run_latency_ms/p90
|
||||
- allocator/allocation_run_latency_ms/p95
|
||||
- allocator/allocation_run_latency_ms/p99
|
||||
- allocator/allocation_run_latency_ms/p999
|
||||
- allocator/allocation_run_latency_ms/p9999
|
||||
- allocator/roles/shares/dominant
|
||||
- allocator/event_queue_dispatches
|
||||
- allocator/offer_filters/roles/active
|
||||
- allocator/quota/roles/resources/offered_or_allocated
|
||||
- allocator/quota/roles/resources/guarantee
|
||||
- allocator/resources/cpus/offered_or_allocated
|
||||
- allocator/resources/cpus/total
|
||||
- allocator/resources/disk/offered_or_allocated
|
||||
- allocator/resources/disk/total
|
||||
- allocator/resources/mem/offered_or_allocated
|
||||
- allocator/resources/mem/total
|
||||
|
||||
Mesos slave metric groups
|
||||
|
||||
- resources
|
||||
- slave/cpus_percent
|
||||
- slave/cpus_used
|
||||
- slave/cpus_total
|
||||
- slave/cpus_revocable_percent
|
||||
- slave/cpus_revocable_total
|
||||
- slave/cpus_revocable_used
|
||||
- slave/disk_percent
|
||||
- slave/disk_used
|
||||
- slave/disk_total
|
||||
- slave/disk_revocable_percent
|
||||
- slave/disk_revocable_total
|
||||
- slave/disk_revocable_used
|
||||
- slave/gpus_percent
|
||||
- slave/gpus_used
|
||||
- slave/gpus_total
|
||||
- slave/gpus_revocable_percent
|
||||
- slave/gpus_revocable_total
|
||||
- slave/gpus_revocable_used
|
||||
- slave/mem_percent
|
||||
- slave/mem_used
|
||||
- slave/mem_total
|
||||
- slave/mem_revocable_percent
|
||||
- slave/mem_revocable_total
|
||||
- slave/mem_revocable_used
|
||||
|
||||
- agent
|
||||
- slave/registered
|
||||
- slave/uptime_secs
|
||||
|
||||
- system
|
||||
- system/cpus_total
|
||||
- system/load_15min
|
||||
- system/load_5min
|
||||
- system/load_1min
|
||||
- system/mem_free_bytes
|
||||
- system/mem_total_bytes
|
||||
|
||||
- executors
|
||||
- containerizer/mesos/container_destroy_errors
|
||||
- slave/container_launch_errors
|
||||
- slave/executors_preempted
|
||||
- slave/frameworks_active
|
||||
- slave/executor_directory_max_allowed_age_secs
|
||||
- slave/executors_registering
|
||||
- slave/executors_running
|
||||
- slave/executors_terminated
|
||||
- slave/executors_terminating
|
||||
- slave/recovery_errors
|
||||
|
||||
- tasks
|
||||
- slave/tasks_failed
|
||||
- slave/tasks_finished
|
||||
- slave/tasks_killed
|
||||
- slave/tasks_lost
|
||||
- slave/tasks_running
|
||||
- slave/tasks_staging
|
||||
- slave/tasks_starting
|
||||
|
||||
- messages
|
||||
- slave/invalid_framework_messages
|
||||
- slave/invalid_status_updates
|
||||
- slave/valid_framework_messages
|
||||
- slave/valid_status_updates
|
||||
|
||||
## Tags
|
||||
|
||||
- All master/slave measurements have the following tags:
|
||||
- server (hostname of the server, without the port: `host`)
|
||||
- url (URL origin of server: `scheme://host:port`)
|
||||
- role (master/slave)
|
||||
|
||||
- All master measurements have the extra tags:
|
||||
- state (leader/standby)
|
||||
|
||||
## Example Output
|
||||
|
||||
```text
|
||||
mesos,role=master,state=leader,host=172.17.8.102,server=172.17.8.101
|
||||
allocator/event_queue_dispatches=0,master/cpus_percent=0,
|
||||
master/cpus_revocable_percent=0,master/cpus_revocable_total=0,
|
||||
master/cpus_revocable_used=0,master/cpus_total=2,
|
||||
master/cpus_used=0,master/disk_percent=0,master/disk_revocable_percent=0,
|
||||
master/disk_revocable_total=0,master/disk_revocable_used=0,master/disk_total=10823,
|
||||
master/disk_used=0,master/dropped_messages=2,master/elected=1,
|
||||
master/event_queue_dispatches=10,master/event_queue_http_requests=0,
|
||||
master/event_queue_messages=0,master/frameworks_active=2,master/frameworks_connected=2,
|
||||
master/frameworks_disconnected=0,master/frameworks_inactive=0,
|
||||
master/invalid_executor_to_framework_messages=0,
|
||||
master/invalid_framework_to_executor_messages=0,
|
||||
master/invalid_status_update_acknowledgements=0,master/invalid_status_updates=0,master/mem_percent=0,
|
||||
master/mem_revocable_percent=0,master/mem_revocable_total=0,
|
||||
master/mem_revocable_used=0,master/mem_total=1002,
|
||||
master/mem_used=0,master/messages_authenticate=0,
|
||||
master/messages_deactivate_framework=0 ...
|
||||
```
|
544
plugins/inputs/mesos/mesos.go
Normal file
544
plugins/inputs/mesos/mesos.go
Normal file
|
@ -0,0 +1,544 @@
|
|||
//go:generate ../../../tools/readme_config_includer/generator
|
||||
package mesos
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/common/tls"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
parsers_json "github.com/influxdata/telegraf/plugins/parsers/json"
|
||||
)
|
||||
|
||||
//go:embed sample.conf
|
||||
var sampleConfig string
|
||||
|
||||
// role names the kind of Mesos node a URL points at. Its string value is used
// verbatim as the "role" tag of the emitted measurements.
type role string

const (
	master role = "master"
	slave  role = "slave"
)

// allMetrics lists, per role, every metrics group that can be selected through
// the master_collections / slave_collections options. It doubles as the
// default selection when the corresponding option is left empty.
var allMetrics = map[role][]string{
	master: {
		"resources",
		"master",
		"system",
		"agents",
		"frameworks",
		"framework_offers",
		"tasks",
		"messages",
		"evqueue",
		"registrar",
		"allocator",
	},
	slave: {
		"resources",
		"agent",
		"system",
		"executors",
		"tasks",
		"messages",
	},
}
|
||||
|
||||
type Mesos struct {
|
||||
Timeout int `toml:"timeout"`
|
||||
Masters []string `toml:"masters"`
|
||||
MasterCols []string `toml:"master_collections"`
|
||||
Slaves []string `toml:"slaves"`
|
||||
SlaveCols []string `toml:"slave_collections"`
|
||||
tls.ClientConfig
|
||||
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
|
||||
initialized bool
|
||||
client *http.Client
|
||||
masterURLs []*url.URL
|
||||
slaveURLs []*url.URL
|
||||
}
|
||||
|
||||
func (*Mesos) SampleConfig() string {
|
||||
return sampleConfig
|
||||
}
|
||||
|
||||
func (m *Mesos) Gather(acc telegraf.Accumulator) error {
|
||||
if !m.initialized {
|
||||
err := m.initialize()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.initialized = true
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for _, mstr := range m.masterURLs {
|
||||
wg.Add(1)
|
||||
go func(mstr *url.URL) {
|
||||
acc.AddError(m.gatherMainMetrics(mstr, master, acc))
|
||||
wg.Done()
|
||||
}(mstr)
|
||||
}
|
||||
|
||||
for _, slv := range m.slaveURLs {
|
||||
wg.Add(1)
|
||||
go func(slv *url.URL) {
|
||||
acc.AddError(m.gatherMainMetrics(slv, slave, acc))
|
||||
wg.Done()
|
||||
}(slv)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Mesos) parseURL(s string, role role) (*url.URL, error) {
|
||||
if !strings.HasPrefix(s, "http://") && !strings.HasPrefix(s, "https://") {
|
||||
host, port, err := net.SplitHostPort(s)
|
||||
// no port specified
|
||||
if err != nil {
|
||||
host = s
|
||||
switch role {
|
||||
case master:
|
||||
port = "5050"
|
||||
case slave:
|
||||
port = "5051"
|
||||
}
|
||||
}
|
||||
|
||||
s = "http://" + host + ":" + port
|
||||
m.Log.Warnf("using %q as connection URL; please update your configuration to use an URL", s)
|
||||
}
|
||||
|
||||
return url.Parse(s)
|
||||
}
|
||||
|
||||
func (m *Mesos) initialize() error {
|
||||
if len(m.MasterCols) == 0 {
|
||||
m.MasterCols = allMetrics[master]
|
||||
}
|
||||
|
||||
if len(m.SlaveCols) == 0 {
|
||||
m.SlaveCols = allMetrics[slave]
|
||||
}
|
||||
|
||||
if m.Timeout == 0 {
|
||||
m.Log.Info("Missing timeout value, setting default value (100ms)")
|
||||
m.Timeout = 100
|
||||
}
|
||||
|
||||
rawQuery := "timeout=" + strconv.Itoa(m.Timeout) + "ms"
|
||||
|
||||
m.masterURLs = make([]*url.URL, 0, len(m.Masters))
|
||||
for _, mstr := range m.Masters {
|
||||
u, err := m.parseURL(mstr, master)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
u.RawQuery = rawQuery
|
||||
m.masterURLs = append(m.masterURLs, u)
|
||||
}
|
||||
|
||||
m.slaveURLs = make([]*url.URL, 0, len(m.Slaves))
|
||||
for _, slv := range m.Slaves {
|
||||
u, err := m.parseURL(slv, slave)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
u.RawQuery = rawQuery
|
||||
m.slaveURLs = append(m.slaveURLs, u)
|
||||
}
|
||||
|
||||
client, err := m.createHTTPClient()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.client = client
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Mesos) createHTTPClient() (*http.Client, error) {
|
||||
tlsCfg, err := m.ClientConfig.TLSConfig()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
TLSClientConfig: tlsCfg,
|
||||
},
|
||||
Timeout: 4 * time.Second,
|
||||
}
|
||||
|
||||
return client, nil
|
||||
}
|
||||
|
||||
// metricsDiff() returns set names for removal
|
||||
func metricsDiff(role role, w []string) []string {
|
||||
b := make([]string, 0, len(allMetrics[role]))
|
||||
s := make(map[string]bool)
|
||||
|
||||
if len(w) == 0 {
|
||||
return b
|
||||
}
|
||||
|
||||
for _, v := range w {
|
||||
s[v] = true
|
||||
}
|
||||
|
||||
for _, d := range allMetrics[role] {
|
||||
if _, ok := s[d]; !ok {
|
||||
b = append(b, d)
|
||||
}
|
||||
}
|
||||
|
||||
return b
|
||||
}
|
||||
|
||||
// masterBlocks serves as kind of metrics registry grouping them in sets
|
||||
func (m *Mesos) getMetrics(role role, group string) []string {
|
||||
metrics := make(map[string][]string)
|
||||
|
||||
if role == master {
|
||||
metrics["resources"] = []string{
|
||||
"master/cpus_percent",
|
||||
"master/cpus_used",
|
||||
"master/cpus_total",
|
||||
"master/cpus_revocable_percent",
|
||||
"master/cpus_revocable_total",
|
||||
"master/cpus_revocable_used",
|
||||
"master/disk_percent",
|
||||
"master/disk_used",
|
||||
"master/disk_total",
|
||||
"master/disk_revocable_percent",
|
||||
"master/disk_revocable_total",
|
||||
"master/disk_revocable_used",
|
||||
"master/gpus_percent",
|
||||
"master/gpus_used",
|
||||
"master/gpus_total",
|
||||
"master/gpus_revocable_percent",
|
||||
"master/gpus_revocable_total",
|
||||
"master/gpus_revocable_used",
|
||||
"master/mem_percent",
|
||||
"master/mem_used",
|
||||
"master/mem_total",
|
||||
"master/mem_revocable_percent",
|
||||
"master/mem_revocable_total",
|
||||
"master/mem_revocable_used",
|
||||
}
|
||||
|
||||
metrics["master"] = []string{
|
||||
"master/elected",
|
||||
"master/uptime_secs",
|
||||
}
|
||||
|
||||
metrics["system"] = []string{
|
||||
"system/cpus_total",
|
||||
"system/load_15min",
|
||||
"system/load_5min",
|
||||
"system/load_1min",
|
||||
"system/mem_free_bytes",
|
||||
"system/mem_total_bytes",
|
||||
}
|
||||
|
||||
metrics["agents"] = []string{
|
||||
"master/slave_registrations",
|
||||
"master/slave_removals",
|
||||
"master/slave_reregistrations",
|
||||
"master/slave_shutdowns_scheduled",
|
||||
"master/slave_shutdowns_canceled",
|
||||
"master/slave_shutdowns_completed",
|
||||
"master/slaves_active",
|
||||
"master/slaves_connected",
|
||||
"master/slaves_disconnected",
|
||||
"master/slaves_inactive",
|
||||
"master/slave_unreachable_canceled",
|
||||
"master/slave_unreachable_completed",
|
||||
"master/slave_unreachable_scheduled",
|
||||
"master/slaves_unreachable",
|
||||
}
|
||||
|
||||
metrics["frameworks"] = []string{
|
||||
"master/frameworks_active",
|
||||
"master/frameworks_connected",
|
||||
"master/frameworks_disconnected",
|
||||
"master/frameworks_inactive",
|
||||
"master/outstanding_offers",
|
||||
}
|
||||
|
||||
// framework_offers and allocator metrics have unpredictable names, so they can't be listed here.
|
||||
// These empty groups are included to prevent the "unknown metrics group" info log below.
|
||||
// filterMetrics() filters these metrics by looking for names with the corresponding prefix.
|
||||
metrics["framework_offers"] = make([]string, 0)
|
||||
metrics["allocator"] = make([]string, 0)
|
||||
|
||||
metrics["tasks"] = []string{
|
||||
"master/tasks_error",
|
||||
"master/tasks_failed",
|
||||
"master/tasks_finished",
|
||||
"master/tasks_killed",
|
||||
"master/tasks_lost",
|
||||
"master/tasks_running",
|
||||
"master/tasks_staging",
|
||||
"master/tasks_starting",
|
||||
"master/tasks_dropped",
|
||||
"master/tasks_gone",
|
||||
"master/tasks_gone_by_operator",
|
||||
"master/tasks_killing",
|
||||
"master/tasks_unreachable",
|
||||
}
|
||||
|
||||
metrics["messages"] = []string{
|
||||
"master/invalid_executor_to_framework_messages",
|
||||
"master/invalid_framework_to_executor_messages",
|
||||
"master/invalid_status_update_acknowledgements",
|
||||
"master/invalid_status_updates",
|
||||
"master/dropped_messages",
|
||||
"master/messages_authenticate",
|
||||
"master/messages_deactivate_framework",
|
||||
"master/messages_decline_offers",
|
||||
"master/messages_executor_to_framework",
|
||||
"master/messages_exited_executor",
|
||||
"master/messages_framework_to_executor",
|
||||
"master/messages_kill_task",
|
||||
"master/messages_launch_tasks",
|
||||
"master/messages_reconcile_tasks",
|
||||
"master/messages_register_framework",
|
||||
"master/messages_register_slave",
|
||||
"master/messages_reregister_framework",
|
||||
"master/messages_reregister_slave",
|
||||
"master/messages_resource_request",
|
||||
"master/messages_revive_offers",
|
||||
"master/messages_status_update",
|
||||
"master/messages_status_update_acknowledgement",
|
||||
"master/messages_unregister_framework",
|
||||
"master/messages_unregister_slave",
|
||||
"master/messages_update_slave",
|
||||
"master/recovery_slave_removals",
|
||||
"master/slave_removals/reason_registered",
|
||||
"master/slave_removals/reason_unhealthy",
|
||||
"master/slave_removals/reason_unregistered",
|
||||
"master/valid_framework_to_executor_messages",
|
||||
"master/valid_status_update_acknowledgements",
|
||||
"master/valid_status_updates",
|
||||
"master/task_lost/source_master/reason_invalid_offers",
|
||||
"master/task_lost/source_master/reason_slave_removed",
|
||||
"master/task_lost/source_slave/reason_executor_terminated",
|
||||
"master/valid_executor_to_framework_messages",
|
||||
"master/invalid_operation_status_update_acknowledgements",
|
||||
"master/messages_operation_status_update_acknowledgement",
|
||||
"master/messages_reconcile_operations",
|
||||
"master/messages_suppress_offers",
|
||||
"master/valid_operation_status_update_acknowledgements",
|
||||
}
|
||||
|
||||
metrics["evqueue"] = []string{
|
||||
"master/event_queue_dispatches",
|
||||
"master/event_queue_http_requests",
|
||||
"master/event_queue_messages",
|
||||
"master/operator_event_stream_subscribers",
|
||||
}
|
||||
|
||||
metrics["registrar"] = []string{
|
||||
"registrar/state_fetch_ms",
|
||||
"registrar/state_store_ms",
|
||||
"registrar/state_store_ms/max",
|
||||
"registrar/state_store_ms/min",
|
||||
"registrar/state_store_ms/p50",
|
||||
"registrar/state_store_ms/p90",
|
||||
"registrar/state_store_ms/p95",
|
||||
"registrar/state_store_ms/p99",
|
||||
"registrar/state_store_ms/p999",
|
||||
"registrar/state_store_ms/p9999",
|
||||
"registrar/log/ensemble_size",
|
||||
"registrar/log/recovered",
|
||||
"registrar/queued_operations",
|
||||
"registrar/registry_size_bytes",
|
||||
"registrar/state_store_ms/count",
|
||||
}
|
||||
} else if role == slave {
|
||||
metrics["resources"] = []string{
|
||||
"slave/cpus_percent",
|
||||
"slave/cpus_used",
|
||||
"slave/cpus_total",
|
||||
"slave/cpus_revocable_percent",
|
||||
"slave/cpus_revocable_total",
|
||||
"slave/cpus_revocable_used",
|
||||
"slave/disk_percent",
|
||||
"slave/disk_used",
|
||||
"slave/disk_total",
|
||||
"slave/disk_revocable_percent",
|
||||
"slave/disk_revocable_total",
|
||||
"slave/disk_revocable_used",
|
||||
"slave/gpus_percent",
|
||||
"slave/gpus_used",
|
||||
"slave/gpus_total",
|
||||
"slave/gpus_revocable_percent",
|
||||
"slave/gpus_revocable_total",
|
||||
"slave/gpus_revocable_used",
|
||||
"slave/mem_percent",
|
||||
"slave/mem_used",
|
||||
"slave/mem_total",
|
||||
"slave/mem_revocable_percent",
|
||||
"slave/mem_revocable_total",
|
||||
"slave/mem_revocable_used",
|
||||
}
|
||||
|
||||
metrics["agent"] = []string{
|
||||
"slave/registered",
|
||||
"slave/uptime_secs",
|
||||
}
|
||||
|
||||
metrics["system"] = []string{
|
||||
"system/cpus_total",
|
||||
"system/load_15min",
|
||||
"system/load_5min",
|
||||
"system/load_1min",
|
||||
"system/mem_free_bytes",
|
||||
"system/mem_total_bytes",
|
||||
}
|
||||
|
||||
metrics["executors"] = []string{
|
||||
"containerizer/mesos/container_destroy_errors",
|
||||
"slave/container_launch_errors",
|
||||
"slave/executors_preempted",
|
||||
"slave/frameworks_active",
|
||||
"slave/executor_directory_max_allowed_age_secs",
|
||||
"slave/executors_registering",
|
||||
"slave/executors_running",
|
||||
"slave/executors_terminated",
|
||||
"slave/executors_terminating",
|
||||
"slave/recovery_errors",
|
||||
}
|
||||
|
||||
metrics["tasks"] = []string{
|
||||
"slave/tasks_failed",
|
||||
"slave/tasks_finished",
|
||||
"slave/tasks_killed",
|
||||
"slave/tasks_lost",
|
||||
"slave/tasks_running",
|
||||
"slave/tasks_staging",
|
||||
"slave/tasks_starting",
|
||||
}
|
||||
|
||||
metrics["messages"] = []string{
|
||||
"slave/invalid_framework_messages",
|
||||
"slave/invalid_status_updates",
|
||||
"slave/valid_framework_messages",
|
||||
"slave/valid_status_updates",
|
||||
}
|
||||
}
|
||||
|
||||
ret, ok := metrics[group]
|
||||
if !ok {
|
||||
m.Log.Infof("Unknown role %q metrics group: %s", role, group)
|
||||
return nil
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (m *Mesos) filterMetrics(role role, metrics *map[string]interface{}) {
|
||||
var ok bool
|
||||
var selectedMetrics []string
|
||||
|
||||
if role == master {
|
||||
selectedMetrics = m.MasterCols
|
||||
} else if role == slave {
|
||||
selectedMetrics = m.SlaveCols
|
||||
}
|
||||
|
||||
for _, k := range metricsDiff(role, selectedMetrics) {
|
||||
switch k {
|
||||
// allocator and framework_offers metrics have unpredictable names, so we have to identify them by name prefix.
|
||||
case "allocator":
|
||||
for m := range *metrics {
|
||||
if strings.HasPrefix(m, "allocator/") {
|
||||
delete(*metrics, m)
|
||||
}
|
||||
}
|
||||
case "framework_offers":
|
||||
for m := range *metrics {
|
||||
if strings.HasPrefix(m, "master/frameworks/") || strings.HasPrefix(m, "frameworks/") {
|
||||
delete(*metrics, m)
|
||||
}
|
||||
}
|
||||
|
||||
// All other metrics have predictable names. We can use getMetrics() to retrieve them.
|
||||
default:
|
||||
for _, v := range m.getMetrics(role, k) {
|
||||
if _, ok = (*metrics)[v]; ok {
|
||||
delete(*metrics, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// withPath returns a copy of u whose path is replaced by path. All other
// components are carried over and u itself is left untouched.
func withPath(u *url.URL, path string) *url.URL {
	clone := new(url.URL)
	*clone = *u
	clone.Path = path
	return clone
}
|
||||
|
||||
// urlTag renders u without its path, user information and query, which is the
// value used for the "url" tag. u itself is left untouched.
func urlTag(u *url.URL) string {
	origin := *u
	origin.User, origin.Path, origin.RawQuery = nil, "", ""
	return origin.String()
}
|
||||
|
||||
// This should not belong to the object
|
||||
func (m *Mesos) gatherMainMetrics(u *url.URL, role role, acc telegraf.Accumulator) error {
|
||||
var jsonOut map[string]interface{}
|
||||
|
||||
tags := map[string]string{
|
||||
"server": u.Hostname(),
|
||||
"url": urlTag(u),
|
||||
"role": string(role),
|
||||
}
|
||||
|
||||
resp, err := m.client.Get(withPath(u, "/metrics/snapshot").String())
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = json.Unmarshal(data, &jsonOut); err != nil {
|
||||
return errors.New("error decoding JSON response")
|
||||
}
|
||||
|
||||
m.filterMetrics(role, &jsonOut)
|
||||
|
||||
jf := parsers_json.JSONFlattener{}
|
||||
|
||||
err = jf.FlattenJSON("", jsonOut)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if role == master {
|
||||
if jf.Fields["master/elected"] != 0.0 {
|
||||
tags["state"] = "leader"
|
||||
} else {
|
||||
tags["state"] = "standby"
|
||||
}
|
||||
}
|
||||
|
||||
acc.AddFields("mesos", jf.Fields, tags)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("mesos", func() telegraf.Input {
|
||||
return &Mesos{}
|
||||
})
|
||||
}
|
428
plugins/inputs/mesos/mesos_test.go
Normal file
428
plugins/inputs/mesos/mesos_test.go
Normal file
|
@ -0,0 +1,428 @@
|
|||
package mesos
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
// Package-level test fixtures: one metrics map and one HTTP test server per
// role. NOTE(review): they are presumably filled in by test setup code that
// is outside this view; confirm against the rest of the file.
var (
	masterMetrics    map[string]interface{}
	masterTestServer *httptest.Server
	slaveMetrics     map[string]interface{}
	slaveTestServer  *httptest.Server
)
|
||||
|
||||
// masterMetricNames lists every metric key that generateMetrics() fills with a
// random value to build the fake master snapshot. The section comments follow
// the metric group names used by the master_collections option.
var masterMetricNames = []string{
	// group: resources
	"master/cpus_percent",
	"master/cpus_used",
	"master/cpus_total",
	"master/cpus_revocable_percent",
	"master/cpus_revocable_total",
	"master/cpus_revocable_used",
	"master/disk_percent",
	"master/disk_used",
	"master/disk_total",
	"master/disk_revocable_percent",
	"master/disk_revocable_total",
	"master/disk_revocable_used",
	"master/gpus_percent",
	"master/gpus_used",
	"master/gpus_total",
	"master/gpus_revocable_percent",
	"master/gpus_revocable_total",
	"master/gpus_revocable_used",
	"master/mem_percent",
	"master/mem_used",
	"master/mem_total",
	"master/mem_revocable_percent",
	"master/mem_revocable_total",
	"master/mem_revocable_used",

	// group: master
	"master/elected",
	"master/uptime_secs",

	// group: system
	"system/cpus_total",
	"system/load_15min",
	"system/load_5min",
	"system/load_1min",
	"system/mem_free_bytes",
	"system/mem_total_bytes",

	// group: agents
	"master/slave_registrations",
	"master/slave_removals",
	"master/slave_reregistrations",
	"master/slave_shutdowns_scheduled",
	"master/slave_shutdowns_canceled",
	"master/slave_shutdowns_completed",
	"master/slaves_active",
	"master/slaves_connected",
	"master/slaves_disconnected",
	"master/slaves_inactive",
	"master/slave_unreachable_canceled",
	"master/slave_unreachable_completed",
	"master/slave_unreachable_scheduled",
	"master/slaves_unreachable",

	// group: frameworks
	"master/frameworks_active",
	"master/frameworks_connected",
	"master/frameworks_disconnected",
	"master/frameworks_inactive",
	"master/outstanding_offers",

	// group: framework offers
	"master/frameworks/marathon/abc-123/calls",
	"master/frameworks/marathon/abc-123/calls/accept",
	"master/frameworks/marathon/abc-123/events",
	"master/frameworks/marathon/abc-123/events/error",
	"master/frameworks/marathon/abc-123/offers/sent",
	"master/frameworks/marathon/abc-123/operations",
	"master/frameworks/marathon/abc-123/operations/create",
	"master/frameworks/marathon/abc-123/roles/*/suppressed",
	"master/frameworks/marathon/abc-123/subscribed",
	"master/frameworks/marathon/abc-123/tasks/active/task_killing",
	"master/frameworks/marathon/abc-123/tasks/active/task_dropped",
	"master/frameworks/marathon/abc-123/tasks/terminal/task_dropped",
	"master/frameworks/marathon/abc-123/unknown/unknown", // test case for unknown metric type

	// group: tasks
	"master/tasks_error",
	"master/tasks_failed",
	"master/tasks_finished",
	"master/tasks_killed",
	"master/tasks_lost",
	"master/tasks_running",
	"master/tasks_staging",
	"master/tasks_starting",
	"master/tasks_dropped",
	"master/tasks_gone",
	"master/tasks_gone_by_operator",
	"master/tasks_killing",
	"master/tasks_unreachable",

	// group: messages
	"master/invalid_executor_to_framework_messages",
	"master/invalid_framework_to_executor_messages",
	"master/invalid_status_update_acknowledgements",
	"master/invalid_status_updates",
	"master/dropped_messages",
	"master/messages_authenticate",
	"master/messages_deactivate_framework",
	"master/messages_decline_offers",
	"master/messages_executor_to_framework",
	"master/messages_exited_executor",
	"master/messages_framework_to_executor",
	"master/messages_kill_task",
	"master/messages_launch_tasks",
	"master/messages_reconcile_tasks",
	"master/messages_register_framework",
	"master/messages_register_slave",
	"master/messages_reregister_framework",
	"master/messages_reregister_slave",
	"master/messages_resource_request",
	"master/messages_revive_offers",
	"master/messages_status_update",
	"master/messages_status_update_acknowledgement",
	"master/messages_unregister_framework",
	"master/messages_unregister_slave",
	"master/messages_update_slave",
	"master/recovery_slave_removals",
	"master/slave_removals/reason_registered",
	"master/slave_removals/reason_unhealthy",
	"master/slave_removals/reason_unregistered",
	"master/valid_framework_to_executor_messages",
	"master/valid_status_update_acknowledgements",
	"master/valid_status_updates",
	"master/task_lost/source_master/reason_invalid_offers",
	"master/task_lost/source_master/reason_slave_removed",
	"master/task_lost/source_slave/reason_executor_terminated",
	"master/valid_executor_to_framework_messages",
	"master/invalid_operation_status_update_acknowledgements",
	"master/messages_operation_status_update_acknowledgement",
	"master/messages_reconcile_operations",
	"master/messages_suppress_offers",
	"master/valid_operation_status_update_acknowledgements",

	// group: evqueue
	"master/event_queue_dispatches",
	"master/event_queue_http_requests",
	"master/event_queue_messages",
	"master/operator_event_stream_subscribers",

	// group: registrar
	"registrar/log/ensemble_size",
	"registrar/log/recovered",
	"registrar/queued_operations",
	"registrar/registry_size_bytes",
	"registrar/state_fetch_ms",
	"registrar/state_store_ms",
	"registrar/state_store_ms/max",
	"registrar/state_store_ms/min",
	"registrar/state_store_ms/p50",
	"registrar/state_store_ms/p90",
	"registrar/state_store_ms/p95",
	"registrar/state_store_ms/p99",
	"registrar/state_store_ms/p999",
	"registrar/state_store_ms/p9999",
	"registrar/state_store_ms/count",

	// group: allocator
	"allocator/mesos/allocation_run_ms",
	"allocator/mesos/allocation_run_ms/count",
	"allocator/mesos/allocation_run_ms/max",
	"allocator/mesos/allocation_run_ms/min",
	"allocator/mesos/allocation_run_ms/p50",
	"allocator/mesos/allocation_run_ms/p90",
	"allocator/mesos/allocation_run_ms/p95",
	"allocator/mesos/allocation_run_ms/p99",
	"allocator/mesos/allocation_run_ms/p999",
	"allocator/mesos/allocation_run_ms/p9999",
	"allocator/mesos/allocation_runs",
	"allocator/mesos/allocation_run_latency_ms",
	"allocator/mesos/allocation_run_latency_ms/count",
	"allocator/mesos/allocation_run_latency_ms/max",
	"allocator/mesos/allocation_run_latency_ms/min",
	"allocator/mesos/allocation_run_latency_ms/p50",
	"allocator/mesos/allocation_run_latency_ms/p90",
	"allocator/mesos/allocation_run_latency_ms/p95",
	"allocator/mesos/allocation_run_latency_ms/p99",
	"allocator/mesos/allocation_run_latency_ms/p999",
	"allocator/mesos/allocation_run_latency_ms/p9999",
	"allocator/mesos/roles/*/shares/dominant",
	"allocator/mesos/event_queue_dispatches",
	"allocator/mesos/offer_filters/roles/*/active",
	"allocator/mesos/quota/roles/*/resources/disk/offered_or_allocated",
	"allocator/mesos/quota/roles/*/resources/mem/guarantee",
	"allocator/mesos/quota/roles/*/resources/disk/guarantee",
	"allocator/mesos/resources/cpus/offered_or_allocated",
	"allocator/mesos/resources/cpus/total",
	"allocator/mesos/resources/disk/offered_or_allocated",
	"allocator/mesos/resources/disk/total",
	"allocator/mesos/resources/mem/offered_or_allocated",
	"allocator/mesos/resources/mem/total",
}
|
||||
|
||||
// slaveMetricNames lists every metric key that generateMetrics() fills with a
// random value to build the fake slave snapshot. The section comments follow
// the metric group names used by the slave_collections option.
var slaveMetricNames = []string{
	// group: resources
	"slave/cpus_percent",
	"slave/cpus_used",
	"slave/cpus_total",
	"slave/cpus_revocable_percent",
	"slave/cpus_revocable_total",
	"slave/cpus_revocable_used",
	"slave/disk_percent",
	"slave/disk_used",
	"slave/disk_total",
	"slave/disk_revocable_percent",
	"slave/disk_revocable_total",
	"slave/disk_revocable_used",
	"slave/gpus_percent",
	"slave/gpus_used",
	"slave/gpus_total",
	"slave/gpus_revocable_percent",
	"slave/gpus_revocable_total",
	"slave/gpus_revocable_used",
	"slave/mem_percent",
	"slave/mem_used",
	"slave/mem_total",
	"slave/mem_revocable_percent",
	"slave/mem_revocable_total",
	"slave/mem_revocable_used",

	// group: agent
	"slave/registered",
	"slave/uptime_secs",

	// group: system
	"system/cpus_total",
	"system/load_15min",
	"system/load_5min",
	"system/load_1min",
	"system/mem_free_bytes",
	"system/mem_total_bytes",

	// group: executors
	"containerizer/mesos/container_destroy_errors",
	"slave/container_launch_errors",
	"slave/executors_preempted",
	"slave/frameworks_active",
	"slave/executor_directory_max_allowed_age_secs",
	"slave/executors_registering",
	"slave/executors_running",
	"slave/executors_terminated",
	"slave/executors_terminating",
	"slave/recovery_errors",

	// group: tasks
	"slave/tasks_failed",
	"slave/tasks_finished",
	"slave/tasks_killed",
	"slave/tasks_lost",
	"slave/tasks_running",
	"slave/tasks_staging",
	"slave/tasks_starting",

	// group: messages
	"slave/invalid_framework_messages",
	"slave/invalid_status_updates",
	"slave/valid_framework_messages",
	"slave/valid_status_updates",
}
|
||||
|
||||
func generateMetrics() {
|
||||
masterMetrics = make(map[string]interface{})
|
||||
for _, k := range masterMetricNames {
|
||||
masterMetrics[k] = rand.Float64()
|
||||
}
|
||||
|
||||
slaveMetrics = make(map[string]interface{})
|
||||
for _, k := range slaveMetricNames {
|
||||
slaveMetrics[k] = rand.Float64()
|
||||
}
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
generateMetrics()
|
||||
|
||||
masterRouter := http.NewServeMux()
|
||||
masterRouter.HandleFunc("/metrics/snapshot", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(masterMetrics) //nolint:errcheck // ignore the returned error as we cannot do anything about it anyway
|
||||
})
|
||||
masterTestServer = httptest.NewServer(masterRouter)
|
||||
|
||||
slaveRouter := http.NewServeMux()
|
||||
slaveRouter.HandleFunc("/metrics/snapshot", func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(slaveMetrics) //nolint:errcheck // ignore the returned error as we cannot do anything about it anyway
|
||||
})
|
||||
slaveTestServer = httptest.NewServer(slaveRouter)
|
||||
|
||||
rc := m.Run()
|
||||
|
||||
masterTestServer.Close()
|
||||
slaveTestServer.Close()
|
||||
os.Exit(rc)
|
||||
}
|
||||
|
||||
func TestMesosMaster(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
|
||||
m := Mesos{
|
||||
Log: testutil.Logger{},
|
||||
Masters: []string{masterTestServer.Listener.Addr().String()},
|
||||
Timeout: 10,
|
||||
}
|
||||
|
||||
require.NoError(t, acc.GatherError(m.Gather))
|
||||
|
||||
acc.AssertContainsFields(t, "mesos", masterMetrics)
|
||||
}
|
||||
|
||||
func TestMasterFilter(t *testing.T) {
|
||||
m := Mesos{
|
||||
Log: testutil.Logger{},
|
||||
MasterCols: []string{
|
||||
"resources", "master", "registrar", "allocator",
|
||||
},
|
||||
}
|
||||
b := []string{
|
||||
"system", "agents", "frameworks",
|
||||
"messages", "evqueue", "tasks",
|
||||
}
|
||||
|
||||
m.filterMetrics(master, &masterMetrics)
|
||||
|
||||
// Assert expected metrics are present.
|
||||
for _, v := range m.MasterCols {
|
||||
for _, x := range m.getMetrics(master, v) {
|
||||
_, ok := masterMetrics[x]
|
||||
require.Truef(t, ok, "Didn't find key %s, it should present.", x)
|
||||
}
|
||||
}
|
||||
// m.MasterCols includes "allocator", so allocator metrics should be present.
|
||||
// allocator metrics have unpredictable names, so we can't rely on the list of metrics returned from
|
||||
// getMetrics(). We have to find them by checking name prefixes.
|
||||
for _, x := range masterMetricNames {
|
||||
if strings.HasPrefix(x, "allocator/") {
|
||||
_, ok := masterMetrics[x]
|
||||
require.Truef(t, ok, "Didn't find key %s, it should present.", x)
|
||||
}
|
||||
}
|
||||
|
||||
// Assert unexpected metrics are not present.
|
||||
for _, v := range b {
|
||||
for _, x := range m.getMetrics(master, v) {
|
||||
_, ok := masterMetrics[x]
|
||||
require.Falsef(t, ok, "Found key %s, it should be gone.", x)
|
||||
}
|
||||
}
|
||||
// m.MasterCols does not include "framework_offers", so framework_offers metrics should not be present.
|
||||
// framework_offers metrics have unpredictable names, so we can't rely on the list of metrics returned from
|
||||
// getMetrics(). We have to find them by checking name prefixes.
|
||||
for k := range masterMetrics {
|
||||
if strings.HasPrefix(k, "master/frameworks/") || strings.HasPrefix(k, "frameworks/") {
|
||||
require.Failf(t, "Wrong key", "Found key %s, it should be gone.", k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMesosSlave(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
|
||||
m := Mesos{
|
||||
Log: testutil.Logger{},
|
||||
Slaves: []string{slaveTestServer.Listener.Addr().String()},
|
||||
// SlaveTasks: true,
|
||||
Timeout: 10,
|
||||
}
|
||||
|
||||
require.NoError(t, acc.GatherError(m.Gather))
|
||||
|
||||
acc.AssertContainsFields(t, "mesos", slaveMetrics)
|
||||
}
|
||||
|
||||
func TestSlaveFilter(t *testing.T) {
|
||||
m := Mesos{
|
||||
Log: testutil.Logger{},
|
||||
SlaveCols: []string{
|
||||
"resources", "agent", "tasks",
|
||||
},
|
||||
}
|
||||
b := []string{
|
||||
"system", "executors", "messages",
|
||||
}
|
||||
|
||||
m.filterMetrics(slave, &slaveMetrics)
|
||||
|
||||
for _, v := range b {
|
||||
for _, x := range m.getMetrics(slave, v) {
|
||||
_, ok := slaveMetrics[x]
|
||||
require.Falsef(t, ok, "Found key %s, it should be gone.", x)
|
||||
}
|
||||
}
|
||||
for _, v := range m.MasterCols {
|
||||
for _, x := range m.getMetrics(slave, v) {
|
||||
_, ok := slaveMetrics[x]
|
||||
require.Truef(t, ok, "Didn't find key %s, it should present.", x)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWithPathDoesNotModify(t *testing.T) {
|
||||
u, err := url.Parse("http://localhost:5051")
|
||||
require.NoError(t, err)
|
||||
v := withPath(u, "/xyzzy")
|
||||
require.Equal(t, "http://localhost:5051", u.String())
|
||||
require.Equal(t, "http://localhost:5051/xyzzy", v.String())
|
||||
}
|
||||
|
||||
func TestURLTagDoesNotModify(t *testing.T) {
|
||||
u, err := url.Parse("http://a:b@localhost:5051?timeout=1ms")
|
||||
require.NoError(t, err)
|
||||
v := urlTag(u)
|
||||
require.Equal(t, "http://a:b@localhost:5051?timeout=1ms", u.String())
|
||||
require.Equal(t, "http://localhost:5051", v)
|
||||
}
|
42
plugins/inputs/mesos/sample.conf
Normal file
42
plugins/inputs/mesos/sample.conf
Normal file
|
@ -0,0 +1,42 @@
|
|||
# Telegraf plugin for gathering metrics from N Mesos masters
|
||||
[[inputs.mesos]]
|
||||
## Timeout, in ms.
|
||||
timeout = 100
|
||||
|
||||
## A list of Mesos masters.
|
||||
masters = ["http://localhost:5050"]
|
||||
|
||||
## Master metrics groups to be collected, by default, all enabled.
|
||||
master_collections = [
|
||||
"resources",
|
||||
"master",
|
||||
"system",
|
||||
"agents",
|
||||
"frameworks",
|
||||
"framework_offers",
|
||||
"tasks",
|
||||
"messages",
|
||||
"evqueue",
|
||||
"registrar",
|
||||
"allocator",
|
||||
]
|
||||
|
||||
## A list of Mesos slaves, default is []
|
||||
# slaves = []
|
||||
|
||||
## Slave metrics groups to be collected, by default, all enabled.
|
||||
# slave_collections = [
|
||||
# "resources",
|
||||
# "agent",
|
||||
# "system",
|
||||
# "executors",
|
||||
# "tasks",
|
||||
# "messages",
|
||||
# ]
|
||||
|
||||
## Optional TLS Config
|
||||
# tls_ca = "/etc/telegraf/ca.pem"
|
||||
# tls_cert = "/etc/telegraf/cert.pem"
|
||||
# tls_key = "/etc/telegraf/key.pem"
|
||||
## Use TLS but skip chain & host verification
|
||||
# insecure_skip_verify = false
|
Loading…
Add table
Add a link
Reference in a new issue