Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
e393c3af3f
commit
4978089aab
4963 changed files with 677545 additions and 0 deletions
197
plugins/inputs/slurm/README.md
Normal file
197
plugins/inputs/slurm/README.md
Normal file
|
@ -0,0 +1,197 @@
|
|||
# SLURM Input Plugin
|
||||
|
||||
This plugin gathers diag, jobs, nodes, partitions and reservation metrics by
|
||||
leveraging SLURM's REST API as provided by the `slurmrestd` daemon.
|
||||
|
||||
This plugin targets the `openapi/v0.0.38` OpenAPI plugin as defined in SLURM's
|
||||
documentation. That particular plugin should be configured when starting the
|
||||
`slurmrestd` daemon up. For more information, be sure to check SLURM's
|
||||
documentation [here][SLURM Doc].
|
||||
|
||||
A great wealth of information can also be found on the repository of the
|
||||
Go module implementing the API client, [pcolladosoto/goslurm][].
|
||||
|
||||
[SLURM Doc]: https://slurm.schedmd.com/rest.html
|
||||
[pcolladosoto/goslurm]: https://github.com/pcolladosoto/goslurm
|
||||
|
||||
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
|
||||
|
||||
In addition to the plugin-specific configuration settings, plugins support
|
||||
additional global and plugin configuration settings. These settings are used to
|
||||
modify metrics, tags, and field or create aliases and configure ordering, etc.
|
||||
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||
|
||||
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
|
||||
|
||||
## Configuration
|
||||
|
||||
```toml @sample.conf
|
||||
# Gather SLURM metrics
|
||||
[[inputs.slurm]]
|
||||
## Slurmrestd URL. Both http and https can be used as schemas.
|
||||
url = "http://127.0.0.1:6820"
|
||||
|
||||
## Credentials for JWT-based authentication.
|
||||
# username = "foo"
|
||||
# token = "topSecret"
|
||||
|
||||
## Enabled endpoints
|
||||
## List of endpoints a user can acquire data from.
|
||||
## Available values are: diag, jobs, nodes, partitions, reservations.
|
||||
# enabled_endpoints = ["diag", "jobs", "nodes", "partitions", "reservations"]
|
||||
|
||||
## Maximum time to receive a response. If set to 0s, the
|
||||
## request will not time out.
|
||||
# response_timeout = "5s"
|
||||
|
||||
## Optional TLS Config. Note these options will only
|
||||
## be taken into account when the scheme specified on
|
||||
## the URL parameter is https. They will be silently
|
||||
## ignored otherwise.
|
||||
## Set to true/false to enforce TLS being enabled/disabled. If not set,
|
||||
## enable TLS only if any of the other options are specified.
|
||||
# tls_enable =
|
||||
## Trusted root certificates for server
|
||||
# tls_ca = "/path/to/cafile"
|
||||
## Used for TLS client certificate authentication
|
||||
# tls_cert = "/path/to/certfile"
|
||||
## Used for TLS client certificate authentication
|
||||
# tls_key = "/path/to/keyfile"
|
||||
## Password for the key file if it is encrypted
|
||||
# tls_key_pwd = ""
|
||||
## Send the specified TLS server name via SNI
|
||||
# tls_server_name = "kubernetes.example.com"
|
||||
## Minimal TLS version to accept by the client
|
||||
# tls_min_version = "TLS12"
|
||||
## List of ciphers to accept, by default all secure ciphers will be accepted
|
||||
## See https://pkg.go.dev/crypto/tls#pkg-constants for supported values.
|
||||
## Use "all", "secure" and "insecure" to add all supported ciphers, secure
|
||||
## suites or insecure suites respectively.
|
||||
# tls_cipher_suites = ["secure"]
|
||||
## Renegotiation method, "never", "once" or "freely"
|
||||
# tls_renegotiation_method = "never"
|
||||
## Use TLS but skip chain & host verification
|
||||
# insecure_skip_verify = false
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
||||
Given the great deal of metrics offered by SLURM's API, an attempt has been
|
||||
done to strike a balance between verbosity and usefulness in terms of the
|
||||
gathered information.
|
||||
|
||||
- slurm_diag
|
||||
- tags:
|
||||
- source
|
||||
- fields:
|
||||
- server_thread_count
|
||||
- jobs_canceled
|
||||
- jobs_submitted
|
||||
- jobs_started
|
||||
- jobs_completed
|
||||
- jobs_failed
|
||||
- jobs_pending
|
||||
- jobs_running
|
||||
- schedule_cycle_last
|
||||
- schedule_cycle_mean
|
||||
- bf_queue_len
|
||||
- bf_queue_len_mean
|
||||
- bf_active
|
||||
- slurm_jobs
|
||||
- tags:
|
||||
- source
|
||||
- name
|
||||
- job_id
|
||||
- fields:
|
||||
- state
|
||||
- state_reason
|
||||
- partition
|
||||
- nodes
|
||||
- node_count
|
||||
- priority
|
||||
- nice
|
||||
- group_id
|
||||
- command
|
||||
- standard_output
|
||||
- standard_error
|
||||
- standard_input
|
||||
- current_working_directory
|
||||
- submit_time
|
||||
- start_time
|
||||
- cpus
|
||||
- tasks
|
||||
- time_limit
|
||||
- tres_cpu
|
||||
- tres_mem
|
||||
- tres_node
|
||||
- tres_billing
|
||||
- slurm_nodes
|
||||
- tags:
|
||||
- source
|
||||
- name
|
||||
- fields:
|
||||
- state
|
||||
- cores
|
||||
- cpus
|
||||
- cpu_load
|
||||
- alloc_cpu
|
||||
- real_memory
|
||||
- free_memory
|
||||
- alloc_memory
|
||||
- tres_cpu
|
||||
- tres_mem
|
||||
- tres_billing
|
||||
- tres_used_cpu
|
||||
- tres_used_mem
|
||||
- weight
|
||||
- slurmd_version
|
||||
- architecture
|
||||
- slurm_partitions
|
||||
- tags:
|
||||
- source
|
||||
- name
|
||||
- fields:
|
||||
- state
|
||||
- total_cpu
|
||||
- total_nodes
|
||||
- nodes
|
||||
- tres_cpu
|
||||
- tres_mem
|
||||
- tres_node
|
||||
- tres_billing
|
||||
- slurm_reservations
|
||||
- tags:
|
||||
- source
|
||||
- name
|
||||
- fields:
|
||||
- core_count
|
||||
- core_spec_count
|
||||
- groups
|
||||
- users
|
||||
- start_time
|
||||
- partition
|
||||
- accounts
|
||||
- node_count
|
||||
- node_list
|
||||
|
||||
## Example Output
|
||||
|
||||
```text
|
||||
slurm_diag,host=hoth,source=slurm_primary.example.net bf_active=false,bf_queue_len=1i,bf_queue_len_mean=1i,jobs_canceled=0i,jobs_completed=137i,jobs_failed=0i,jobs_pending=0i,jobs_running=100i,jobs_started=137i,jobs_submitted=137i,schedule_cycle_last=27i,schedule_cycle_mean=86i,server_thread_count=3i 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23160,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.11BCgQ",cpus=2i,current_working_directory="/home/sessiondir/7CQODmQ3uw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmG9JKDmILUkln",group_id=2005i,nice=50i,node_count=1i,nodes="naboo225",partition="atlas",priority=4294878569i,standard_error="/home/sessiondir/7CQODmQ3uw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmG9JKDmILUkln.comment",standard_input="/dev/null",standard_output="/home/sessiondir/7CQODmQ3uw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmG9JKDmILUkln.comment",start_time=1723354525i,state="RUNNING",state_reason="None",submit_time=1723354525i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=2000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23365,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.yRcFYL",cpus=2i,current_working_directory="/home/sessiondir/LgwNDmTLAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm2BKKDm8bFZsm",group_id=2005i,nice=50i,node_count=1i,nodes="naboo224",partition="atlas",priority=4294878364i,standard_error="/home/sessiondir/LgwNDmTLAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm2BKKDm8bFZsm.comment",standard_input="/dev/null",standard_output="/home/sessiondir/LgwNDmTLAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm2BKKDm8bFZsm.comment",start_time=1723376763i,state="RUNNING",state_reason="None",submit_time=1723376761i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23366,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.5Y9Ngb",cpus=2i,current_working_directory="/home/sessiondir/HFYKDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm3BKKDmiyK3em",group_id=2005i,nice=50i,node_count=1i,nodes="naboo225",partition="atlas",priority=4294878363i,standard_error="/home/sessiondir/HFYKDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm3BKKDmiyK3em.comment",standard_input="/dev/null",standard_output="/home/sessiondir/HFYKDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm3BKKDmiyK3em.comment",start_time=1723376883i,state="RUNNING",state_reason="None",submit_time=1723376882i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23367,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.NmOqMU",cpus=2i,current_working_directory="/home/sessiondir/nnLLDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm4BKKDmfhjFPn",group_id=2005i,nice=50i,node_count=1i,nodes="naboo225",partition="atlas",priority=4294878362i,standard_error="/home/sessiondir/nnLLDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm4BKKDmfhjFPn.comment",standard_input="/dev/null",standard_output="/home/sessiondir/nnLLDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm4BKKDmfhjFPn.comment",start_time=1723376883i,state="RUNNING",state_reason="None",submit_time=1723376882i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23385,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.NNsI08",cpus=2i,current_working_directory="/home/sessiondir/PWvNDmH7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmz7JKDmqgKyRo",group_id=2005i,nice=50i,node_count=1i,nodes="naboo225",partition="atlas",priority=4294878344i,standard_error="/home/sessiondir/PWvNDmH7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmz7JKDmqgKyRo.comment",standard_input="/dev/null",standard_output="/home/sessiondir/PWvNDmH7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmz7JKDmqgKyRo.comment",start_time=1723378725i,state="RUNNING",state_reason="None",submit_time=1723378725i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23386,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.bcmS4h",cpus=2i,current_working_directory="/home/sessiondir/ZNHMDmI7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm27JKDm3Ve66n",group_id=2005i,nice=50i,node_count=1i,nodes="naboo224",partition="atlas",priority=4294878343i,standard_error="/home/sessiondir/ZNHMDmI7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm27JKDm3Ve66n.comment",standard_input="/dev/null",standard_output="/home/sessiondir/ZNHMDmI7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm27JKDm3Ve66n.comment",start_time=1723379206i,state="RUNNING",state_reason="None",submit_time=1723379205i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23387,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.OgpoQZ",cpus=2i,current_working_directory="/home/sessiondir/qohNDmUqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmMCKKDmzM4Yhn",group_id=2005i,nice=50i,node_count=1i,nodes="naboo222",partition="atlas",priority=4294878342i,standard_error="/home/sessiondir/qohNDmUqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmMCKKDmzM4Yhn.comment",standard_input="/dev/null",standard_output="/home/sessiondir/qohNDmUqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmMCKKDmzM4Yhn.comment",start_time=1723379246i,state="RUNNING",state_reason="None",submit_time=1723379245i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23388,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.xYbxSe",cpus=2i,current_working_directory="/home/sessiondir/u9HODmXqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmWCKKDmRlccYn",group_id=2005i,nice=50i,node_count=1i,nodes="naboo224",partition="atlas",priority=4294878341i,standard_error="/home/sessiondir/u9HODmXqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmWCKKDmRlccYn.comment",standard_input="/dev/null",standard_output="/home/sessiondir/u9HODmXqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmWCKKDmRlccYn.comment",start_time=1723379326i,state="RUNNING",state_reason="None",submit_time=1723379326i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23389,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.QHtIIm",cpus=2i,current_working_directory="/home/sessiondir/ZLvKDmYqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXCKKDmjp19km",group_id=2005i,nice=50i,node_count=1i,nodes="naboo227",partition="atlas",priority=4294878340i,standard_error="/home/sessiondir/ZLvKDmYqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXCKKDmjp19km.comment",standard_input="/dev/null",standard_output="/home/sessiondir/ZLvKDmYqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXCKKDmjp19km.comment",start_time=1723379326i,state="RUNNING",state_reason="None",submit_time=1723379326i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23393,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.IH19bN",cpus=2i,current_working_directory="/home/sessiondir/YdPODmVqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmSCKKDmrYDOwm",group_id=2005i,nice=50i,node_count=1i,nodes="naboo224",partition="atlas",priority=4294878336i,standard_error="/home/sessiondir/YdPODmVqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmSCKKDmrYDOwm.comment",standard_input="/dev/null",standard_output="/home/sessiondir/YdPODmVqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmSCKKDmrYDOwm.comment",start_time=1723379767i,state="RUNNING",state_reason="None",submit_time=1723379766i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_nodes,host=hoth,name=naboo145,source=slurm_primary.example.net alloc_cpu=0i,alloc_memory=0i,architecture="x86_64",cores=18i,cpu_load=0i,cpus=36i,free_memory=86450i,real_memory=94791i,slurmd_version="22.05.9",state="idle",tres_billing=36,tres_cpu=36,tres_mem=94791,weight=1i 1723466497000000000
|
||||
slurm_nodes,host=hoth,name=naboo146,source=slurm_primary.example.net alloc_cpu=0i,alloc_memory=0i,architecture="x86_64",cores=18i,cpu_load=0i,cpus=36i,free_memory=92148i,real_memory=94791i,slurmd_version="22.05.9",state="idle",tres_billing=36,tres_cpu=36,tres_mem=94791,weight=1i 1723466497000000000
|
||||
slurm_nodes,host=hoth,name=naboo147,source=slurm_primary.example.net alloc_cpu=36i,alloc_memory=45000i,architecture="x86_64",cores=18i,cpu_load=3826i,cpus=36i,free_memory=1607i,real_memory=94793i,slurmd_version="22.05.9",state="allocated",tres_billing=36,tres_cpu=36,tres_mem=94793,tres_used_cpu=36,tres_used_mem=45000,weight=1i 1723466497000000000
|
||||
slurm_nodes,host=hoth,name=naboo216,source=slurm_primary.example.net alloc_cpu=8i,alloc_memory=8000i,architecture="x86_64",cores=4i,cpu_load=891i,cpus=8i,free_memory=17972i,real_memory=31877i,slurmd_version="22.05.9",state="allocated",tres_billing=8,tres_cpu=8,tres_mem=31877,tres_used_cpu=8,tres_used_mem=8000,weight=1i 1723466497000000000
|
||||
slurm_nodes,host=hoth,name=naboo219,source=slurm_primary.example.net alloc_cpu=16i,alloc_memory=16000i,architecture="x86_64",cores=4i,cpu_load=1382i,cpus=16i,free_memory=15645i,real_memory=31875i,slurmd_version="22.05.9",state="allocated",tres_billing=16,tres_cpu=16,tres_mem=31875,tres_used_cpu=16,tres_used_mem=16000,weight=1i 1723466497000000000
|
||||
slurm_partitions,host=hoth,name=atlas,source=slurm_primary.example.net nodes="naboo145,naboo146,naboo147,naboo216,naboo219,naboo222,naboo224,naboo225,naboo227,naboo228,naboo229,naboo234,naboo235,naboo236,naboo237,naboo238,naboo239,naboo240,naboo241,naboo242,naboo243",state="UP",total_cpu=632i,total_nodes=21i,tres_billing=632,tres_cpu=632,tres_mem=1415207,tres_node=21 1723466497000000000
|
||||
```
|
46
plugins/inputs/slurm/sample.conf
Normal file
46
plugins/inputs/slurm/sample.conf
Normal file
|
@ -0,0 +1,46 @@
|
|||
# Gather SLURM metrics
|
||||
[[inputs.slurm]]
|
||||
## Slurmrestd URL. Both http and https can be used as schemas.
|
||||
url = "http://127.0.0.1:6820"
|
||||
|
||||
## Credentials for JWT-based authentication.
|
||||
# username = "foo"
|
||||
# token = "topSecret"
|
||||
|
||||
## Enabled endpoints
|
||||
## List of endpoints a user can acquire data from.
|
||||
## Available values are: diag, jobs, nodes, partitions, reservations.
|
||||
# enabled_endpoints = ["diag", "jobs", "nodes", "partitions", "reservations"]
|
||||
|
||||
## Maximum time to receive a response. If set to 0s, the
|
||||
## request will not time out.
|
||||
# response_timeout = "5s"
|
||||
|
||||
## Optional TLS Config. Note these options will only
|
||||
## be taken into account when the scheme specified on
|
||||
## the URL parameter is https. They will be silently
|
||||
## ignored otherwise.
|
||||
## Set to true/false to enforce TLS being enabled/disabled. If not set,
|
||||
## enable TLS only if any of the other options are specified.
|
||||
# tls_enable =
|
||||
## Trusted root certificates for server
|
||||
# tls_ca = "/path/to/cafile"
|
||||
## Used for TLS client certificate authentication
|
||||
# tls_cert = "/path/to/certfile"
|
||||
## Used for TLS client certificate authentication
|
||||
# tls_key = "/path/to/keyfile"
|
||||
## Password for the key file if it is encrypted
|
||||
# tls_key_pwd = ""
|
||||
## Send the specified TLS server name via SNI
|
||||
# tls_server_name = "kubernetes.example.com"
|
||||
## Minimal TLS version to accept by the client
|
||||
# tls_min_version = "TLS12"
|
||||
## List of ciphers to accept, by default all secure ciphers will be accepted
|
||||
## See https://pkg.go.dev/crypto/tls#pkg-constants for supported values.
|
||||
## Use "all", "secure" and "insecure" to add all supported ciphers, secure
|
||||
## suites or insecure suites respectively.
|
||||
# tls_cipher_suites = ["secure"]
|
||||
## Renegotiation method, "never", "once" or "freely"
|
||||
# tls_renegotiation_method = "never"
|
||||
## Use TLS but skip chain & host verification
|
||||
# insecure_skip_verify = false
|
23
plugins/inputs/slurm/sample.conf.in
Normal file
23
plugins/inputs/slurm/sample.conf.in
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Gather SLURM metrics
|
||||
[[inputs.slurm]]
|
||||
## Slurmrestd URL. Both http and https can be used as schemas.
|
||||
url = "http://127.0.0.1:6820"
|
||||
|
||||
## Credentials for JWT-based authentication.
|
||||
# username = "foo"
|
||||
# token = "topSecret"
|
||||
|
||||
## Enabled endpoints
|
||||
## List of endpoints a user can acquire data from.
|
||||
## Available values are: diag, jobs, nodes, partitions, reservations.
|
||||
# enabled_endpoints = ["diag", "jobs", "nodes", "partitions", "reservations"]
|
||||
|
||||
## Maximum time to receive a response. If set to 0s, the
|
||||
## request will not time out.
|
||||
# response_timeout = "5s"
|
||||
|
||||
## Optional TLS Config. Note these options will only
|
||||
## be taken into account when the scheme specified on
|
||||
## the URL parameter is https. They will be silently
|
||||
## ignored otherwise.
|
||||
{{template "/plugins/common/tls/client.conf"}}
|
476
plugins/inputs/slurm/slurm.go
Normal file
476
plugins/inputs/slurm/slurm.go
Normal file
|
@ -0,0 +1,476 @@
|
|||
//go:generate ../../../tools/config_includer/generator
|
||||
//go:generate ../../../tools/readme_config_includer/generator
|
||||
package slurm
|
||||
|
||||
import (
|
||||
"context"
|
||||
_ "embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
goslurm "github.com/pcolladosoto/goslurm/v0038"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/config"
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
"github.com/influxdata/telegraf/plugins/common/tls"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
//go:embed sample.conf
|
||||
var sampleConfig string
|
||||
|
||||
// Slurm is a telegraf input plugin that collects metrics from SLURM's
// slurmrestd REST API (openapi/v0.0.38 plugin).
type Slurm struct {
	// Base URL of the slurmrestd daemon; both http and https are accepted.
	URL string `toml:"url"`
	// Credentials for JWT-based authentication.
	Username string `toml:"username"`
	Token    string `toml:"token"`
	// Subset of {diag, jobs, nodes, partitions, reservations} to query.
	// Empty means all of them (defaulted in Init).
	EnabledEndpoints []string `toml:"enabled_endpoints"`
	// Maximum time to wait for a response; zero disables the timeout.
	ResponseTimeout config.Duration `toml:"response_timeout"`
	Log             telegraf.Logger `toml:"-"`
	tls.ClientConfig

	client      *goslurm.APIClient // built in Init
	baseURL     *url.URL           // parsed URL; Hostname() becomes the "source" tag
	endpointMap map[string]bool    // lowercase endpoint name -> enabled
}
|
||||
|
||||
// SampleConfig returns the embedded sample configuration shipped with the
// plugin (see the //go:embed directive above).
func (*Slurm) SampleConfig() string {
	return sampleConfig
}
|
||||
|
||||
func (s *Slurm) Init() error {
|
||||
if len(s.EnabledEndpoints) == 0 {
|
||||
s.EnabledEndpoints = []string{"diag", "jobs", "nodes", "partitions", "reservations"}
|
||||
}
|
||||
|
||||
s.endpointMap = make(map[string]bool, len(s.EnabledEndpoints))
|
||||
for _, endpoint := range s.EnabledEndpoints {
|
||||
switch e := strings.ToLower(endpoint); e {
|
||||
case "diag", "jobs", "nodes", "partitions", "reservations":
|
||||
s.endpointMap[e] = true
|
||||
default:
|
||||
return fmt.Errorf("unknown endpoint %q", endpoint)
|
||||
}
|
||||
}
|
||||
|
||||
if s.URL == "" {
|
||||
return errors.New("empty URL provided")
|
||||
}
|
||||
|
||||
u, err := url.Parse(s.URL)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if u.Hostname() == "" {
|
||||
return fmt.Errorf("empty hostname for url %q", s.URL)
|
||||
}
|
||||
|
||||
s.baseURL = u
|
||||
|
||||
if u.Scheme != "http" && u.Scheme != "https" {
|
||||
return fmt.Errorf("invalid scheme %q", u.Scheme)
|
||||
}
|
||||
|
||||
tlsCfg, err := s.ClientConfig.TLSConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if u.Scheme == "http" && tlsCfg != nil {
|
||||
s.Log.Warn("non-empty TLS configuration for a URL with an http scheme. Ignoring it...")
|
||||
tlsCfg = nil
|
||||
}
|
||||
|
||||
configuration := goslurm.NewConfiguration()
|
||||
configuration.Host = u.Host
|
||||
configuration.Scheme = u.Scheme
|
||||
configuration.UserAgent = internal.ProductToken()
|
||||
configuration.HTTPClient = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: tlsCfg,
|
||||
},
|
||||
Timeout: time.Duration(s.ResponseTimeout),
|
||||
}
|
||||
|
||||
s.client = goslurm.NewAPIClient(configuration)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Slurm) Gather(acc telegraf.Accumulator) (err error) {
|
||||
auth := context.WithValue(
|
||||
context.Background(),
|
||||
goslurm.ContextAPIKeys,
|
||||
map[string]goslurm.APIKey{
|
||||
"user": {Key: s.Username},
|
||||
"token": {Key: s.Token},
|
||||
},
|
||||
)
|
||||
|
||||
if s.endpointMap["diag"] {
|
||||
diagResp, respRaw, err := s.client.SlurmAPI.SlurmV0038Diag(auth).Execute()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting diag: %w", err)
|
||||
}
|
||||
if diag, ok := diagResp.GetStatisticsOk(); ok {
|
||||
s.gatherDiagMetrics(acc, diag)
|
||||
}
|
||||
respRaw.Body.Close()
|
||||
}
|
||||
|
||||
if s.endpointMap["jobs"] {
|
||||
jobsResp, respRaw, err := s.client.SlurmAPI.SlurmV0038GetJobs(auth).Execute()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting jobs: %w", err)
|
||||
}
|
||||
if jobs, ok := jobsResp.GetJobsOk(); ok {
|
||||
s.gatherJobsMetrics(acc, jobs)
|
||||
}
|
||||
respRaw.Body.Close()
|
||||
}
|
||||
|
||||
if s.endpointMap["nodes"] {
|
||||
nodesResp, respRaw, err := s.client.SlurmAPI.SlurmV0038GetNodes(auth).Execute()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting nodes: %w", err)
|
||||
}
|
||||
if nodes, ok := nodesResp.GetNodesOk(); ok {
|
||||
s.gatherNodesMetrics(acc, nodes)
|
||||
}
|
||||
respRaw.Body.Close()
|
||||
}
|
||||
|
||||
if s.endpointMap["partitions"] {
|
||||
partitionsResp, respRaw, err := s.client.SlurmAPI.SlurmV0038GetPartitions(auth).Execute()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting partitions: %w", err)
|
||||
}
|
||||
if partitions, ok := partitionsResp.GetPartitionsOk(); ok {
|
||||
s.gatherPartitionsMetrics(acc, partitions)
|
||||
}
|
||||
respRaw.Body.Close()
|
||||
}
|
||||
|
||||
if s.endpointMap["reservations"] {
|
||||
reservationsResp, respRaw, err := s.client.SlurmAPI.SlurmV0038GetReservations(auth).Execute()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting reservations: %w", err)
|
||||
}
|
||||
if reservations, ok := reservationsResp.GetReservationsOk(); ok {
|
||||
s.gatherReservationsMetrics(acc, reservations)
|
||||
}
|
||||
respRaw.Body.Close()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseTres parses a SLURM TRES (trackable resources) string such as
// "cpu=36,mem=94791M,billing=36" into a map of resource name to value.
//
// Numeric values are returned as float64; values that fail to parse as a
// number are kept as strings. Memory values are normalized to mebibytes
// using their K/M/G/T/P suffix; a memory entry without a recognized suffix
// is skipped (matching the original behavior). Entries without exactly one
// "=" are skipped.
//
// Fix: an entry like "mem=" previously indexed an empty string
// (val[len(val)-1:]) and panicked with a slice-bounds error; such entries
// are now skipped.
func parseTres(tres string) map[string]interface{} {
	// Unit suffix -> MiB conversion factor, hoisted out of the loop.
	memFactors := map[string]float64{
		"K": 1.0 / 1024.0,
		"M": 1,
		"G": 1024,
		"T": 1024 * 1024,
		"P": 1024 * 1024 * 1024,
	}

	entries := strings.Split(tres, ",")
	parsed := make(map[string]interface{}, len(entries))

	for _, entry := range entries {
		kv := strings.Split(entry, "=")
		if len(kv) != 2 {
			continue
		}
		tag, val := kv[0], kv[1]
		factor := 1.0

		if tag == "mem" {
			// Guard against "mem=": indexing an empty val would panic.
			if val == "" {
				continue
			}
			var ok bool
			factor, ok = memFactors[strings.ToUpper(val[len(val)-1:])]
			if !ok {
				continue
			}
			val = val[:len(val)-1]
		}

		if f, err := strconv.ParseFloat(val, 64); err == nil {
			parsed[tag] = f * factor
			continue
		}
		parsed[tag] = val
	}

	return parsed
}
|
||||
|
||||
func (s *Slurm) gatherDiagMetrics(acc telegraf.Accumulator, diag *goslurm.V0038DiagStatistics) {
|
||||
records := make(map[string]interface{}, 13)
|
||||
tags := map[string]string{"source": s.baseURL.Hostname()}
|
||||
|
||||
if int32Ptr, ok := diag.GetServerThreadCountOk(); ok {
|
||||
records["server_thread_count"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsCanceledOk(); ok {
|
||||
records["jobs_canceled"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsSubmittedOk(); ok {
|
||||
records["jobs_submitted"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsStartedOk(); ok {
|
||||
records["jobs_started"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsCompletedOk(); ok {
|
||||
records["jobs_completed"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsFailedOk(); ok {
|
||||
records["jobs_failed"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsPendingOk(); ok {
|
||||
records["jobs_pending"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsRunningOk(); ok {
|
||||
records["jobs_running"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetScheduleCycleLastOk(); ok {
|
||||
records["schedule_cycle_last"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetScheduleCycleMeanOk(); ok {
|
||||
records["schedule_cycle_mean"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetBfQueueLenOk(); ok {
|
||||
records["bf_queue_len"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetBfQueueLenMeanOk(); ok {
|
||||
records["bf_queue_len_mean"] = *int32Ptr
|
||||
}
|
||||
if boolPtr, ok := diag.GetBfActiveOk(); ok {
|
||||
records["bf_active"] = *boolPtr
|
||||
}
|
||||
|
||||
acc.AddFields("slurm_diag", records, tags)
|
||||
}
|
||||
|
||||
func (s *Slurm) gatherJobsMetrics(acc telegraf.Accumulator, jobs []goslurm.V0038JobResponseProperties) {
|
||||
for i := range jobs {
|
||||
records := make(map[string]interface{}, 19)
|
||||
tags := make(map[string]string, 3)
|
||||
|
||||
tags["source"] = s.baseURL.Hostname()
|
||||
if strPtr, ok := jobs[i].GetNameOk(); ok {
|
||||
tags["name"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetJobIdOk(); ok {
|
||||
tags["job_id"] = strconv.Itoa(int(*int32Ptr))
|
||||
}
|
||||
|
||||
if strPtr, ok := jobs[i].GetJobStateOk(); ok {
|
||||
records["state"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetStateReasonOk(); ok {
|
||||
records["state_reason"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetPartitionOk(); ok {
|
||||
records["partition"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetNodesOk(); ok {
|
||||
records["nodes"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetNodeCountOk(); ok {
|
||||
records["node_count"] = *int32Ptr
|
||||
}
|
||||
if int64Ptr, ok := jobs[i].GetPriorityOk(); ok {
|
||||
records["priority"] = *int64Ptr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetNiceOk(); ok {
|
||||
records["nice"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetGroupIdOk(); ok {
|
||||
records["group_id"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetCommandOk(); ok {
|
||||
records["command"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetStandardOutputOk(); ok {
|
||||
records["standard_output"] = strings.ReplaceAll(*strPtr, "\\", "")
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetStandardErrorOk(); ok {
|
||||
records["standard_error"] = strings.ReplaceAll(*strPtr, "\\", "")
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetStandardInputOk(); ok {
|
||||
records["standard_input"] = strings.ReplaceAll(*strPtr, "\\", "")
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetCurrentWorkingDirectoryOk(); ok {
|
||||
records["current_working_directory"] = strings.ReplaceAll(*strPtr, "\\", "")
|
||||
}
|
||||
if int64Ptr, ok := jobs[i].GetSubmitTimeOk(); ok {
|
||||
records["submit_time"] = *int64Ptr
|
||||
}
|
||||
if int64Ptr, ok := jobs[i].GetStartTimeOk(); ok {
|
||||
records["start_time"] = *int64Ptr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetCpusOk(); ok {
|
||||
records["cpus"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetTasksOk(); ok {
|
||||
records["tasks"] = *int32Ptr
|
||||
}
|
||||
if int64Ptr, ok := jobs[i].GetTimeLimitOk(); ok {
|
||||
records["time_limit"] = *int64Ptr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetTresReqStrOk(); ok {
|
||||
for k, v := range parseTres(*strPtr) {
|
||||
records["tres_"+k] = v
|
||||
}
|
||||
}
|
||||
|
||||
acc.AddFields("slurm_jobs", records, tags)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Slurm) gatherNodesMetrics(acc telegraf.Accumulator, nodes []goslurm.V0038Node) {
|
||||
for _, node := range nodes {
|
||||
records := make(map[string]interface{}, 13)
|
||||
tags := make(map[string]string, 2)
|
||||
|
||||
tags["source"] = s.baseURL.Hostname()
|
||||
if strPtr, ok := node.GetNameOk(); ok {
|
||||
tags["name"] = *strPtr
|
||||
}
|
||||
|
||||
if strPtr, ok := node.GetStateOk(); ok {
|
||||
records["state"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := node.GetCoresOk(); ok {
|
||||
records["cores"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := node.GetCpusOk(); ok {
|
||||
records["cpus"] = *int32Ptr
|
||||
}
|
||||
if int64Ptr, ok := node.GetCpuLoadOk(); ok {
|
||||
records["cpu_load"] = *int64Ptr
|
||||
}
|
||||
if int64Ptr, ok := node.GetAllocCpusOk(); ok {
|
||||
records["alloc_cpu"] = *int64Ptr
|
||||
}
|
||||
if int32Ptr, ok := node.GetRealMemoryOk(); ok {
|
||||
records["real_memory"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := node.GetFreeMemoryOk(); ok {
|
||||
records["free_memory"] = *int32Ptr
|
||||
}
|
||||
if int64Ptr, ok := node.GetAllocMemoryOk(); ok {
|
||||
records["alloc_memory"] = *int64Ptr
|
||||
}
|
||||
if strPtr, ok := node.GetTresOk(); ok {
|
||||
for k, v := range parseTres(*strPtr) {
|
||||
records["tres_"+k] = v
|
||||
}
|
||||
}
|
||||
if strPtr, ok := node.GetTresUsedOk(); ok {
|
||||
for k, v := range parseTres(*strPtr) {
|
||||
records["tres_used_"+k] = v
|
||||
}
|
||||
}
|
||||
if int32Ptr, ok := node.GetWeightOk(); ok {
|
||||
records["weight"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := node.GetSlurmdVersionOk(); ok {
|
||||
records["slurmd_version"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := node.GetArchitectureOk(); ok {
|
||||
records["architecture"] = *strPtr
|
||||
}
|
||||
|
||||
acc.AddFields("slurm_nodes", records, tags)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Slurm) gatherPartitionsMetrics(acc telegraf.Accumulator, partitions []goslurm.V0038Partition) {
|
||||
for _, partition := range partitions {
|
||||
records := make(map[string]interface{}, 5)
|
||||
tags := make(map[string]string, 2)
|
||||
|
||||
tags["source"] = s.baseURL.Hostname()
|
||||
if strPtr, ok := partition.GetNameOk(); ok {
|
||||
tags["name"] = *strPtr
|
||||
}
|
||||
|
||||
if strPtr, ok := partition.GetStateOk(); ok {
|
||||
records["state"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := partition.GetTotalCpusOk(); ok {
|
||||
records["total_cpu"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := partition.GetTotalNodesOk(); ok {
|
||||
records["total_nodes"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := partition.GetNodesOk(); ok {
|
||||
records["nodes"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := partition.GetTresOk(); ok {
|
||||
for k, v := range parseTres(*strPtr) {
|
||||
records["tres_"+k] = v
|
||||
}
|
||||
}
|
||||
|
||||
acc.AddFields("slurm_partitions", records, tags)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Slurm) gatherReservationsMetrics(acc telegraf.Accumulator, reservations []goslurm.V0038Reservation) {
|
||||
for _, reservation := range reservations {
|
||||
records := make(map[string]interface{}, 9)
|
||||
tags := make(map[string]string, 2)
|
||||
|
||||
tags["source"] = s.baseURL.Hostname()
|
||||
if strPtr, ok := reservation.GetNameOk(); ok {
|
||||
tags["name"] = *strPtr
|
||||
}
|
||||
|
||||
if int32Ptr, ok := reservation.GetCoreCountOk(); ok {
|
||||
records["core_count"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := reservation.GetCoreSpecCntOk(); ok {
|
||||
records["core_spec_count"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := reservation.GetGroupsOk(); ok {
|
||||
records["groups"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := reservation.GetUsersOk(); ok {
|
||||
records["users"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := reservation.GetStartTimeOk(); ok {
|
||||
records["start_time"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := reservation.GetPartitionOk(); ok {
|
||||
records["partition"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := reservation.GetAccountsOk(); ok {
|
||||
records["accounts"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := reservation.GetNodeCountOk(); ok {
|
||||
records["node_count"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := reservation.GetNodeListOk(); ok {
|
||||
records["node_list"] = *strPtr
|
||||
}
|
||||
|
||||
acc.AddFields("slurm_reservations", records, tags)
|
||||
}
|
||||
}
|
||||
|
||||
// init registers the plugin with telegraf's input registry, providing a
// factory that creates a Slurm instance with sane defaults.
func init() {
	inputs.Add("slurm", func() telegraf.Input {
		return &Slurm{
			// Default timeout for slurmrestd requests; overridable in config.
			ResponseTimeout: config.Duration(5 * time.Second),
		}
	})
}
|
161
plugins/inputs/slurm/slurm_test.go
Normal file
161
plugins/inputs/slurm/slurm_test.go
Normal file
|
@ -0,0 +1,161 @@
|
|||
package slurm
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/config"
|
||||
"github.com/influxdata/telegraf/plugins/parsers/influx"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
func TestGoodURLs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
}{
|
||||
{"http", "http://example.com:6820"},
|
||||
{"https", "https://example.com:6820"},
|
||||
{"http no port", "http://example.com"},
|
||||
{"https no port", "https://example.com"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
plugin := Slurm{
|
||||
URL: tt.url,
|
||||
}
|
||||
require.NoError(t, plugin.Init())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestWrongURLs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
}{
|
||||
{"wrong http scheme", "httpp://example.com:6820"},
|
||||
{"wrong https scheme", "httpss://example.com:6820"},
|
||||
{"empty url", ""},
|
||||
{"empty hostname", "http://:6820"},
|
||||
{"only scheme", "http://"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
plugin := Slurm{
|
||||
URL: tt.url,
|
||||
}
|
||||
require.Error(t, plugin.Init())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestWrongEndpoints(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
enabledEndpoints []string
|
||||
}{
|
||||
{"empty endpoint", []string{"diag", "", "jobs"}},
|
||||
{"mistyped endpoint", []string{"diagg", "jobs", "partitions"}},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
plugin := Slurm{
|
||||
URL: "http://example.net",
|
||||
EnabledEndpoints: tt.enabledEndpoints,
|
||||
}
|
||||
require.Error(t, plugin.Init())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCases(t *testing.T) {
|
||||
entries, err := os.ReadDir("testcases")
|
||||
require.NoError(t, err)
|
||||
|
||||
for _, entry := range entries {
|
||||
if !entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(entry.Name(), func(t *testing.T) {
|
||||
testcasePath := filepath.Join("testcases", entry.Name())
|
||||
responsesPath := filepath.Join(testcasePath, "responses")
|
||||
expectedFilename := filepath.Join(testcasePath, "expected.out")
|
||||
configFilename := filepath.Join(testcasePath, "telegraf.conf")
|
||||
|
||||
responses, err := os.ReadDir(responsesPath)
|
||||
require.NoError(t, err)
|
||||
|
||||
pathToResponse := map[string][]byte{}
|
||||
for _, response := range responses {
|
||||
if response.IsDir() {
|
||||
continue
|
||||
}
|
||||
fName := response.Name()
|
||||
buf, err := os.ReadFile(filepath.Join(responsesPath, fName))
|
||||
require.NoError(t, err)
|
||||
pathToResponse[strings.TrimSuffix(fName, filepath.Ext(fName))] = buf
|
||||
}
|
||||
|
||||
// Prepare the influx parser for expectations
|
||||
parser := &influx.Parser{}
|
||||
require.NoError(t, parser.Init())
|
||||
|
||||
// Read expected values, if any
|
||||
var expected []telegraf.Metric
|
||||
if _, err := os.Stat(expectedFilename); err == nil {
|
||||
var err error
|
||||
expected, err = testutil.ParseMetricsFromFile(expectedFilename, parser)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
ts := httptest.NewServer(http.NotFoundHandler())
|
||||
defer ts.Close()
|
||||
|
||||
ts.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
resp, ok := pathToResponse[strings.TrimPrefix(r.URL.Path, "/slurm/v0.0.38/")]
|
||||
if !ok {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
t.Errorf("Expected to have path to response: %s", r.URL.Path)
|
||||
return
|
||||
}
|
||||
w.Header().Add("Content-Type", "application/json")
|
||||
|
||||
if _, err := w.Write(resp); err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
// Load the test-specific configuration
|
||||
cfg := config.NewConfig()
|
||||
cfg.Agent.Quiet = true
|
||||
require.NoError(t, cfg.LoadConfig(configFilename))
|
||||
require.Len(t, cfg.Inputs, 1)
|
||||
|
||||
// Instantiate the plugin. As seen on NewConfig's documentation,
|
||||
// parsing the configuration will instantiate the plugins, so that
|
||||
// we only need to assert the plugin's type!
|
||||
plugin := cfg.Inputs[0].Input.(*Slurm)
|
||||
plugin.URL = "http://" + ts.Listener.Addr().String()
|
||||
plugin.Log = testutil.Logger{}
|
||||
require.NoError(t, plugin.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, plugin.Gather(&acc))
|
||||
|
||||
actual := acc.GetTelegrafMetrics()
|
||||
testutil.RequireMetricsEqual(t, expected, actual, testutil.SortMetrics(), testutil.IgnoreTime())
|
||||
})
|
||||
}
|
||||
}
|
11
plugins/inputs/slurm/testcases/gather/expected.out
Normal file
11
plugins/inputs/slurm/testcases/gather/expected.out
Normal file
|
@ -0,0 +1,11 @@
|
|||
slurm_diag,source=127.0.0.1 bf_active=false,bf_queue_len=1i,bf_queue_len_mean=1i,jobs_canceled=0i,jobs_completed=287i,jobs_failed=1i,jobs_pending=0i,jobs_running=100i,jobs_started=287i,jobs_submitted=287i,schedule_cycle_last=298i,schedule_cycle_mean=137i,server_thread_count=3i 1723464650000000000
|
||||
|
||||
slurm_jobs,job_id=20464,name=gridjob,source=127.0.0.1 command="/tmp/SLURM_job_script.OjQEIH",cpus=2i,current_working_directory="/home/sessiondir/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm",group_id=2005i,nice=50i,node_count=1i,nodes="naboo222",partition="atlas",priority=4294881265i,standard_error="/home/sessiondir/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm.comment",standard_input="/dev/null",standard_output="/home/sessiondir/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm.comment",start_time=1722989851i,state="RUNNING",state_reason="None",submit_time=1722989851i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=2000,tres_node=1 1723464650000000000
|
||||
slurm_jobs,job_id=20468,name=gridjob,source=127.0.0.1 command="/tmp/SLURM_job_script.XTwtdj",cpus=2i,current_working_directory="/home/sessiondir/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n",group_id=2005i,nice=50i,node_count=1i,nodes="naboo222",partition="atlas",priority=4294881261i,standard_error="/home/sessiondir/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n.comment",standard_input="/dev/null",standard_output="/home/sessiondir/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n.comment",start_time=1722990772i,state="RUNNING",state_reason="None",submit_time=1722990772i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=2000,tres_node=1 1723464650000000000
|
||||
slurm_jobs,job_id=23772,name=gridjob,source=127.0.0.1 command="/tmp/SLURM_job_script.8PMmVe",cpus=8i,current_working_directory="/home/sessiondir/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm",group_id=2005i,nice=50i,node_count=1i,nodes="naboo147",partition="atlas",priority=4294877957i,standard_error="/home/sessiondir/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm.comment",standard_input="/dev/null",standard_output="/home/sessiondir/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm.comment",start_time=1723457333i,state="COMPLETED",state_reason="None",submit_time=1723457333i,tasks=8i,time_limit=3600i,tres_billing=8,tres_cpu=8,tres_mem=16000,tres_node=1 1723464650000000000
|
||||
|
||||
slurm_nodes,name=naboo145,source=127.0.0.1 alloc_cpu=0i,alloc_memory=0i,architecture="x86_64",cores=18i,cpu_load=27i,cpus=36i,free_memory=86423i,real_memory=94791i,slurmd_version="22.05.9",state="idle",tres_billing=36,tres_cpu=36,tres_mem=94791,weight=1i 1723464650000000000
|
||||
slurm_nodes,name=naboo146,source=127.0.0.1 alloc_cpu=0i,alloc_memory=0i,architecture="x86_64",cores=18i,cpu_load=0i,cpus=36i,free_memory=92151i,real_memory=94791i,slurmd_version="22.05.9",state="idle",tres_billing=36,tres_cpu=36,tres_mem=94791,weight=1i 1723464650000000000
|
||||
slurm_nodes,name=naboo147,source=127.0.0.1 alloc_cpu=36i,alloc_memory=56000i,architecture="x86_64",cores=18i,cpu_load=2969i,cpus=36i,free_memory=10908i,real_memory=94793i,slurmd_version="22.05.9",state="allocated",tres_billing=36,tres_cpu=36,tres_mem=94793,tres_used_cpu=36,tres_used_mem=56000,weight=1i 1723464650000000000
|
||||
|
||||
slurm_partitions,name=atlas,source=127.0.0.1 nodes="naboo145,naboo146,naboo147,naboo216,naboo219,naboo222,naboo224,naboo225,naboo227,naboo228,naboo229,naboo234,naboo235,naboo236,naboo237,naboo238,naboo239,naboo240,naboo241,naboo242,naboo243",state="UP",total_cpu=632i,total_nodes=21i,tres_billing=632,tres_cpu=632,tres_mem=1415207,tres_node=21 1723464650000000000
|
224
plugins/inputs/slurm/testcases/gather/responses/diag.json
Normal file
224
plugins/inputs/slurm/testcases/gather/responses/diag.json
Normal file
|
@ -0,0 +1,224 @@
|
|||
{
|
||||
"meta": {
|
||||
"plugin": {
|
||||
"type": "openapi\/v0.0.38",
|
||||
"name": "Slurm OpenAPI v0.0.38"
|
||||
},
|
||||
"Slurm": {
|
||||
"version": {
|
||||
"major": 22,
|
||||
"micro": 9,
|
||||
"minor": 5
|
||||
},
|
||||
"release": "22.05.9"
|
||||
}
|
||||
},
|
||||
"errors": [
|
||||
],
|
||||
"statistics": {
|
||||
"rpcs_by_message_type": [
|
||||
{
|
||||
"message_type": "REQUEST_JOB_INFO",
|
||||
"type_id": 2003,
|
||||
"count": 73587,
|
||||
"average_time": 658,
|
||||
"total_time": 48479000
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_PARTITION_INFO",
|
||||
"type_id": 2009,
|
||||
"count": 158967,
|
||||
"average_time": 101,
|
||||
"total_time": 16185440
|
||||
},
|
||||
{
|
||||
"message_type": "MESSAGE_NODE_REGISTRATION_STATUS",
|
||||
"type_id": 1002,
|
||||
"count": 18690,
|
||||
"average_time": 137,
|
||||
"total_time": 2566758
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_COMPLETE_BATCH_SCRIPT",
|
||||
"type_id": 5018,
|
||||
"count": 12233,
|
||||
"average_time": 486,
|
||||
"total_time": 5946490
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_AUTH_TOKEN",
|
||||
"type_id": 5039,
|
||||
"count": 36,
|
||||
"average_time": 291,
|
||||
"total_time": 10489
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_BUILD_INFO",
|
||||
"type_id": 2001,
|
||||
"count": 28201,
|
||||
"average_time": 194,
|
||||
"total_time": 5486061
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_PING",
|
||||
"type_id": 1008,
|
||||
"count": 28201,
|
||||
"average_time": 103,
|
||||
"total_time": 2925195
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_NODE_INFO",
|
||||
"type_id": 2007,
|
||||
"count": 85379,
|
||||
"average_time": 175,
|
||||
"total_time": 15007960
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_FED_INFO",
|
||||
"type_id": 2049,
|
||||
"count": 24466,
|
||||
"average_time": 109,
|
||||
"total_time": 2681655
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_JOB_INFO_SINGLE",
|
||||
"type_id": 2021,
|
||||
"count": 24466,
|
||||
"average_time": 121,
|
||||
"total_time": 2963320
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_SUBMIT_BATCH_JOB",
|
||||
"type_id": 4003,
|
||||
"count": 12233,
|
||||
"average_time": 6504,
|
||||
"total_time": 79574600
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_STATS_INFO",
|
||||
"type_id": 2035,
|
||||
"count": 1040,
|
||||
"average_time": 61,
|
||||
"total_time": 64431
|
||||
},
|
||||
{
|
||||
"message_type": "MESSAGE_EPILOG_COMPLETE",
|
||||
"type_id": 6012,
|
||||
"count": 40,
|
||||
"average_time": 86,
|
||||
"total_time": 3455
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_RESERVATION_INFO",
|
||||
"type_id": 2024,
|
||||
"count": 1017,
|
||||
"average_time": 47,
|
||||
"total_time": 48788
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_LICENSE_INFO",
|
||||
"type_id": 1021,
|
||||
"count": 42,
|
||||
"average_time": 43,
|
||||
"total_time": 1823
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_UPDATE_NODE",
|
||||
"type_id": 3002,
|
||||
"count": 2,
|
||||
"average_time": 415,
|
||||
"total_time": 830
|
||||
}
|
||||
],
|
||||
"rpcs_by_user": [
|
||||
{
|
||||
"user": "root",
|
||||
"user_id": 0,
|
||||
"count": 456365,
|
||||
"average_time": 224,
|
||||
"total_time": 102371523
|
||||
},
|
||||
{
|
||||
"user": "atl001",
|
||||
"user_id": 2006,
|
||||
"count": 11699,
|
||||
"average_time": 6611,
|
||||
"total_time": 77353396
|
||||
},
|
||||
{
|
||||
"user": "atl002",
|
||||
"user_id": 2007,
|
||||
"count": 120,
|
||||
"average_time": 3684,
|
||||
"total_time": 442106
|
||||
},
|
||||
{
|
||||
"user": "ops001",
|
||||
"user_id": 18006,
|
||||
"count": 298,
|
||||
"average_time": 4447,
|
||||
"total_time": 1325496
|
||||
},
|
||||
{
|
||||
"user": "ops003",
|
||||
"user_id": 18008,
|
||||
"count": 58,
|
||||
"average_time": 3732,
|
||||
"total_time": 216488
|
||||
},
|
||||
{
|
||||
"user": "ops002",
|
||||
"user_id": 18007,
|
||||
"count": 58,
|
||||
"average_time": 4088,
|
||||
"total_time": 237114
|
||||
},
|
||||
{
|
||||
"user": "99",
|
||||
"user_id": 99,
|
||||
"count": 2,
|
||||
"average_time": 86,
|
||||
"total_time": 172
|
||||
}
|
||||
],
|
||||
"parts_packed": 1,
|
||||
"req_time": 1723103198,
|
||||
"req_time_start": 1723075200,
|
||||
"server_thread_count": 3,
|
||||
"agent_queue_size": 0,
|
||||
"agent_count": 0,
|
||||
"agent_thread_count": 0,
|
||||
"dbd_agent_queue_size": 0,
|
||||
"gettimeofday_latency": 21,
|
||||
"schedule_cycle_max": 1116,
|
||||
"schedule_cycle_last": 298,
|
||||
"schedule_cycle_total": 960,
|
||||
"schedule_cycle_mean": 137,
|
||||
"schedule_cycle_mean_depth": 0,
|
||||
"schedule_cycle_per_minute": 2,
|
||||
"schedule_queue_length": 1,
|
||||
"jobs_submitted": 287,
|
||||
"jobs_started": 287,
|
||||
"jobs_completed": 287,
|
||||
"jobs_canceled": 0,
|
||||
"jobs_failed": 1,
|
||||
"jobs_pending": 0,
|
||||
"jobs_running": 100,
|
||||
"job_states_ts": 1723103172,
|
||||
"bf_backfilled_jobs": 1626,
|
||||
"bf_last_backfilled_jobs": 14,
|
||||
"bf_backfilled_het_jobs": 0,
|
||||
"bf_cycle_counter": 12,
|
||||
"bf_cycle_mean": 440,
|
||||
"bf_depth_mean": 1,
|
||||
"bf_depth_mean_try": 1,
|
||||
"bf_cycle_last": 387,
|
||||
"bf_cycle_max": 811,
|
||||
"bf_queue_len": 1,
|
||||
"bf_queue_len_mean": 1,
|
||||
"bf_table_size": 1,
|
||||
"bf_table_size_mean": 1,
|
||||
"bf_when_last_cycle": 1723102514,
|
||||
"bf_active": false
|
||||
}
|
||||
}
|
448
plugins/inputs/slurm/testcases/gather/responses/jobs.json
Normal file
448
plugins/inputs/slurm/testcases/gather/responses/jobs.json
Normal file
|
@ -0,0 +1,448 @@
|
|||
{
|
||||
"meta": {
|
||||
"plugin": {
|
||||
"type": "openapi\/v0.0.38",
|
||||
"name": "Slurm OpenAPI v0.0.38"
|
||||
},
|
||||
"Slurm": {
|
||||
"version": {
|
||||
"major": 22,
|
||||
"micro": 9,
|
||||
"minor": 5
|
||||
},
|
||||
"release": "22.05.9"
|
||||
}
|
||||
},
|
||||
"errors": [
|
||||
],
|
||||
"jobs": [
|
||||
{
|
||||
"account": "",
|
||||
"accrue_time": 1722989851,
|
||||
"admin_comment": "",
|
||||
"array_job_id": 0,
|
||||
"array_task_id": null,
|
||||
"array_max_tasks": 0,
|
||||
"array_task_string": "",
|
||||
"association_id": 0,
|
||||
"batch_features": "",
|
||||
"batch_flag": true,
|
||||
"batch_host": "naboo222",
|
||||
"flags": [
|
||||
"JOB_WAS_RUNNING",
|
||||
"JOB_MEM_SET"
|
||||
],
|
||||
"burst_buffer": "",
|
||||
"burst_buffer_state": "",
|
||||
"cluster": "local",
|
||||
"cluster_features": "",
|
||||
"command": "\/tmp\/SLURM_job_script.OjQEIH",
|
||||
"comment": "",
|
||||
"container": "",
|
||||
"contiguous": false,
|
||||
"core_spec": null,
|
||||
"thread_spec": null,
|
||||
"cores_per_socket": null,
|
||||
"billable_tres": 2.0,
|
||||
"cpus_per_task": null,
|
||||
"cpu_frequency_minimum": null,
|
||||
"cpu_frequency_maximum": null,
|
||||
"cpu_frequency_governor": null,
|
||||
"cpus_per_tres": "",
|
||||
"deadline": 0,
|
||||
"delay_boot": 0,
|
||||
"dependency": "",
|
||||
"derived_exit_code": 0,
|
||||
"eligible_time": 1722989851,
|
||||
"end_time": 1723205851,
|
||||
"excluded_nodes": "",
|
||||
"exit_code": 0,
|
||||
"features": "",
|
||||
"federation_origin": "",
|
||||
"federation_siblings_active": "",
|
||||
"federation_siblings_viable": "",
|
||||
"gres_detail": [
|
||||
],
|
||||
"group_id": 2005,
|
||||
"group_name": "atlas",
|
||||
"job_id": 20464,
|
||||
"job_resources": {
|
||||
"nodes": "naboo222",
|
||||
"allocated_hosts": 1,
|
||||
"allocated_nodes": [
|
||||
{
|
||||
"sockets": {
|
||||
"0": {
|
||||
"cores": {
|
||||
"0": "allocated"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nodename": "naboo222",
|
||||
"cpus_used": 0,
|
||||
"memory_used": 0,
|
||||
"memory_allocated": 4000
|
||||
}
|
||||
]
|
||||
},
|
||||
"job_state": "RUNNING",
|
||||
"last_sched_evaluation": 1722989851,
|
||||
"licenses": "",
|
||||
"max_cpus": 0,
|
||||
"max_nodes": 0,
|
||||
"mcs_label": "",
|
||||
"memory_per_tres": "",
|
||||
"name": "gridjob",
|
||||
"nodes": "naboo222",
|
||||
"nice": 50,
|
||||
"tasks_per_core": null,
|
||||
"tasks_per_node": 0,
|
||||
"tasks_per_socket": null,
|
||||
"tasks_per_board": 0,
|
||||
"cpus": 2,
|
||||
"node_count": 1,
|
||||
"tasks": 1,
|
||||
"het_job_id": 0,
|
||||
"het_job_id_set": "",
|
||||
"het_job_offset": 0,
|
||||
"partition": "atlas",
|
||||
"prefer": "",
|
||||
"memory_per_node": null,
|
||||
"memory_per_cpu": 2000,
|
||||
"minimum_cpus_per_node": 1,
|
||||
"minimum_tmp_disk_per_node": 0,
|
||||
"preempt_time": 0,
|
||||
"pre_sus_time": 0,
|
||||
"priority": 4294881265,
|
||||
"profile": null,
|
||||
"qos": "",
|
||||
"reboot": false,
|
||||
"required_nodes": "",
|
||||
"requeue": false,
|
||||
"resize_time": 0,
|
||||
"restart_cnt": 0,
|
||||
"resv_name": "",
|
||||
"shared": null,
|
||||
"show_flags": [
|
||||
"SHOW_ALL",
|
||||
"SHOW_DETAIL",
|
||||
"SHOW_LOCAL"
|
||||
],
|
||||
"sockets_per_board": 0,
|
||||
"sockets_per_node": null,
|
||||
"start_time": 1722989851,
|
||||
"state_description": "",
|
||||
"state_reason": "None",
|
||||
"standard_error": "\/home\/sessiondir\/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm.comment",
|
||||
"standard_input": "\/dev\/null",
|
||||
"standard_output": "\/home\/sessiondir\/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm.comment",
|
||||
"submit_time": 1722989851,
|
||||
"suspend_time": 0,
|
||||
"system_comment": "",
|
||||
"time_limit": 3600,
|
||||
"time_minimum": 0,
|
||||
"threads_per_core": null,
|
||||
"tres_bind": "",
|
||||
"tres_freq": "",
|
||||
"tres_per_job": "",
|
||||
"tres_per_node": "",
|
||||
"tres_per_socket": "",
|
||||
"tres_per_task": "",
|
||||
"tres_req_str": "cpu=1,mem=2000M,node=1,billing=1",
|
||||
"tres_alloc_str": "cpu=2,mem=4000M,node=1,billing=2",
|
||||
"user_id": 2006,
|
||||
"user_name": "atl001",
|
||||
"wckey": "",
|
||||
"current_working_directory": "\/home\/sessiondir\/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm"
|
||||
},
|
||||
{
|
||||
"account": "",
|
||||
"accrue_time": 1722990772,
|
||||
"admin_comment": "",
|
||||
"array_job_id": 0,
|
||||
"array_task_id": null,
|
||||
"array_max_tasks": 0,
|
||||
"array_task_string": "",
|
||||
"association_id": 0,
|
||||
"batch_features": "",
|
||||
"batch_flag": true,
|
||||
"batch_host": "naboo222",
|
||||
"flags": [
|
||||
"JOB_WAS_RUNNING",
|
||||
"JOB_MEM_SET"
|
||||
],
|
||||
"burst_buffer": "",
|
||||
"burst_buffer_state": "",
|
||||
"cluster": "local",
|
||||
"cluster_features": "",
|
||||
"command": "\/tmp\/SLURM_job_script.XTwtdj",
|
||||
"comment": "",
|
||||
"container": "",
|
||||
"contiguous": false,
|
||||
"core_spec": null,
|
||||
"thread_spec": null,
|
||||
"cores_per_socket": null,
|
||||
"billable_tres": 2.0,
|
||||
"cpus_per_task": null,
|
||||
"cpu_frequency_minimum": null,
|
||||
"cpu_frequency_maximum": null,
|
||||
"cpu_frequency_governor": null,
|
||||
"cpus_per_tres": "",
|
||||
"deadline": 0,
|
||||
"delay_boot": 0,
|
||||
"dependency": "",
|
||||
"derived_exit_code": 0,
|
||||
"eligible_time": 1722990772,
|
||||
"end_time": 1723206772,
|
||||
"excluded_nodes": "",
|
||||
"exit_code": 0,
|
||||
"features": "",
|
||||
"federation_origin": "",
|
||||
"federation_siblings_active": "",
|
||||
"federation_siblings_viable": "",
|
||||
"gres_detail": [
|
||||
],
|
||||
"group_id": 2005,
|
||||
"group_name": "atlas",
|
||||
"job_id": 20468,
|
||||
"job_resources": {
|
||||
"nodes": "naboo222",
|
||||
"allocated_hosts": 1,
|
||||
"allocated_nodes": [
|
||||
{
|
||||
"sockets": {
|
||||
"1": {
|
||||
"cores": {
|
||||
"2": "allocated"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nodename": "naboo222",
|
||||
"cpus_used": 0,
|
||||
"memory_used": 0,
|
||||
"memory_allocated": 4000
|
||||
}
|
||||
]
|
||||
},
|
||||
"job_state": "RUNNING",
|
||||
"last_sched_evaluation": 1722990772,
|
||||
"licenses": "",
|
||||
"max_cpus": 0,
|
||||
"max_nodes": 0,
|
||||
"mcs_label": "",
|
||||
"memory_per_tres": "",
|
||||
"name": "gridjob",
|
||||
"nodes": "naboo222",
|
||||
"nice": 50,
|
||||
"tasks_per_core": null,
|
||||
"tasks_per_node": 0,
|
||||
"tasks_per_socket": null,
|
||||
"tasks_per_board": 0,
|
||||
"cpus": 2,
|
||||
"node_count": 1,
|
||||
"tasks": 1,
|
||||
"het_job_id": 0,
|
||||
"het_job_id_set": "",
|
||||
"het_job_offset": 0,
|
||||
"partition": "atlas",
|
||||
"prefer": "",
|
||||
"memory_per_node": null,
|
||||
"memory_per_cpu": 2000,
|
||||
"minimum_cpus_per_node": 1,
|
||||
"minimum_tmp_disk_per_node": 0,
|
||||
"preempt_time": 0,
|
||||
"pre_sus_time": 0,
|
||||
"priority": 4294881261,
|
||||
"profile": null,
|
||||
"qos": "",
|
||||
"reboot": false,
|
||||
"required_nodes": "",
|
||||
"requeue": false,
|
||||
"resize_time": 0,
|
||||
"restart_cnt": 0,
|
||||
"resv_name": "",
|
||||
"shared": null,
|
||||
"show_flags": [
|
||||
"SHOW_ALL",
|
||||
"SHOW_DETAIL",
|
||||
"SHOW_LOCAL"
|
||||
],
|
||||
"sockets_per_board": 0,
|
||||
"sockets_per_node": null,
|
||||
"start_time": 1722990772,
|
||||
"state_description": "",
|
||||
"state_reason": "None",
|
||||
"standard_error": "\/home\/sessiondir\/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n.comment",
|
||||
"standard_input": "\/dev\/null",
|
||||
"standard_output": "\/home\/sessiondir\/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n.comment",
|
||||
"submit_time": 1722990772,
|
||||
"suspend_time": 0,
|
||||
"system_comment": "",
|
||||
"time_limit": 3600,
|
||||
"time_minimum": 0,
|
||||
"threads_per_core": null,
|
||||
"tres_bind": "",
|
||||
"tres_freq": "",
|
||||
"tres_per_job": "",
|
||||
"tres_per_node": "",
|
||||
"tres_per_socket": "",
|
||||
"tres_per_task": "",
|
||||
"tres_req_str": "cpu=1,mem=2000M,node=1,billing=1",
|
||||
"tres_alloc_str": "cpu=2,mem=4000M,node=1,billing=2",
|
||||
"user_id": 2006,
|
||||
"user_name": "atl001",
|
||||
"wckey": "",
|
||||
"current_working_directory": "\/home\/sessiondir\/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n"
|
||||
},
|
||||
{
|
||||
"account": "",
|
||||
"accrue_time": 1723457333,
|
||||
"admin_comment": "",
|
||||
"array_job_id": 0,
|
||||
"array_task_id": null,
|
||||
"array_max_tasks": 0,
|
||||
"array_task_string": "",
|
||||
"association_id": 0,
|
||||
"batch_features": "",
|
||||
"batch_flag": true,
|
||||
"batch_host": "naboo147",
|
||||
"flags": [
|
||||
"TRES_STR_CALC",
|
||||
"JOB_MEM_SET"
|
||||
],
|
||||
"burst_buffer": "",
|
||||
"burst_buffer_state": "",
|
||||
"cluster": "local",
|
||||
"cluster_features": "",
|
||||
"command": "\/tmp\/SLURM_job_script.8PMmVe",
|
||||
"comment": "",
|
||||
"container": "",
|
||||
"contiguous": false,
|
||||
"core_spec": null,
|
||||
"thread_spec": null,
|
||||
"cores_per_socket": null,
|
||||
"billable_tres": 8.0,
|
||||
"cpus_per_task": null,
|
||||
"cpu_frequency_minimum": null,
|
||||
"cpu_frequency_maximum": null,
|
||||
"cpu_frequency_governor": null,
|
||||
"cpus_per_tres": "",
|
||||
"deadline": 0,
|
||||
"delay_boot": 0,
|
||||
"dependency": "",
|
||||
"derived_exit_code": 0,
|
||||
"eligible_time": 1723457333,
|
||||
"end_time": 1723463525,
|
||||
"excluded_nodes": "",
|
||||
"exit_code": 0,
|
||||
"features": "",
|
||||
"federation_origin": "",
|
||||
"federation_siblings_active": "",
|
||||
"federation_siblings_viable": "",
|
||||
"gres_detail": [
|
||||
],
|
||||
"group_id": 2005,
|
||||
"group_name": "atlas",
|
||||
"job_id": 23772,
|
||||
"job_resources": {
|
||||
"nodes": "naboo147",
|
||||
"allocated_hosts": 1,
|
||||
"allocated_nodes": [
|
||||
{
|
||||
"sockets": {
|
||||
"0": {
|
||||
"cores": {
|
||||
"3": "allocated",
|
||||
"10": "allocated",
|
||||
"12": "allocated",
|
||||
"13": "allocated"
|
||||
}
|
||||
},
|
||||
"1": {
|
||||
"cores": {
|
||||
"8": "allocated",
|
||||
"11": "allocated",
|
||||
"12": "allocated",
|
||||
"13": "allocated"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nodename": "naboo147",
|
||||
"cpus_used": 0,
|
||||
"memory_used": 0,
|
||||
"memory_allocated": 16000
|
||||
}
|
||||
]
|
||||
},
|
||||
"job_state": "COMPLETED",
|
||||
"last_sched_evaluation": 1723457333,
|
||||
"licenses": "",
|
||||
"max_cpus": 0,
|
||||
"max_nodes": 0,
|
||||
"mcs_label": "",
|
||||
"memory_per_tres": "",
|
||||
"name": "gridjob",
|
||||
"nodes": "naboo147",
|
||||
"nice": 50,
|
||||
"tasks_per_core": null,
|
||||
"tasks_per_node": 8,
|
||||
"tasks_per_socket": null,
|
||||
"tasks_per_board": 0,
|
||||
"cpus": 8,
|
||||
"node_count": 1,
|
||||
"tasks": 8,
|
||||
"het_job_id": 0,
|
||||
"het_job_id_set": "",
|
||||
"het_job_offset": 0,
|
||||
"partition": "atlas",
|
||||
"prefer": "",
|
||||
"memory_per_node": null,
|
||||
"memory_per_cpu": 2000,
|
||||
"minimum_cpus_per_node": 8,
|
||||
"minimum_tmp_disk_per_node": 0,
|
||||
"preempt_time": 0,
|
||||
"pre_sus_time": 0,
|
||||
"priority": 4294877957,
|
||||
"profile": null,
|
||||
"qos": "",
|
||||
"reboot": false,
|
||||
"required_nodes": "",
|
||||
"requeue": false,
|
||||
"resize_time": 0,
|
||||
"restart_cnt": 0,
|
||||
"resv_name": "",
|
||||
"shared": null,
|
||||
"show_flags": [
|
||||
"SHOW_ALL",
|
||||
"SHOW_DETAIL",
|
||||
"SHOW_LOCAL"
|
||||
],
|
||||
"sockets_per_board": 0,
|
||||
"sockets_per_node": null,
|
||||
"start_time": 1723457333,
|
||||
"state_description": "",
|
||||
"state_reason": "None",
|
||||
"standard_error": "\/home\/sessiondir\/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm.comment",
|
||||
"standard_input": "\/dev\/null",
|
||||
"standard_output": "\/home\/sessiondir\/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm.comment",
|
||||
"submit_time": 1723457333,
|
||||
"suspend_time": 0,
|
||||
"system_comment": "",
|
||||
"time_limit": 3600,
|
||||
"time_minimum": 0,
|
||||
"threads_per_core": null,
|
||||
"tres_bind": "",
|
||||
"tres_freq": "",
|
||||
"tres_per_job": "",
|
||||
"tres_per_node": "",
|
||||
"tres_per_socket": "",
|
||||
"tres_per_task": "",
|
||||
"tres_req_str": "cpu=8,mem=16000M,node=1,billing=8",
|
||||
"tres_alloc_str": "cpu=8,mem=16000M,node=1,billing=8",
|
||||
"user_id": 2006,
|
||||
"user_name": "atl001",
|
||||
"wckey": "",
|
||||
"current_working_directory": "\/home\/sessiondir\/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm"
|
||||
}
|
||||
]
|
||||
}
|
175
plugins/inputs/slurm/testcases/gather/responses/nodes.json
Normal file
175
plugins/inputs/slurm/testcases/gather/responses/nodes.json
Normal file
|
@ -0,0 +1,175 @@
|
|||
{
|
||||
"meta": {
|
||||
"plugin": {
|
||||
"type": "openapi\/v0.0.38",
|
||||
"name": "Slurm OpenAPI v0.0.38"
|
||||
},
|
||||
"Slurm": {
|
||||
"version": {
|
||||
"major": 22,
|
||||
"micro": 9,
|
||||
"minor": 5
|
||||
},
|
||||
"release": "22.05.9"
|
||||
}
|
||||
},
|
||||
"errors": [
|
||||
],
|
||||
"nodes": [
|
||||
{
|
||||
"architecture": "x86_64",
|
||||
"burstbuffer_network_address": "",
|
||||
"boards": 1,
|
||||
"boot_time": 1719400973,
|
||||
"comment": "",
|
||||
"cores": 18,
|
||||
"cpu_binding": 0,
|
||||
"cpu_load": 27,
|
||||
"extra": "",
|
||||
"free_memory": 86423,
|
||||
"cpus": 36,
|
||||
"last_busy": 1723102876,
|
||||
"features": "",
|
||||
"active_features": "",
|
||||
"gres": "",
|
||||
"gres_drained": "N\/A",
|
||||
"gres_used": "",
|
||||
"mcs_label": "",
|
||||
"name": "naboo145",
|
||||
"next_state_after_reboot": "invalid",
|
||||
"address": "naboo145",
|
||||
"hostname": "naboo145",
|
||||
"state": "idle",
|
||||
"state_flags": [
|
||||
"DRAIN"
|
||||
],
|
||||
"next_state_after_reboot_flags": [
|
||||
],
|
||||
"operating_system": "Linux 5.14.0-427.13.1.el9_4.x86_64 #1 SMP PREEMPT_DYNAMIC Tue Apr 30 18:22:29 EDT 2024",
|
||||
"owner": null,
|
||||
"partitions": [
|
||||
"atlas"
|
||||
],
|
||||
"port": 6818,
|
||||
"real_memory": 94791,
|
||||
"reason": "Kill task failed",
|
||||
"reason_changed_at": 1723077306,
|
||||
"reason_set_by_user": "root",
|
||||
"slurmd_start_time": 1720394759,
|
||||
"sockets": 2,
|
||||
"threads": 1,
|
||||
"temporary_disk": 0,
|
||||
"weight": 1,
|
||||
"tres": "cpu=36,mem=94791M,billing=36",
|
||||
"slurmd_version": "22.05.9",
|
||||
"alloc_memory": 0,
|
||||
"alloc_cpus": 0,
|
||||
"idle_cpus": 36,
|
||||
"tres_used": null,
|
||||
"tres_weighted": 0.0
|
||||
},
|
||||
{
|
||||
"architecture": "x86_64",
|
||||
"burstbuffer_network_address": "",
|
||||
"boards": 1,
|
||||
"boot_time": 1719400759,
|
||||
"comment": "",
|
||||
"cores": 18,
|
||||
"cpu_binding": 0,
|
||||
"cpu_load": 0,
|
||||
"extra": "",
|
||||
"free_memory": 92151,
|
||||
"cpus": 36,
|
||||
"last_busy": 1722780995,
|
||||
"features": "",
|
||||
"active_features": "",
|
||||
"gres": "",
|
||||
"gres_drained": "N\/A",
|
||||
"gres_used": "",
|
||||
"mcs_label": "",
|
||||
"name": "naboo146",
|
||||
"next_state_after_reboot": "invalid",
|
||||
"address": "naboo146",
|
||||
"hostname": "naboo146",
|
||||
"state": "idle",
|
||||
"state_flags": [
|
||||
"DRAIN"
|
||||
],
|
||||
"next_state_after_reboot_flags": [
|
||||
],
|
||||
"operating_system": "Linux 5.14.0-427.13.1.el9_4.x86_64 #1 SMP PREEMPT_DYNAMIC Tue Apr 30 18:22:29 EDT 2024",
|
||||
"owner": null,
|
||||
"partitions": [
|
||||
"atlas"
|
||||
],
|
||||
"port": 6818,
|
||||
"real_memory": 94791,
|
||||
"reason": "Kill task failed",
|
||||
"reason_changed_at": 1722748927,
|
||||
"reason_set_by_user": "root",
|
||||
"slurmd_start_time": 1720394759,
|
||||
"sockets": 2,
|
||||
"threads": 1,
|
||||
"temporary_disk": 0,
|
||||
"weight": 1,
|
||||
"tres": "cpu=36,mem=94791M,billing=36",
|
||||
"slurmd_version": "22.05.9",
|
||||
"alloc_memory": 0,
|
||||
"alloc_cpus": 0,
|
||||
"idle_cpus": 36,
|
||||
"tres_used": null,
|
||||
"tres_weighted": 0.0
|
||||
},
|
||||
{
|
||||
"architecture": "x86_64",
|
||||
"burstbuffer_network_address": "",
|
||||
"boards": 1,
|
||||
"boot_time": 1719406605,
|
||||
"comment": "",
|
||||
"cores": 18,
|
||||
"cpu_binding": 0,
|
||||
"cpu_load": 2969,
|
||||
"extra": "",
|
||||
"free_memory": 10908,
|
||||
"cpus": 36,
|
||||
"last_busy": 1722881704,
|
||||
"features": "",
|
||||
"active_features": "",
|
||||
"gres": "",
|
||||
"gres_drained": "N\/A",
|
||||
"gres_used": "",
|
||||
"mcs_label": "",
|
||||
"name": "naboo147",
|
||||
"next_state_after_reboot": "invalid",
|
||||
"address": "naboo147",
|
||||
"hostname": "naboo147",
|
||||
"state": "allocated",
|
||||
"state_flags": [
|
||||
],
|
||||
"next_state_after_reboot_flags": [
|
||||
],
|
||||
"operating_system": "Linux 5.14.0-427.13.1.el9_4.x86_64 #1 SMP PREEMPT_DYNAMIC Tue Apr 30 18:22:29 EDT 2024",
|
||||
"owner": null,
|
||||
"partitions": [
|
||||
"atlas"
|
||||
],
|
||||
"port": 6818,
|
||||
"real_memory": 94793,
|
||||
"reason": "",
|
||||
"reason_changed_at": 0,
|
||||
"reason_set_by_user": null,
|
||||
"slurmd_start_time": 1720394759,
|
||||
"sockets": 2,
|
||||
"threads": 1,
|
||||
"temporary_disk": 0,
|
||||
"weight": 1,
|
||||
"tres": "cpu=36,mem=94793M,billing=36",
|
||||
"slurmd_version": "22.05.9",
|
||||
"alloc_memory": 56000,
|
||||
"alloc_cpus": 36,
|
||||
"idle_cpus": 0,
|
||||
"tres_used": "cpu=36,mem=56000M",
|
||||
"tres_weighted": 36.0
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"meta": {
|
||||
"plugin": {
|
||||
"type": "openapi\/v0.0.38",
|
||||
"name": "Slurm OpenAPI v0.0.38"
|
||||
},
|
||||
"Slurm": {
|
||||
"version": {
|
||||
"major": 22,
|
||||
"micro": 9,
|
||||
"minor": 5
|
||||
},
|
||||
"release": "22.05.9"
|
||||
}
|
||||
},
|
||||
"errors": [
|
||||
],
|
||||
"partitions": [
|
||||
{
|
||||
"flags": [
|
||||
"default"
|
||||
],
|
||||
"preemption_mode": [
|
||||
"disabled"
|
||||
],
|
||||
"allowed_allocation_nodes": "",
|
||||
"allowed_accounts": "",
|
||||
"allowed_groups": "",
|
||||
"allowed_qos": "",
|
||||
"alternative": "",
|
||||
"billing_weights": "",
|
||||
"default_memory_per_cpu": null,
|
||||
"default_memory_per_node": null,
|
||||
"default_time_limit": null,
|
||||
"denied_accounts": "",
|
||||
"denied_qos": "",
|
||||
"preemption_grace_time": 0,
|
||||
"maximum_cpus_per_node": -1,
|
||||
"maximum_memory_per_cpu": null,
|
||||
"maximum_memory_per_node": null,
|
||||
"maximum_nodes_per_job": -1,
|
||||
"max_time_limit": -1,
|
||||
"min nodes per job": 0,
|
||||
"name": "atlas",
|
||||
"nodes": "naboo145,naboo146,naboo147,naboo216,naboo219,naboo222,naboo224,naboo225,naboo227,naboo228,naboo229,naboo234,naboo235,naboo236,naboo237,naboo238,naboo239,naboo240,naboo241,naboo242,naboo243",
|
||||
"over_time_limit": null,
|
||||
"priority_job_factor": 1,
|
||||
"priority_tier": 1,
|
||||
"qos": "",
|
||||
"state": "UP",
|
||||
"total_cpus": 632,
|
||||
"total_nodes": 21,
|
||||
"tres": "cpu=632,mem=1415207M,node=21,billing=632"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"meta": {
|
||||
"plugin": {
|
||||
"type": "openapi\/v0.0.38",
|
||||
"name": "Slurm OpenAPI v0.0.38"
|
||||
},
|
||||
"Slurm": {
|
||||
"version": {
|
||||
"major": 22,
|
||||
"micro": 9,
|
||||
"minor": 5
|
||||
},
|
||||
"release": "22.05.9"
|
||||
}
|
||||
},
|
||||
"errors": [
|
||||
],
|
||||
"reservations": [
|
||||
]
|
||||
}
|
8
plugins/inputs/slurm/testcases/gather/telegraf.conf
Normal file
8
plugins/inputs/slurm/testcases/gather/telegraf.conf
Normal file
|
@ -0,0 +1,8 @@
|
|||
[[inputs.slurm]]
|
||||
url = "willBeOverriden"
|
||||
response_timeout = "5s"
|
||||
# enabled_endpoints = []
|
||||
|
||||
## Credentials for JWT-based authentication
|
||||
username = "root"
|
||||
token = "topSecret"
|
5
plugins/inputs/slurm/testcases/panic/responses/diag.json
Normal file
5
plugins/inputs/slurm/testcases/panic/responses/diag.json
Normal file
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"meta": {},
|
||||
"errors": [],
|
||||
"statistics": {}
|
||||
}
|
5
plugins/inputs/slurm/testcases/panic/responses/jobs.json
Normal file
5
plugins/inputs/slurm/testcases/panic/responses/jobs.json
Normal file
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"meta": {},
|
||||
"errors": [],
|
||||
"jobs": []
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"meta": {},
|
||||
"errors": [],
|
||||
"nodes": []
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"meta": {},
|
||||
"errors": [],
|
||||
"partitions": []
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"meta": {},
|
||||
"errors": [],
|
||||
"reservations": []
|
||||
}
|
8
plugins/inputs/slurm/testcases/panic/telegraf.conf
Normal file
8
plugins/inputs/slurm/testcases/panic/telegraf.conf
Normal file
|
@ -0,0 +1,8 @@
|
|||
[[inputs.slurm]]
|
||||
url = "willBeOverriden"
|
||||
response_timeout = "5s"
|
||||
enabled_endpoints = []
|
||||
|
||||
## Credentials for JWT-based authentication
|
||||
username = "root"
|
||||
token = "topSecret"
|
Loading…
Add table
Add a link
Reference in a new issue