Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent e393c3af3f, commit 4978089aab
4963 changed files with 677545 additions and 0 deletions
82 plugins/processors/reverse_dns/README.md Normal file

@@ -0,0 +1,82 @@

# Reverse DNS Processor Plugin

The `reverse_dns` processor does a reverse-DNS lookup on tags (or fields) with
IPs in them.

Telegraf minimum version: Telegraf 1.15.0

## Global configuration options <!-- @/docs/includes/plugin_config.md -->

In addition to the plugin-specific configuration settings, plugins support
additional global and plugin configuration settings. These settings are used to
modify metrics, tags, and fields, or to create aliases and configure ordering,
etc. See the [CONFIGURATION.md][CONFIGURATION.md] for more details.

[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins

## Configuration

```toml @sample.conf
# ReverseDNS does a reverse lookup on IP addresses to retrieve the DNS name
[[processors.reverse_dns]]
  ## For optimal performance, you may want to limit which metrics are passed
  ## to this processor, e.g.:
  ## namepass = ["my_metric_*"]

  ## cache_ttl is how long DNS entries should stay cached.
  ## Generally longer is better, but if you expect a large number of diverse
  ## lookups you'll want to consider memory use.
  cache_ttl = "24h"

  ## lookup_timeout is how long to wait for a single DNS request to respond.
  ## This is also the maximum acceptable latency for a metric travelling
  ## through the reverse_dns processor. After lookup_timeout is exceeded, a
  ## metric is passed on unaltered.
  ## Multiple simultaneous resolution requests for the same IP make only a
  ## single rDNS request, and they all wait for the answer for this long.
  lookup_timeout = "3s"

  ## max_parallel_lookups is the maximum number of DNS requests to be in
  ## flight at the same time. Requests hitting cached values do not count
  ## against this total, and neither do multiple requests for the same IP.
  ## It's probably best to keep this number fairly low.
  max_parallel_lookups = 10

  ## ordered controls whether the metrics need to stay in the same order this
  ## plugin received them in. If false, this plugin may change the order, with
  ## requests hitting cached results moving through immediately instead of
  ## waiting on slower lookups. If you depend on the order of metrics staying
  ## the same, set this to true. Keeping the metrics ordered may be slightly
  ## slower.
  ordered = false

  [[processors.reverse_dns.lookup]]
    ## get the ip from the field "source_ip", and put the result in the field "source_name"
    field = "source_ip"
    dest = "source_name"

  [[processors.reverse_dns.lookup]]
    ## get the ip from the tag "destination_ip", and put the result in the tag
    ## "destination_name".
    tag = "destination_ip"
    dest = "destination_name"

    ## If you would prefer destination_name to be a field instead, you can use
    ## a processors.converter after this one, specifying the order attribute.
```
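The converter hand-off mentioned in the last comment could look roughly like the sketch below. This block is illustrative rather than part of the commit: the `order` values and the `[processors.converter.tags]` / `string` settings follow the stock `processors.converter` plugin's conventions, so check them against its README before relying on this.

```toml
[[processors.reverse_dns]]
  order = 1
  [[processors.reverse_dns.lookup]]
    tag = "destination_ip"
    dest = "destination_name"

# Runs second; converts the destination_name tag into a string field.
[[processors.converter]]
  order = 2
  [processors.converter.tags]
    string = ["destination_name"]
```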
## Example

Example config:

```toml
[[processors.reverse_dns]]
  [[processors.reverse_dns.lookup]]
    tag = "ip"
    dest = "domain"
```

```diff
- ping,ip=8.8.8.8 elapsed=300i 1502489900000000000
+ ping,ip=8.8.8.8,domain=dns.google. elapsed=300i 1502489900000000000
```
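Under the hood this is an ordinary PTR lookup. A minimal standalone sketch of the same operation using only the Go standard library (the address and the printed output are illustrative, not taken from the commit):

```go
package main

import (
    "context"
    "fmt"
    "net"
    "time"
)

func main() {
    // Bound the lookup the same way the plugin's lookup_timeout does.
    ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
    defer cancel()

    // LookupAddr performs the reverse (PTR) lookup for an IP address.
    names, err := net.DefaultResolver.LookupAddr(ctx, "8.8.8.8")
    if err != nil {
        fmt.Println("reverse lookup failed:", err)
        return
    }
    fmt.Println(names) // e.g. [dns.google.]
}
```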
309 plugins/processors/reverse_dns/rdnscache.go Normal file

@@ -0,0 +1,309 @@

package reverse_dns

import (
    "context"
    "errors"
    "net"
    "sync"
    "sync/atomic"
    "time"

    "golang.org/x/sync/semaphore"
)

const defaultMaxWorkers = 10

var (
    ErrTimeout = errors.New("request timed out")
)

// AnyResolver is for the net.Resolver
type AnyResolver interface {
    LookupAddr(ctx context.Context, addr string) (names []string, err error)
}

// ReverseDNSCache is safe to use across multiple goroutines.
// if multiple goroutines request the same IP at the same time, one of the
// requests will trigger the lookup and the rest will wait for its response.
type ReverseDNSCache struct {
    Resolver AnyResolver
    stats    RDNSCacheStats

    // settings
    ttl           time.Duration
    lookupTimeout time.Duration
    maxWorkers    int

    // internal
    rwLock              sync.RWMutex
    sem                 *semaphore.Weighted
    cancelCleanupWorker context.CancelFunc

    cache map[string]*dnslookup

    // keep an ordered list of what needs to be worked on and what is due to expire.
    // We can use this list for both with a job position marker, and by popping items
    // off the list as they expire. This avoids iterating over the whole map to find
    // things to do.
    // As a bonus, we only have to read the first item to know if anything in the
    // map has expired.
    // must lock to get access to this.
    expireList     []*dnslookup
    expireListLock sync.Mutex
}

type RDNSCacheStats struct {
    CacheHit          uint64
    CacheMiss         uint64
    CacheExpire       uint64
    RequestsAbandoned uint64
    RequestsFilled    uint64
}

func NewReverseDNSCache(ttl, lookupTimeout time.Duration, workerPoolSize int) *ReverseDNSCache {
    if workerPoolSize <= 0 {
        workerPoolSize = defaultMaxWorkers
    }
    ctx, cancel := context.WithCancel(context.Background())
    d := &ReverseDNSCache{
        ttl:                 ttl,
        lookupTimeout:       lookupTimeout,
        cache:               make(map[string]*dnslookup),
        maxWorkers:          workerPoolSize,
        sem:                 semaphore.NewWeighted(int64(workerPoolSize)),
        cancelCleanupWorker: cancel,
        Resolver:            net.DefaultResolver,
    }
    d.startCleanupWorker(ctx)
    return d
}

// dnslookup represents a lookup request/response. It may or may not be answered yet.
// interested parties register themselves with existing requests or create new ones
// to get their dns query answered. Answers will be pushed out to callbacks.
type dnslookup struct {
    ip        string // keep a copy for the expireList.
    domains   []string
    expiresAt time.Time
    completed bool
    callbacks []callbackChannelType
}

type lookupResult struct {
    domains []string
    err     error
}

type callbackChannelType chan lookupResult

// Lookup takes a string representing a parseable ipv4 or ipv6 IP, and blocks
// until it has resolved to 0-n results, or until its lookup timeout has elapsed.
// if the lookup timeout elapses, it returns an empty slice.
func (d *ReverseDNSCache) Lookup(ip string) ([]string, error) {
    if len(ip) == 0 {
        return nil, nil
    }

    // check if the value is cached
    d.rwLock.RLock()
    result, found := d.lockedGetFromCache(ip)
    if found && result.completed && !result.expiresAt.Before(time.Now()) {
        defer d.rwLock.RUnlock()
        atomic.AddUint64(&d.stats.CacheHit, 1)
        // cache is valid
        return result.domains, nil
    }
    d.rwLock.RUnlock()

    // if it's not cached, kick off a lookup job and subscribe to the result.
    lookupChan := d.subscribeTo(ip)
    timer := time.NewTimer(d.lookupTimeout)
    defer timer.Stop()

    // timer is still necessary even if doLookup respects timeout due to worker
    // pool starvation.
    select {
    case result := <-lookupChan:
        return result.domains, result.err
    case <-timer.C:
        return nil, ErrTimeout
    }
}

func (d *ReverseDNSCache) subscribeTo(ip string) callbackChannelType {
    callback := make(callbackChannelType, 1)

    d.rwLock.Lock()
    defer d.rwLock.Unlock()

    // confirm it's still not in the cache. This needs to be done under an active lock.
    result, found := d.lockedGetFromCache(ip)
    if found {
        atomic.AddUint64(&d.stats.CacheHit, 1)
        // has the request been answered since we last checked?
        if result.completed {
            // we can return the answer with the channel.
            callback <- lookupResult{domains: result.domains}
            return callback
        }
        // there's a request but it hasn't been answered yet;
        // add yourself to the subscribers and return that.
        result.callbacks = append(result.callbacks, callback)
        d.lockedSaveToCache(result)
        return callback
    }

    atomic.AddUint64(&d.stats.CacheMiss, 1)

    // otherwise we need to register the request
    l := &dnslookup{
        ip:        ip,
        expiresAt: time.Now().Add(d.ttl),
        callbacks: []callbackChannelType{callback},
    }

    d.lockedSaveToCache(l)
    go d.doLookup(l.ip)
    return callback
}

// lockedGetFromCache fetches from the correct internal ip cache.
// you MUST first do a read or write lock before calling it, and keep locks around
// the dnslookup that is returned until you clone it.
func (d *ReverseDNSCache) lockedGetFromCache(ip string) (lookup *dnslookup, found bool) {
    lookup, found = d.cache[ip]
    if found && !lookup.expiresAt.After(time.Now()) {
        return nil, false
    }
    return lookup, found
}

// lockedSaveToCache stores a lookup in the correct internal ip cache.
// you MUST first do a write lock before calling it.
func (d *ReverseDNSCache) lockedSaveToCache(lookup *dnslookup) {
    if !lookup.expiresAt.After(time.Now()) {
        return // don't cache.
    }
    d.cache[lookup.ip] = lookup
}

func (d *ReverseDNSCache) startCleanupWorker(ctx context.Context) {
    go func() {
        cleanupTick := time.NewTicker(10 * time.Second)
        for {
            select {
            case <-cleanupTick.C:
                d.cleanup()
            case <-ctx.Done():
                return
            }
        }
    }()
}

func (d *ReverseDNSCache) doLookup(ip string) {
    ctx, cancel := context.WithTimeout(context.Background(), d.lookupTimeout)
    defer cancel()
    if err := d.sem.Acquire(ctx, 1); err != nil {
        // lookup timeout
        d.abandonLookup(ip, ErrTimeout)
        return
    }
    defer d.sem.Release(1)

    names, err := d.Resolver.LookupAddr(ctx, ip)
    if err != nil {
        d.abandonLookup(ip, err)
        return
    }

    d.rwLock.Lock()
    lookup, found := d.lockedGetFromCache(ip)
    if !found {
        d.rwLock.Unlock()
        return
    }

    lookup.domains = names
    lookup.completed = true
    lookup.expiresAt = time.Now().Add(d.ttl) // extend the ttl now that we have a reply.
    callbacks := lookup.callbacks
    lookup.callbacks = nil

    d.lockedSaveToCache(lookup)
    d.rwLock.Unlock()

    d.expireListLock.Lock()
    // add it to the expireList.
    d.expireList = append(d.expireList, lookup)
    d.expireListLock.Unlock()

    atomic.AddUint64(&d.stats.RequestsFilled, uint64(len(callbacks)))
    for _, cb := range callbacks {
        cb <- lookupResult{domains: names}
        close(cb)
    }
}

func (d *ReverseDNSCache) abandonLookup(ip string, err error) {
    d.rwLock.Lock()
    lookup, found := d.lockedGetFromCache(ip)
    if !found {
        d.rwLock.Unlock()
        return
    }

    callbacks := lookup.callbacks
    delete(d.cache, lookup.ip)
    d.rwLock.Unlock()
    // resolve the remaining callbacks to free the resources.
    atomic.AddUint64(&d.stats.RequestsAbandoned, uint64(len(callbacks)))
    for _, cb := range callbacks {
        cb <- lookupResult{err: err}
        close(cb)
    }
}

func (d *ReverseDNSCache) cleanup() {
    now := time.Now()
    d.expireListLock.Lock()
    if len(d.expireList) == 0 {
        d.expireListLock.Unlock()
        return
    }
    ipsToDelete := make([]string, 0, len(d.expireList))
    for i := 0; i < len(d.expireList); i++ {
        if !d.expireList[i].expiresAt.Before(now) {
            break // done. Nothing after this point is expired.
        }
        ipsToDelete = append(ipsToDelete, d.expireList[i].ip)
    }
    if len(ipsToDelete) == 0 {
        d.expireListLock.Unlock()
        return
    }
    d.expireList = d.expireList[len(ipsToDelete):]
    d.expireListLock.Unlock()

    atomic.AddUint64(&d.stats.CacheExpire, uint64(len(ipsToDelete)))

    d.rwLock.Lock()
    defer d.rwLock.Unlock()
    for _, ip := range ipsToDelete {
        delete(d.cache, ip)
    }
}

func (d *ReverseDNSCache) Stats() RDNSCacheStats {
    stats := RDNSCacheStats{}
    stats.CacheHit = atomic.LoadUint64(&d.stats.CacheHit)
    stats.CacheMiss = atomic.LoadUint64(&d.stats.CacheMiss)
    stats.CacheExpire = atomic.LoadUint64(&d.stats.CacheExpire)
    stats.RequestsAbandoned = atomic.LoadUint64(&d.stats.RequestsAbandoned)
    stats.RequestsFilled = atomic.LoadUint64(&d.stats.RequestsFilled)
    return stats
}

func (d *ReverseDNSCache) Stop() {
    d.cancelCleanupWorker()
}
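As a quick orientation for reviewers, here is a minimal sketch of how this cache is driven. It is inferred from the constructor and method signatures above (and placed in the same package for brevity); `exampleCacheUsage` is a hypothetical helper, not part of the commit:

```go
package reverse_dns

import (
    "fmt"
    "time"
)

func exampleCacheUsage() error {
    // ttl 24h, per-lookup timeout 3s, at most 10 parallel lookups.
    cache := NewReverseDNSCache(24*time.Hour, 3*time.Second, 10)
    defer cache.Stop() // stops the background cleanup worker

    // Concurrent Lookup calls for the same IP share one in-flight rDNS
    // request; later calls are answered straight from the cache.
    domains, err := cache.Lookup("8.8.8.8")
    if err != nil {
        return err // ErrTimeout when the lookup timeout elapses first
    }
    if len(domains) > 0 {
        fmt.Println(domains[0]) // e.g. "dns.google."
    }
    return nil
}
```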
142 plugins/processors/reverse_dns/rdnscache_test.go Normal file

@@ -0,0 +1,142 @@

package reverse_dns

import (
    "context"
    "errors"
    "sync"
    "testing"
    "time"

    "github.com/stretchr/testify/require"
)

func TestSimpleReverseDNSLookup(t *testing.T) {
    d := NewReverseDNSCache(60*time.Second, 1*time.Second, -1)
    defer d.Stop()

    d.Resolver = &localResolver{}
    answer, err := d.Lookup("127.0.0.1")
    require.NoError(t, err)
    require.Equal(t, []string{"localhost"}, answer)
    err = blockAllWorkers(t.Context(), d)
    require.NoError(t, err)

    // do another request with no workers available.
    // it should read from cache instantly.
    answer, err = d.Lookup("127.0.0.1")
    require.NoError(t, err)
    require.Equal(t, []string{"localhost"}, answer)

    require.Len(t, d.cache, 1)
    require.Len(t, d.expireList, 1)
    d.cleanup()
    require.Len(t, d.expireList, 1) // ttl hasn't hit yet.

    stats := d.Stats()

    require.EqualValues(t, 0, stats.CacheExpire)
    require.EqualValues(t, 1, stats.CacheMiss)
    require.EqualValues(t, 1, stats.CacheHit)
    require.EqualValues(t, 1, stats.RequestsFilled)
    require.EqualValues(t, 0, stats.RequestsAbandoned)
}

func TestParallelReverseDNSLookup(t *testing.T) {
    d := NewReverseDNSCache(1*time.Second, 1*time.Second, -1)
    defer d.Stop()

    d.Resolver = &localResolver{}
    var answer1, answer2 []string
    var err1, err2 error
    wg := &sync.WaitGroup{}
    wg.Add(2)
    go func() {
        answer1, err1 = d.Lookup("127.0.0.1")
        wg.Done()
    }()
    go func() {
        answer2, err2 = d.Lookup("127.0.0.1")
        wg.Done()
    }()

    wg.Wait()

    require.NoError(t, err1)
    require.NoError(t, err2)

    t.Log(answer1)
    t.Log(answer2)

    require.Equal(t, []string{"localhost"}, answer1)
    require.Equal(t, []string{"localhost"}, answer2)

    require.Len(t, d.cache, 1)

    stats := d.Stats()

    require.EqualValues(t, 1, stats.CacheMiss)
    require.EqualValues(t, 1, stats.CacheHit)
}

func TestUnavailableDNSServerRespectsTimeout(t *testing.T) {
    d := NewReverseDNSCache(0, 1, -1)
    defer d.Stop()

    d.Resolver = &timeoutResolver{}

    result, err := d.Lookup("192.153.33.3")
    require.Error(t, err)
    require.Equal(t, ErrTimeout, err)

    require.Nil(t, result)
}

func TestCleanupHappens(t *testing.T) {
    ttl := 100 * time.Millisecond
    d := NewReverseDNSCache(ttl, 1*time.Second, -1)
    defer d.Stop()

    d.Resolver = &localResolver{}
    _, err := d.Lookup("127.0.0.1")
    require.NoError(t, err)

    require.Len(t, d.cache, 1)

    time.Sleep(ttl) // wait for cache entry to expire.
    d.cleanup()
    require.Empty(t, d.expireList)

    stats := d.Stats()

    require.EqualValues(t, 1, stats.CacheExpire)
    require.EqualValues(t, 1, stats.CacheMiss)
    require.EqualValues(t, 0, stats.CacheHit)
}

func TestLookupTimeout(t *testing.T) {
    d := NewReverseDNSCache(10*time.Second, 10*time.Second, -1)
    defer d.Stop()

    d.Resolver = &timeoutResolver{}
    _, err := d.Lookup("127.0.0.1")
    require.Error(t, err)
    require.EqualValues(t, 1, d.Stats().RequestsAbandoned)
}

type timeoutResolver struct{}

func (*timeoutResolver) LookupAddr(context.Context, string) (names []string, err error) {
    return nil, errors.New("timeout")
}

type localResolver struct{}

func (*localResolver) LookupAddr(context.Context, string) (names []string, err error) {
    return []string{"localhost"}, nil
}

// blockAllWorkers is a test function that eats up all the worker pool space to
// make sure workers are done running and there's no room to acquire a new worker.
func blockAllWorkers(testContext context.Context, d *ReverseDNSCache) error {
    return d.sem.Acquire(testContext, int64(d.maxWorkers))
}
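The worker-pool limiting that `doLookup` relies on (and that `blockAllWorkers` above exhausts deliberately) is plain `golang.org/x/sync/semaphore` usage. A self-contained sketch of the pattern, illustrative only:

```go
package main

import (
    "context"
    "fmt"
    "time"

    "golang.org/x/sync/semaphore"
)

func main() {
    sem := semaphore.NewWeighted(2) // at most 2 lookups in flight

    ctx, cancel := context.WithTimeout(context.Background(), time.Second)
    defer cancel()

    // Acquire blocks until a slot frees up or the context expires; this is
    // why doLookup can abandon a request with ErrTimeout before ever issuing
    // a DNS query when the pool is starved.
    if err := sem.Acquire(ctx, 1); err != nil {
        fmt.Println("no worker available:", err)
        return
    }
    defer sem.Release(1)
    fmt.Println("doing rate-limited work")
}
```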
109 plugins/processors/reverse_dns/reverse_dns.go Normal file

@@ -0,0 +1,109 @@

//go:generate ../../../tools/readme_config_includer/generator
package reverse_dns

import (
    _ "embed"
    "time"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/config"
    "github.com/influxdata/telegraf/plugins/common/parallel"
    "github.com/influxdata/telegraf/plugins/processors"
)

//go:embed sample.conf
var sampleConfig string

type lookupEntry struct {
    Tag   string `toml:"tag"`
    Field string `toml:"field"`
    Dest  string `toml:"dest"`
}

type ReverseDNS struct {
    reverseDNSCache *ReverseDNSCache
    acc             telegraf.Accumulator
    parallel        parallel.Parallel

    Lookups            []lookupEntry   `toml:"lookup"`
    CacheTTL           config.Duration `toml:"cache_ttl"`
    LookupTimeout      config.Duration `toml:"lookup_timeout"`
    MaxParallelLookups int             `toml:"max_parallel_lookups"`
    Ordered            bool            `toml:"ordered"`
    Log                telegraf.Logger `toml:"-"`
}

func (*ReverseDNS) SampleConfig() string {
    return sampleConfig
}

func (r *ReverseDNS) Start(acc telegraf.Accumulator) error {
    r.acc = acc
    r.reverseDNSCache = NewReverseDNSCache(
        time.Duration(r.CacheTTL),
        time.Duration(r.LookupTimeout),
        r.MaxParallelLookups, // max parallel reverse-dns lookups
    )
    if r.Ordered {
        r.parallel = parallel.NewOrdered(acc, r.asyncAdd, 10000, r.MaxParallelLookups)
    } else {
        r.parallel = parallel.NewUnordered(acc, r.asyncAdd, r.MaxParallelLookups)
    }
    return nil
}

func (r *ReverseDNS) Stop() {
    r.parallel.Stop()
    r.reverseDNSCache.Stop()
}

func (r *ReverseDNS) Add(metric telegraf.Metric, _ telegraf.Accumulator) error {
    r.parallel.Enqueue(metric)
    return nil
}

func (r *ReverseDNS) asyncAdd(metric telegraf.Metric) []telegraf.Metric {
    for _, lookup := range r.Lookups {
        if len(lookup.Field) > 0 {
            if ipField, ok := metric.GetField(lookup.Field); ok {
                if ip, ok := ipField.(string); ok {
                    result, err := r.reverseDNSCache.Lookup(ip)
                    if err != nil {
                        r.Log.Errorf("lookup error: %v", err)
                        continue
                    }
                    if len(result) > 0 {
                        metric.AddField(lookup.Dest, result[0])
                    }
                }
            }
        }
        if len(lookup.Tag) > 0 {
            if ipTag, ok := metric.GetTag(lookup.Tag); ok {
                result, err := r.reverseDNSCache.Lookup(ipTag)
                if err != nil {
                    r.Log.Errorf("lookup error: %v", err)
                    continue
                }
                if len(result) > 0 {
                    metric.AddTag(lookup.Dest, result[0])
                }
            }
        }
    }
    return []telegraf.Metric{metric}
}

func init() {
    processors.AddStreaming("reverse_dns", func() telegraf.StreamingProcessor {
        return newReverseDNS()
    })
}

func newReverseDNS() *ReverseDNS {
    return &ReverseDNS{
        CacheTTL:           config.Duration(24 * time.Hour),
        LookupTimeout:      config.Duration(1 * time.Minute),
        MaxParallelLookups: 10,
    }
}
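The streaming-processor lifecycle the Telegraf agent drives against this file is Start, then Add per metric, then Stop. A sketch mirroring the tests below (placed in the same package; `exampleLifecycle` and its sample metric are hypothetical, not part of the commit):

```go
package reverse_dns

import (
    "time"

    "github.com/influxdata/telegraf/metric"
    "github.com/influxdata/telegraf/testutil"
)

func exampleLifecycle() error {
    p := newReverseDNS()
    p.Log = &testutil.Logger{} // any telegraf.Logger works here
    p.Lookups = []lookupEntry{{Tag: "ip", Dest: "domain"}}

    acc := &testutil.Accumulator{}
    if err := p.Start(acc); err != nil { // builds the cache and worker pool
        return err
    }
    m := metric.New("ping", map[string]string{"ip": "8.8.8.8"},
        map[string]interface{}{"elapsed": 300}, time.Now())
    if err := p.Add(m, acc); err != nil { // enqueues; resolution is async
        return err
    }
    p.Stop() // drains in-flight metrics into acc, then stops the cache
    return nil
}
```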
133 plugins/processors/reverse_dns/reverse_dns_test.go Normal file

@@ -0,0 +1,133 @@

package reverse_dns

import (
    "sync"
    "testing"
    "time"

    "github.com/stretchr/testify/require"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/config"
    "github.com/influxdata/telegraf/metric"
    "github.com/influxdata/telegraf/testutil"
)

func TestSimpleReverseLookupIntegration(t *testing.T) {
    if testing.Short() {
        t.Skip("Skipping integration test in short mode")
    }

    now := time.Now()
    m := metric.New("name", map[string]string{
        "dest_ip": "1.1.1.1",
    }, map[string]interface{}{
        "source_ip": "127.0.0.1",
    }, now)

    dns := newReverseDNS()
    dns.Log = &testutil.Logger{}
    dns.Lookups = []lookupEntry{
        {
            Field: "source_ip",
            Dest:  "source_name",
        },
        {
            Tag:  "dest_ip",
            Dest: "dest_name",
        },
    }
    acc := &testutil.Accumulator{}
    err := dns.Start(acc)
    require.NoError(t, err)
    err = dns.Add(m, acc)
    require.NoError(t, err)
    dns.Stop()
    // should be processed now.

    require.Len(t, acc.GetTelegrafMetrics(), 1)
    processedMetric := acc.GetTelegrafMetrics()[0]
    _, ok := processedMetric.GetField("source_name")
    require.True(t, ok)
    tag, ok := processedMetric.GetTag("dest_name")
    require.True(t, ok)
    require.EqualValues(t, "one.one.one.one.", tag)
}

func TestTracking(t *testing.T) {
    inputRaw := []telegraf.Metric{
        metric.New("foo", map[string]string{}, map[string]interface{}{"ip": "1.1.1.1"}, time.Unix(0, 0)),
        metric.New("bar", map[string]string{}, map[string]interface{}{"ip": "1.1.1.1"}, time.Unix(0, 0)),
        metric.New("baz", map[string]string{}, map[string]interface{}{"ip": "1.1.1.1"}, time.Unix(0, 0)),
    }

    var mu sync.Mutex
    delivered := make([]telegraf.DeliveryInfo, 0, len(inputRaw))
    notify := func(di telegraf.DeliveryInfo) {
        mu.Lock()
        defer mu.Unlock()
        delivered = append(delivered, di)
    }

    input := make([]telegraf.Metric, 0, len(inputRaw))
    for _, m := range inputRaw {
        tm, _ := metric.WithTracking(m, notify)
        input = append(input, tm)
    }

    expected := []telegraf.Metric{
        metric.New(
            "foo",
            map[string]string{},
            map[string]interface{}{"ip": "1.1.1.1", "name": "one.one.one.one."},
            time.Unix(0, 0),
        ),
        metric.New(
            "bar",
            map[string]string{},
            map[string]interface{}{"ip": "1.1.1.1", "name": "one.one.one.one."},
            time.Unix(0, 0),
        ),
        metric.New(
            "baz",
            map[string]string{},
            map[string]interface{}{"ip": "1.1.1.1", "name": "one.one.one.one."},
            time.Unix(0, 0),
        ),
    }

    plugin := &ReverseDNS{
        CacheTTL:           config.Duration(24 * time.Hour),
        LookupTimeout:      config.Duration(1 * time.Minute),
        MaxParallelLookups: 10,
        Log:                &testutil.Logger{},
        Lookups: []lookupEntry{
            {
                Field: "ip",
                Dest:  "name",
            },
        },
    }

    // Process expected metrics and compare with resulting metrics
    acc := &testutil.Accumulator{}
    require.NoError(t, plugin.Start(acc))
    for _, m := range input {
        require.NoError(t, plugin.Add(m, acc))
    }
    plugin.Stop()
    actual := acc.GetTelegrafMetrics()
    testutil.RequireMetricsEqual(t, expected, actual, testutil.SortMetrics())

    // Simulate output acknowledging delivery
    for _, m := range actual {
        m.Accept()
    }

    // Check delivery
    require.Eventuallyf(t, func() bool {
        mu.Lock()
        defer mu.Unlock()
        return len(input) == len(delivered)
    }, time.Second, 100*time.Millisecond, "%d delivered but %d expected", len(delivered), len(expected))
}
46 plugins/processors/reverse_dns/sample.conf Normal file

@@ -0,0 +1,46 @@

# ReverseDNS does a reverse lookup on IP addresses to retrieve the DNS name
[[processors.reverse_dns]]
  ## For optimal performance, you may want to limit which metrics are passed
  ## to this processor, e.g.:
  ## namepass = ["my_metric_*"]

  ## cache_ttl is how long DNS entries should stay cached.
  ## Generally longer is better, but if you expect a large number of diverse
  ## lookups you'll want to consider memory use.
  cache_ttl = "24h"

  ## lookup_timeout is how long to wait for a single DNS request to respond.
  ## This is also the maximum acceptable latency for a metric travelling
  ## through the reverse_dns processor. After lookup_timeout is exceeded, a
  ## metric is passed on unaltered.
  ## Multiple simultaneous resolution requests for the same IP make only a
  ## single rDNS request, and they all wait for the answer for this long.
  lookup_timeout = "3s"

  ## max_parallel_lookups is the maximum number of DNS requests to be in
  ## flight at the same time. Requests hitting cached values do not count
  ## against this total, and neither do multiple requests for the same IP.
  ## It's probably best to keep this number fairly low.
  max_parallel_lookups = 10

  ## ordered controls whether the metrics need to stay in the same order this
  ## plugin received them in. If false, this plugin may change the order, with
  ## requests hitting cached results moving through immediately instead of
  ## waiting on slower lookups. If you depend on the order of metrics staying
  ## the same, set this to true. Keeping the metrics ordered may be slightly
  ## slower.
  ordered = false

  [[processors.reverse_dns.lookup]]
    ## get the ip from the field "source_ip", and put the result in the field "source_name"
    field = "source_ip"
    dest = "source_name"

  [[processors.reverse_dns.lookup]]
    ## get the ip from the tag "destination_ip", and put the result in the tag
    ## "destination_name".
    tag = "destination_ip"
    dest = "destination_name"

    ## If you would prefer destination_name to be a field instead, you can use
    ## a processors.converter after this one, specifying the order attribute.