Adding upstream version 1.4.2.
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent e344d0b8ae
commit 1ea3e103a7
77 changed files with 5760 additions and 0 deletions
dsc_datatool/__init__.py (new file, 484 lines)

"""dsc_datatool

The main Python module for the command line tool `dsc-datatool`, see
`man dsc-datatool` on how to run it.

At runtime it will load all plugins under the following module paths:
- dsc_datatool.input
- dsc_datatool.output
- dsc_datatool.generator
- dsc_datatool.transformer

Each plugin category should base its class on one of the following superclasses:
- dsc_datatool.Input
- dsc_datatool.Output
- dsc_datatool.Generator
- dsc_datatool.Transformer

Doing so will automatically register it as available and index it in
the following public dicts using the class name:
- inputs
- outputs
- generators
- transformers

Example of an output:

    from dsc_datatool import Output

    class ExampleOutput(Output):
        def process(self, datasets):
            ...

:copyright: 2024 OARC, Inc.
"""

__version__ = '1.4.2'

import argparse
import logging
import os
import importlib
import pkgutil
import sys
import traceback
import re

args = argparse.Namespace()
inputs = {}
outputs = {}
generators = {}
transformers = {}
process_dataset = {}
encoding = 'utf-8'


class Dataset(object):
    """A representation of a DSC dataset

    A DSC dataset is a one- to two-dimensional structure where the last
    dimension holds an array of values and counters.

    It is based on the XML structure of DSC:

        <array name="pcap_stats" dimensions="2" start_time="1563520560" stop_time="1563520620">
          <dimension number="1" type="ifname"/>
          <dimension number="2" type="pcap_stat"/>
          <data>
            <ifname val="eth0">
              <pcap_stat val="filter_received" count="5625"/>
              <pcap_stat val="pkts_captured" count="4894"/>
              <pcap_stat val="kernel_dropped" count="731"/>
            </ifname>
          </data>
        </array>

    Attributes:
    - name: The name of the dataset
    - start_time: The start time of the dataset in seconds
    - stop_time: The stop time of the dataset in seconds
    - dimensions: An array of `Dimension` objects, starting with the first dimension
    """
    name = None
    start_time = None
    stop_time = None
    dimensions = None


    def __init__(self):
        self.dimensions = []


    def __repr__(self):
        return '<Dataset name=%r dimension=%r>' % (self.name, self.dimensions)


class Dimension(object):
    """A representation of a DSC dimension

    A DSC dataset dimension which can be the first or second dimension,
    see `Dataset` for more information.

    Attributes:
    - name: The name of the dimension
    - value: Is set to the value of the dimension if it's the first dimension
    - values: A dict of values with corresponding counters if it's the second dimension
    """
    name = None
    value = None
    values = None
    dimensions = None


    def __init__(self, name):
        self.name = name
        self.values = {}
        self.dimensions = []


    def __repr__(self):
        return '<Dimension name=%r value=%r dimension=%r>' % (self.name, self.values or self.value, self.dimensions)


class Input(object):
    """Base class of an input plugin"""


    def process(self, file):
        """Input.process(...) -> [ Dataset, ... ]

        Called to process a file and return an array of `Dataset`'s found in it.
        """
        raise Exception('process() not overloaded')


    def __init_subclass__(cls):
        """This method is called when a class is subclassed and it will
        register the input plugin in `inputs`."""
        global inputs
        if cls.__name__ in inputs:
            raise Exception('Duplicate input module: %s already exists' % cls.__name__)
        inputs[cls.__name__] = cls


class Output(object):
    """Base class of an output plugin"""


    def process(self, datasets):
        """Output.process([ Dataset, ... ])

        Called to output the `Dataset`'s in the given array."""
        raise Exception('process() not overloaded')


    def __init__(self, opts):
        """instance = Output({ 'opt': value, ... })

        Called to create an instance of the output plugin, will get a dict
        with options provided on command line."""
        pass


    def __init_subclass__(cls):
        """This method is called when a class is subclassed and it will
        register the output plugin in `outputs`."""
        global outputs
        if cls.__name__ in outputs:
            raise Exception('Duplicate output module: %s already exists' % cls.__name__)
        outputs[cls.__name__] = cls


class Generator(object):
    """Base class of a generator plugin"""


    def process(self, datasets):
        """Generator.process([ Dataset, ... ]) -> [ Dataset, ... ]

        Called to generate additional `Dataset`'s based on the given array
        of `Dataset`'s."""
        raise Exception('process() not overloaded')


    def __init__(self, opts):
        """instance = Generator({ 'opt': value, ... })

        Called to create an instance of the generator plugin, will get a dict
        with options provided on command line."""
        pass


    def __init_subclass__(cls):
        """This method is called when a class is subclassed and it will
        register the generator plugin in `generators`."""
        global generators
        if cls.__name__ in generators:
            raise Exception('Duplicate generator module: %s already exists' % cls.__name__)
        generators[cls.__name__] = cls


class Transformer(object):
    """Base class of a transformer plugin"""


    def process(self, datasets):
        """Transformer.process([ Dataset, ... ])

        Called to do transformation of the given `Dataset`'s, as in modifying
        them directly."""
        raise Exception('process() not overloaded')


    def __init__(self, opts):
        """instance = Transformer({ 'opt': value, ... })

        Called to create an instance of the transformer plugin, will get a dict
        with options provided on command line."""
        pass


    def __init_subclass__(cls):
        """This method is called when a class is subclassed and it will
        register the transformer plugin in `transformers`."""
        global transformers
        if cls.__name__ in transformers:
            raise Exception('Duplicate transformer module: %s already exists' % cls.__name__)
        transformers[cls.__name__] = cls


def main():
    """Called when running `dsc-datatool`."""
    def iter_namespace(ns_pkg):
        return pkgutil.iter_modules(ns_pkg.__path__, ns_pkg.__name__ + ".")


    def split_arg(arg, num=1):
        sep = arg[0]
        p = arg.split(sep)
        p.pop(0)
        ret = ()
        while num > 0:
            ret += (p.pop(0),)
            num -= 1
        ret += (p,)
        return ret


    def parse_opts(opts):
        ret = {}
        for opt in opts:
            p = opt.split('=', maxsplit=1)
            if len(p) > 1:
                if p[0] in ret:
                    if isinstance(ret[p[0]], list):
                        ret[p[0]].append(p[1])
                    else:
                        ret[p[0]] = [ ret[p[0]], p[1] ]
                else:
                    ret[p[0]] = p[1]
            elif len(p) > 0:
                ret[p[0]] = True
        return ret


    def _process(datasets, generators, transformers, outputs):
        gen_datasets = []
        for generator in generators:
            try:
                gen_datasets += generator.process(datasets)
            except Exception as e:
                logging.warning('Generator %s failed: %s' % (generator, e))
                exc_type, exc_value, exc_traceback = sys.exc_info()
                for tb in traceback.format_tb(exc_traceback):
                    logging.warning(str(tb))
                return 2

        datasets += gen_datasets

        if '*' in transformers:
            for transformer in transformers['*']:
                try:
                    transformer.process(datasets)
                except Exception as e:
                    logging.warning('Transformer %s failed: %s' % (transformer, e))
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    for tb in traceback.format_tb(exc_traceback):
                        logging.warning(str(tb))
                    return 2
        for dataset in datasets:
            if dataset.name in transformers:
                for transformer in transformers[dataset.name]:
                    try:
                        transformer.process([dataset])
                    except Exception as e:
                        logging.warning('Transformer %s failed: %s' % (transformer, e))
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        for tb in traceback.format_tb(exc_traceback):
                            logging.warning(str(tb))
                        return 2

        for output in outputs:
            try:
                output.process(datasets)
            except Exception as e:
                logging.warning('Output %s failed: %s' % (output, e))
                exc_type, exc_value, exc_traceback = sys.exc_info()
                for tb in traceback.format_tb(exc_traceback):
                    logging.warning(str(tb))
                return 2

        return 0


    global args, inputs, outputs, generators, transformers, process_dataset

    parser = argparse.ArgumentParser(prog='dsc-datatool',
        description='Export DSC data into various formats and databases.',
        epilog='See man-page dsc-datatool(1) and dsc-datatool-[generator|transformer|output] <name>(5) for more information')
    parser.add_argument('-c', '--conf', nargs=1,
        help='Not implemented')
        # help='Specify the YAML configuration file to use (default to ~/.dsc-datatool.conf), any command line option will override the options in the configuration file. See dsc-datatool.conf(5) for more information.')
    parser.add_argument('-s', '--server', nargs=1,
        help='Specify the server for where the data comes from. (required)')
    parser.add_argument('-n', '--node', nargs=1,
        help='Specify the node for where the data comes from. (required)')
    parser.add_argument('-x', '--xml', action='append',
        help='Read DSC data from the given file or directory, can be specified multiple times. If a directory is given then all files ending with .xml will be read.')
    parser.add_argument('-d', '--dat', action='append',
        help='Read DSC data from the given directory, can be specified multiple times. Note that the DAT format is dependent on the filename to know what type of data it is.')
    parser.add_argument('--dataset', action='append',
        help='Specify that only the list of datasets will be processed, the list is comma separated and the option can be given multiple times.')
    parser.add_argument('-o', '--output', action='append',
        help='"<sep><output>[<sep>option=value...]>" Output data to <output> and use <separator> as an options separator.')
    parser.add_argument('-t', '--transform', action='append',
        help='"<sep><name><sep><datasets>[<sep>option=value...]>" Use the transformer <name> to change the list of datasets in <datasets>.')
    parser.add_argument('-g', '--generator', action='append',
        help='"<name>[,<name>,...]" or "<sep><name>[<sep>option=value...]>" Use the specified generators to generate additional datasets.')
    parser.add_argument('--list', action='store_true',
        help='List the available generators, transformers and outputs then exit.')
    parser.add_argument('--skipped-key', nargs=1, default='-:SKIPPED:-',
        help='Set the special DSC skipped key. (default to "-:SKIPPED:-")')
    parser.add_argument('--skipped-sum-key', nargs=1, default='-:SKIPPED_SUM:-',
        help='Set the special DSC skipped sum key. (default to "-:SKIPPED_SUM:-")')
    parser.add_argument('--encoding', nargs=1, default='utf-8',
        help='Encoding to use for all files, default utf-8.')
    parser.add_argument('-v', '--verbose', action='count', default=0,
        help='Increase the verbose level, can be given multiple times.')
    parser.add_argument('-V', '--version', action='version', version='%(prog)s v'+__version__,
        help='Display version and exit.')

    args = parser.parse_args()

    log_level = 30 - (args.verbose * 10)
    if log_level < 0:
        log_level = 0
    logging.basicConfig(format='%(asctime)s %(levelname)s %(module)s: %(message)s', level=log_level, stream=sys.stderr)

    import dsc_datatool.input
    import dsc_datatool.output
    import dsc_datatool.generator
    import dsc_datatool.transformer

    for finder, name, ispkg in iter_namespace(dsc_datatool.input):
        importlib.import_module(name)
    for finder, name, ispkg in iter_namespace(dsc_datatool.output):
        importlib.import_module(name)
    for finder, name, ispkg in iter_namespace(dsc_datatool.generator):
        importlib.import_module(name)
    for finder, name, ispkg in iter_namespace(dsc_datatool.transformer):
        importlib.import_module(name)

    if args.list:
        print('Generators:')
        for name in generators:
            print('', name)
        print('Transformers:')
        for name in transformers:
            print('', name)
        print('Outputs:')
        for name in outputs:
            print('', name)
        return 0

    if not args.server or not args.node:
        raise Exception('--server and --node must be given')

    if isinstance(args.server, list):
        args.server = ' '.join(args.server)
    elif not isinstance(args.server, str):
        raise Exception('Invalid argument for --server: %r' % args.server)
    if isinstance(args.node, list):
        args.node = ' '.join(args.node)
    elif not isinstance(args.node, str):
        raise Exception('Invalid argument for --node: %r' % args.node)

    gens = []
    if args.generator:
        for arg in args.generator:
            if not re.match(r'^\w', arg):
                name, opts = split_arg(arg)
                if not name in generators:
                    logging.critical('Generator %s does not exist' % name)
                    return 1
                gens.append(generators[name](parse_opts(opts)))
                continue
            for name in arg.split(','):
                if not name in generators:
                    logging.critical('Generator %s does not exist' % name)
                    return 1
                gens.append(generators[name]({}))

    trans = {}
    if args.transform:
        for arg in args.transform:
            name, datasets, opts = split_arg(arg, num=2)
            if not name in transformers:
                logging.critical('Transformer %s does not exist' % name)
                return 1
            for dataset in datasets.split(','):
                if not dataset in trans:
                    trans[dataset] = []
                trans[dataset].append(transformers[name](parse_opts(opts)))

    out = []
    if args.output:
        for arg in args.output:
            name, opts = split_arg(arg)
            if not name in outputs:
                logging.critical('Output %s does not exist' % name)
                return 1
            out.append(outputs[name](parse_opts(opts)))

    if args.dataset:
        for dataset in args.dataset:
            for p in dataset.split(','):
                process_dataset[p] = True

    xml = []
    if args.xml:
        for entry in args.xml:
            if os.path.isfile(entry):
                xml.append(entry)
            elif os.path.isdir(entry):
                with os.scandir(entry) as dir:
                    for file in dir:
                        if not file.name.startswith('.') and file.is_file() and file.name.lower().endswith('.xml'):
                            xml.append(file.path)
            else:
                logging.error('--xml %r is not a file or directory' % entry)

    dat = []
    if args.dat:
        for entry in args.dat:
            if os.path.isdir(entry):
                dat.append(entry)
            else:
                logging.error('--dat %r is not a directory' % entry)

    if not xml and not dat:
        logging.error('No valid --xml or --dat given')
        return 1

    xml_input = inputs['XML']()
    for file in xml:
        try:
            datasets = xml_input.process(file)
        except Exception as e:
            logging.critical('Unable to process XML file %s: %s' % (file, e))
            return 1

        ret = _process(datasets, gens, trans, out)
        if ret > 0:
            return ret

    dat_input = inputs['DAT']()
    for dir in dat:
        try:
            datasets = dat_input.process(dir)
        except Exception as e:
            logging.critical('Unable to process DAT files in %s: %s' % (dir, e))
            return 1

        ret = _process(datasets, gens, trans, out)
        if ret > 0:
            return ret
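
A minimal sketch of how the registration mechanism above is used by a plugin; the PrintCount class is hypothetical and not part of this commit:

    from dsc_datatool import Output, outputs

    class PrintCount(Output):
        """Hypothetical output plugin: prints each dataset name and its dimension count."""

        def process(self, datasets):
            for dataset in datasets:
                print('%s: %d first-level dimension(s)' % (dataset.name, len(dataset.dimensions)))

    # Subclassing Output triggers Output.__init_subclass__, which indexes the
    # plugin by class name in the public `outputs` dict.
    assert 'PrintCount' in outputs
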
dsc_datatool/generator/client_subnet_authority.py (new file, 181 lines)

"""dsc_datatool.generator.client_subnet_authority

See `man dsc-datatool-generator client_subnet_authority`.

Part of dsc_datatool.

:copyright: 2024 OARC, Inc.
"""

import csv
import ipaddress
import logging
from urllib.request import Request, urlopen
from io import StringIO

from dsc_datatool import Generator, Dataset, Dimension, args, encoding


_whois2rir = {
    'whois.apnic.net': 'APNIC',
    'whois.arin.net': 'ARIN',
    'whois.ripe.net': 'RIPE NCC',
    'whois.lacnic.net': 'LACNIC',
    'whois.afrinic.net': 'AFRINIC',
}

_desig2rir = {
    'apnic': 'APNIC',
    'arin': 'ARIN',
    'ripe ncc': 'RIPE NCC',
    'lacnic': 'LACNIC',
    'afrinic': 'AFRINIC',
    'iana': 'IANA',
    '6to4': 'IANA',
}


class client_subnet_authority(Generator):
    auth = None
    nonstrict = False


    def _read(self, input):
        global _whois2rir, _desig2rir
        for row in csv.reader(input):
            prefix, designation, date, whois, rdap, status, note = row
            if prefix == 'Prefix':
                continue
            rir = designation.replace('Administered by ', '').lower()

            whois = whois.lower()
            if whois in _whois2rir:
                rir = _whois2rir[whois]
            else:
                if rir in _desig2rir:
                    rir = _desig2rir[rir]
                else:
                    found = None
                    for k, v in _desig2rir.items():
                        if k in rir:
                            found = v
                            break
                    if found:
                        rir = found
                    else:
                        if status == 'RESERVED':
                            rir = 'IANA'
                        elif designation == 'Segment Routing (SRv6) SIDs':
                            # TODO: How to better handle this weird allocation?
                            rir = 'IANA'
                        else:
                            raise Exception('Unknown whois/designation: %r/%r' % (whois, designation))

            try:
                net = ipaddress.ip_network(prefix)
            except Exception:
                ip, net = prefix.split('/')
                net = ipaddress.ip_network('%s.0.0.0/%s' % (int(ip), net))

            if net.version == 4:
                idx = ipaddress.ip_network('%s/8' % net.network_address, strict=False)
            else:
                idx = ipaddress.ip_network('%s/24' % net.network_address, strict=False)

            if idx.network_address in self.auth:
                self.auth[idx.network_address].append({'net': net, 'auth': rir})
            else:
                self.auth[idx.network_address] = [{'net': net, 'auth': rir}]


    def __init__(self, opts):
        Generator.__init__(self, opts)
        self.auth = {}
        csvs = opts.get('csv', None)
        urlv4 = opts.get('urlv4', 'https://www.iana.org/assignments/ipv4-address-space/ipv4-address-space.csv')
        urlv6 = opts.get('urlv6', 'https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.csv')
        if opts.get('nonstrict', False):
            self.nonstrict = True

        if csvs:
            if not isinstance(csvs, list):
                csvs = [ csvs ]
            for file in csvs:
                with open(file, newline='', encoding=encoding) as csvfile:
                    self._read(csvfile)
        elif opts.get('fetch', 'no').lower() == 'yes':
            urls = opts.get('url', [ urlv4, urlv6 ])
            if urls and not isinstance(urls, list):
                urls = [ urls ]
            logging.info('bootstrapping client subnet authority using URLs')
            for url in urls:
                logging.info('fetching %s' % url)
                self._read(StringIO(urlopen(Request(url)).read().decode('utf-8')))
        else:
            raise Exception('No authorities bootstrapped, please specify csv= or fetch=yes')


    def process(self, datasets):
        gen_datasets = []

        for dataset in datasets:
            if dataset.name != 'client_subnet':
                continue

            subnets = {}
            for d1 in dataset.dimensions:
                for d2 in d1.dimensions:
                    for k, v in d2.values.items():
                        if k == args.skipped_key:
                            continue
                        elif k == args.skipped_sum_key:
                            continue

                        if k in subnets:
                            subnets[k] += v
                        else:
                            subnets[k] = v

            auth = {}
            for subnet in subnets:
                try:
                    ip = ipaddress.ip_address(subnet)
                except Exception as e:
                    if not self.nonstrict:
                        raise e
                    continue
                if ip.version == 4:
                    idx = ipaddress.ip_network('%s/8' % ip, strict=False)
                    ip = ipaddress.ip_network('%s/32' % ip)
                else:
                    idx = ipaddress.ip_network('%s/24' % ip, strict=False)
                    ip = ipaddress.ip_network('%s/128' % ip)
                if not idx.network_address in self.auth:
                    idx = '??'
                else:
                    for entry in self.auth[idx.network_address]:
                        if entry['net'].overlaps(ip):
                            idx = entry['auth']
                            break

                if idx in auth:
                    auth[idx] += subnets[subnet]
                else:
                    auth[idx] = subnets[subnet]

            if auth:
                authd = Dataset()
                authd.name = 'client_subnet_authority'
                authd.start_time = dataset.start_time
                authd.stop_time = dataset.stop_time
                gen_datasets.append(authd)

                authd1 = Dimension('ClientAuthority')
                authd1.values = auth
                authd.dimensions.append(authd1)

        return gen_datasets


import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5:  # pragma: no cover
    Generator.__init_subclass__(client_subnet_authority)
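
For reference, a short sketch of the bucketing scheme used by _read() and process() above: addresses are indexed by their /8 (IPv4) or /24 (IPv6) network address before the exact overlaps() check. The address below is a documentation example, not data from this commit:

    import ipaddress

    ip = ipaddress.ip_address('192.0.2.1')
    if ip.version == 4:
        idx = ipaddress.ip_network('%s/8' % ip, strict=False)
    else:
        idx = ipaddress.ip_network('%s/24' % ip, strict=False)
    print(idx.network_address)  # 192.0.0.0, the lookup key into self.auth
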
dsc_datatool/generator/client_subnet_country.py (new file, 98 lines)

"""dsc_datatool.generator.client_subnet_country

See `man dsc-datatool-generator client_subnet_country`.

Part of dsc_datatool.

:copyright: 2024 OARC, Inc.
"""

import maxminddb
import os
import logging

from dsc_datatool import Generator, Dataset, Dimension, args


class client_subnet_country(Generator):
    reader = None
    nonstrict = False


    def __init__(self, opts):
        Generator.__init__(self, opts)
        paths = opts.get('path', ['/var/lib/GeoIP', '/usr/share/GeoIP', '/usr/local/share/GeoIP'])
        if not isinstance(paths, list):
            paths = [ paths ]
        filename = opts.get('filename', 'GeoLite2-Country.mmdb')
        db = opts.get('db', None)

        if db is None:
            for path in paths:
                db = '%s/%s' % (path, filename)
                if os.path.isfile(db) and os.access(db, os.R_OK):
                    break
                db = None
        if db is None:
            raise Exception('Please specify valid Maxmind database with path=,filename= or db=')

        logging.info('Using %s' % db)
        self.reader = maxminddb.open_database(db)

        if opts.get('nonstrict', False):
            self.nonstrict = True


    def process(self, datasets):
        gen_datasets = []

        for dataset in datasets:
            if dataset.name != 'client_subnet':
                continue

            subnets = {}
            for d1 in dataset.dimensions:
                for d2 in d1.dimensions:
                    for k, v in d2.values.items():
                        if k == args.skipped_key:
                            continue
                        elif k == args.skipped_sum_key:
                            continue

                        if k in subnets:
                            subnets[k] += v
                        else:
                            subnets[k] = v

            cc = {}
            for subnet in subnets:
                try:
                    c = self.reader.get(subnet)
                except Exception as e:
                    if not self.nonstrict:
                        raise e
                    continue
                if c:
                    iso_code = c.get('country', {}).get('iso_code', '??')
                    if iso_code in cc:
                        cc[iso_code] += subnets[subnet]
                    else:
                        cc[iso_code] = subnets[subnet]

            if cc:
                ccd = Dataset()
                ccd.name = 'client_subnet_country'
                ccd.start_time = dataset.start_time
                ccd.stop_time = dataset.stop_time
                gen_datasets.append(ccd)

                ccd1 = Dimension('ClientCountry')
                ccd1.values = cc
                ccd.dimensions.append(ccd1)

        return gen_datasets


import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5:  # pragma: no cover
    Generator.__init_subclass__(client_subnet_country)
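
A hedged usage sketch of the maxminddb lookup this generator relies on; the database path and address are examples and assume a GeoLite2 Country database is installed:

    import maxminddb

    reader = maxminddb.open_database('/var/lib/GeoIP/GeoLite2-Country.mmdb')
    rec = reader.get('192.0.2.1')  # None when the address is not in the database
    print((rec or {}).get('country', {}).get('iso_code', '??'))
    reader.close()
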
dsc_datatool/input/dat.py (new file, 177 lines)

"""dsc_datatool.input.dat

Input plugin to generate `Dataset`'s from DSC DAT files.

Part of dsc_datatool.

:copyright: 2024 OARC, Inc.
"""

import re

from dsc_datatool import Input, Dataset, Dimension, process_dataset, encoding


_dataset1d = [
    'client_subnet_count',
    'ipv6_rsn_abusers_count',
]

_dataset2d = {
    'qtype': 'Qtype',
    'rcode': 'Rcode',
    'do_bit': 'D0',
    'rd_bit': 'RD',
    'opcode': 'Opcode',
    'dnssec_qtype': 'Qtype',
    'edns_version': 'EDNSVersion',
    'client_subnet2_count': 'Class',
    'client_subnet2_trace': 'Class',
    'edns_bufsiz': 'EDNSBufSiz',
    'idn_qname': 'IDNQname',
    'client_port_range': 'PortRange',
    'priming_responses': 'ReplyLen',
}

_dataset3d = {
    'chaos_types_and_names': [ 'Qtype', 'Qname' ],
    'certain_qnames_vs_qtype': [ 'CertainQnames', 'Qtype' ],
    'direction_vs_ipproto': [ 'Direction', 'IPProto' ],
    'pcap_stats': [ 'pcap_stat', 'ifname' ],
    'transport_vs_qtype': [ 'Transport', 'Qtype' ],
    'dns_ip_version': [ 'IPVersion', 'Qtype' ],
    'priming_queries': [ 'Transport', 'EDNSBufSiz' ],
    'qr_aa_bits': [ 'Direction', 'QRAABits' ],
}


class DAT(Input):
    def process(self, dir):
        global _dataset1d, _dataset2d, _dataset3d

        datasets = []

        for d in _dataset1d:
            if process_dataset and not d in process_dataset:
                continue
            try:
                datasets += self.process1d('%s/%s.dat' % (dir, d), d)
            except FileNotFoundError:
                pass
        for k, v in _dataset2d.items():
            if process_dataset and not k in process_dataset:
                continue
            try:
                datasets += self.process2d('%s/%s.dat' % (dir, k), k, v)
            except FileNotFoundError:
                pass
        for k, v in _dataset3d.items():
            if process_dataset and not k in process_dataset:
                continue
            try:
                datasets += self.process3d('%s/%s.dat' % (dir, k), k, v[0], v[1])
            except FileNotFoundError:
                pass

        return datasets


    def process1d(self, file, name):
        datasets = []
        with open(file, 'r', encoding=encoding) as f:
            for l in f.readlines():
                if re.match(r'^#', l):
                    continue
                l = re.sub(r'[\r\n]+$', '', l)
                dat = re.split(r'\s+', l)
                if len(dat) != 2:
                    raise Exception('DAT %r dataset %r: invalid number of elements for a 1d dataset' % (file, name))

                dataset = Dataset()
                dataset.name = name
                dataset.start_time = int(dat.pop(0))
                dataset.stop_time = dataset.start_time + 60

                d1 = Dimension('All')
                d1.values = { 'ALL': int(dat[0]) }
                dataset.dimensions.append(d1)

                datasets.append(dataset)

        return datasets


    def process2d(self, file, name, field):
        datasets = []
        with open(file, 'r', encoding=encoding) as f:
            for l in f.readlines():
                if re.match(r'^#', l):
                    continue
                l = re.sub(r'[\r\n]+$', '', l)
                dat = re.split(r'\s+', l)

                dataset = Dataset()
                dataset.name = name
                dataset.start_time = int(dat.pop(0))
                dataset.stop_time = dataset.start_time + 60

                d1 = Dimension('All')
                d1.value = 'ALL'
                dataset.dimensions.append(d1)

                d2 = Dimension(field)
                while dat:
                    if len(dat) < 2:
                        raise Exception('DAT %r dataset %r: invalid number of elements for a 2d dataset' % (file, name))
                    k = dat.pop(0)
                    v = dat.pop(0)
                    d2.values[k] = int(v)
                d1.dimensions.append(d2)

                datasets.append(dataset)

        return datasets


    def process3d(self, file, name, first, second):
        datasets = []
        with open(file, 'r', encoding=encoding) as f:
            for l in f.readlines():
                if re.match(r'^#', l):
                    continue
                l = re.sub(r'[\r\n]+$', '', l)
                dat = re.split(r'\s+', l)

                dataset = Dataset()
                dataset.name = name
                dataset.start_time = int(dat.pop(0))
                dataset.stop_time = dataset.start_time + 60

                while dat:
                    if len(dat) < 2:
                        raise Exception('DAT %r dataset %r: invalid number of elements for a 3d dataset' % (file, name))
                    k = dat.pop(0)
                    v = dat.pop(0)

                    d1 = Dimension(first)
                    d1.value = k
                    dataset.dimensions.append(d1)

                    d2 = Dimension(second)
                    dat2 = v.split(':')
                    while dat2:
                        if len(dat2) < 2:
                            raise Exception('DAT %r dataset %r: invalid number of elements for a 3d dataset' % (file, name))
                        k2 = dat2.pop(0)
                        v2 = dat2.pop(0)
                        d2.values[k2] = int(v2)
                    d1.dimensions.append(d2)

                datasets.append(dataset)

        return datasets


import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5:  # pragma: no cover
    Input.__init_subclass__(DAT)
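
A quick sketch of the 3d parsing steps above, using a hypothetical line of the form "<start_time> <dim1-value> <dim2-value>:<count>[:<dim2-value>:<count>...]" (the values are made up for illustration):

    import re

    line = '1563520560 valueA x:1:y:2'
    dat = re.split(r'\s+', line)
    start_time = int(dat.pop(0))
    while dat:
        k = dat.pop(0)                 # first-dimension value
        pairs = dat.pop(0).split(':')  # alternating second-dimension value / count
        counts = dict(zip(pairs[0::2], map(int, pairs[1::2])))
        print(start_time, k, counts)   # 1563520560 valueA {'x': 1, 'y': 2}
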
dsc_datatool/input/xml.py (new file, 71 lines)

"""dsc_datatool.input.xml

Input plugin to generate `Dataset`'s from DSC XML files.

Part of dsc_datatool.

:copyright: 2024 OARC, Inc.
"""

import logging
from xml.dom import minidom
import base64

from dsc_datatool import Input, Dataset, Dimension, process_dataset


class XML(Input):
    def process(self, file):
        dom = minidom.parse(file)
        datasets = []
        for array in dom.getElementsByTagName('array'):
            if process_dataset and not array.getAttribute('name') in process_dataset:
                continue

            dataset = Dataset()
            dataset.name = array.getAttribute('name')
            dataset.start_time = int(array.getAttribute('start_time'))
            dataset.stop_time = int(array.getAttribute('stop_time'))

            dimensions = [None, None]
            for dimension in array.getElementsByTagName('dimension'):
                if dimension.getAttribute('number') == '1':
                    if dimensions[0]:
                        logging.warning('Overwriting dimension 1 for %s' % dataset.name)
                    dimensions[0] = dimension.getAttribute('type')
                elif dimension.getAttribute('number') == '2':
                    if dimensions[1]:
                        logging.warning('Overwriting dimension 2 for %s' % dataset.name)
                    dimensions[1] = dimension.getAttribute('type')
                else:
                    logging.warning('Invalid dimension number %r for %s' % (dimension.getAttribute('number'), dataset.name))

            for node1 in array.getElementsByTagName(dimensions[0]):
                d1 = Dimension(dimensions[0])
                d1.value = node1.getAttribute('val')
                try:
                    if node1.getAttribute('base64'):
                        d1.value = base64.b64decode(d1.value).decode('utf-8')
                except Exception as e:
                    pass
                dataset.dimensions.append(d1)

                d2 = Dimension(dimensions[1])
                d1.dimensions.append(d2)
                for node2 in node1.getElementsByTagName(dimensions[1]):
                    val = node2.getAttribute('val')
                    try:
                        if node2.getAttribute('base64'):
                            val = base64.b64decode(val).decode('utf-8')
                    except Exception as e:
                        pass
                    d2.values[val] = int(node2.getAttribute('count'))

            datasets.append(dataset)

        return datasets


import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5:  # pragma: no cover
    Input.__init_subclass__(XML)
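
A usage sketch (temporary file name is generated at runtime): feeding the sample <array> from the Dataset docstring to the XML input plugin above.

    import tempfile
    from dsc_datatool.input.xml import XML

    xml_doc = '''<array name="pcap_stats" dimensions="2" start_time="1563520560" stop_time="1563520620">
      <dimension number="1" type="ifname"/>
      <dimension number="2" type="pcap_stat"/>
      <data>
        <ifname val="eth0">
          <pcap_stat val="filter_received" count="5625"/>
        </ifname>
      </data>
    </array>'''

    with tempfile.NamedTemporaryFile('w', suffix='.xml', delete=False) as f:
        f.write(xml_doc)
        path = f.name

    for ds in XML().process(path):
        print(ds)  # <Dataset name='pcap_stats' dimension=[...]>
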
dsc_datatool/output/influxdb.py (new file, 103 lines)

"""dsc_datatool.output.influxdb

See `man dsc-datatool-output influxdb`.

Part of dsc_datatool.

:copyright: 2024 OARC, Inc.
"""

import re
import sys
import atexit

from dsc_datatool import Output, args, encoding


_re = re.compile(r'([,=\\\s])')


def _key(key):
    return re.sub(_re, r'\\\1', key)


def _val(val):
    ret = re.sub(_re, r'\\\1', val)
    if ret == '':
        return '""'
    return ret


def _process(tags, timestamp, dimension, fh):
    if dimension.dimensions is None:
        return

    if len(dimension.dimensions) > 0:
        if not (dimension.name == 'All' and dimension.value == 'ALL'):
            tags += ',%s=%s' % (_key(dimension.name.lower()), _val(dimension.value))
        for d2 in dimension.dimensions:
            _process(tags, timestamp, d2, fh)
        return

    if dimension.values is None:
        return

    if len(dimension.values) > 0:
        tags += ',%s=' % _key(dimension.name.lower())

    for k, v in dimension.values.items():
        print('%s%s value=%s %s' % (tags, _val(k), v, timestamp), file=fh)


class InfluxDB(Output):
    start_timestamp = True
    fh = None


    def __init__(self, opts):
        Output.__init__(self, opts)
        timestamp = opts.get('timestamp', 'start')
        if timestamp == 'start':
            pass
        elif timestamp == 'stop':
            self.start_timestamp = False
        else:
            raise Exception('timestamp option invalid')
        file = opts.get('file', None)
        append = opts.get('append', False)
        if file:
            if append:
                self.fh = open(file, 'a', encoding=encoding)
            else:
                self.fh = open(file, 'w', encoding=encoding)
            atexit.register(self.close)
        else:
            self.fh = sys.stdout

        if opts.get('dml', False):
            print('# DML', file=self.fh)
            database = opts.get('database', None)
            if database:
                print('# CONTEXT-DATABASE: %s' % database, file=self.fh)


    def close(self):
        if self.fh:
            self.fh.close()
            self.fh = None


    def process(self, datasets):
        for dataset in datasets:
            tags = '%s,server=%s,node=%s' % (_key(dataset.name.lower()), args.server, args.node)
            if self.start_timestamp:
                timestamp = dataset.start_time * 1000000000
            else:
                timestamp = dataset.stop_time * 1000000000

            for d in dataset.dimensions:
                _process(tags, timestamp, d, self.fh)


if sys.version_info[0] == 3 and sys.version_info[1] == 5:  # pragma: no cover
    Output.__init_subclass__(InfluxDB)
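
A hedged usage sketch that emits one line of InfluxDB line protocol to stdout. The server/node names are placeholders; in normal operation they are set by main() from --server/--node:

    import dsc_datatool
    from dsc_datatool import Dataset, Dimension
    from dsc_datatool.output.influxdb import InfluxDB

    dsc_datatool.args.server = 'example-server'
    dsc_datatool.args.node = 'example-node'

    ds = Dataset()
    ds.name = 'pcap_stats'
    ds.start_time = 1563520560
    d1 = Dimension('ifname')
    d1.value = 'eth0'
    d2 = Dimension('pcap_stat')
    d2.values = {'filter_received': 5625}
    d1.dimensions.append(d2)
    ds.dimensions.append(d1)

    InfluxDB({}).process([ds])
    # pcap_stats,server=example-server,node=example-node,ifname=eth0,pcap_stat=filter_received value=5625 1563520560000000000
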
dsc_datatool/output/prometheus.py (new file, 112 lines)

"""dsc_datatool.output.prometheus

See `man dsc-datatool-output prometheus`.

Part of dsc_datatool.

:copyright: 2024 OARC, Inc.
"""

import re
import sys
import atexit

from dsc_datatool import Output, args, encoding


_re = re.compile(r'([\\\n"])')


def _key(key):
    return re.sub(_re, r'\\\1', key)


def _val(val):
    ret = re.sub(_re, r'\\\1', val)
    if ret == '':
        return '""'
    return '"%s"' % ret


class Prometheus(Output):
    show_timestamp = True
    start_timestamp = True
    fh = None
    type_def = ''
    type_printed = False
    prefix = ''


    def __init__(self, opts):
        Output.__init__(self, opts)
        timestamp = opts.get('timestamp', 'start')
        if timestamp == 'hide':
            self.show_timestamp = False
        elif timestamp == 'start':
            pass
        elif timestamp == 'stop':
            self.start_timestamp = False
        else:
            raise Exception('timestamp option invalid')
        file = opts.get('file', None)
        append = opts.get('append', False)
        if file:
            if append:
                self.fh = open(file, 'a', encoding=encoding)
            else:
                self.fh = open(file, 'w', encoding=encoding)
            atexit.register(self.close)
        else:
            self.fh = sys.stdout
        self.prefix = opts.get('prefix', '')


    def close(self):
        if self.fh:
            self.fh.close()
            self.fh = None


    def _process(self, tags, timestamp, dimension, fh):
        if dimension.dimensions is None:
            return

        if len(dimension.dimensions) > 0:
            if not (dimension.name == 'All' and dimension.value == 'ALL'):
                tags += ',%s=%s' % (_key(dimension.name.lower()), _val(dimension.value))
            for d2 in dimension.dimensions:
                self._process(tags, timestamp, d2, fh)
            return

        if dimension.values is None:
            return

        if len(dimension.values) > 0:
            tags += ',%s=' % _key(dimension.name.lower())

        for k, v in dimension.values.items():
            if not self.type_printed:
                print(self.type_def, file=fh)
                self.type_printed = True
            if self.show_timestamp:
                print('%s%s} %s %s' % (tags, _val(k), v, timestamp), file=fh)
            else:
                print('%s%s} %s' % (tags, _val(k), v), file=fh)


    def process(self, datasets):
        for dataset in datasets:
            self.type_def = '# TYPE %s gauge' % _key(dataset.name.lower())
            self.type_printed = False
            tags = '%s%s{server=%s,node=%s' % (self.prefix, _key(dataset.name.lower()), _val(args.server), _val(args.node))
            if self.start_timestamp:
                timestamp = dataset.start_time * 1000
            else:
                timestamp = dataset.stop_time * 1000

            for d in dataset.dimensions:
                self._process(tags, timestamp, d, self.fh)


if sys.version_info[0] == 3 and sys.version_info[1] == 5:  # pragma: no cover
    Output.__init_subclass__(Prometheus)
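
For the same hypothetical dataset as in the InfluxDB sketch above, this output writes Prometheus exposition format instead: label values are quoted, the timestamp is in milliseconds, and a # TYPE line precedes the first sample, roughly:

    # TYPE pcap_stats gauge
    pcap_stats{server="example-server",node="example-node",ifname="eth0",pcap_stat="filter_received"} 5625 1563520560000
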
dsc_datatool/transformer/labler.py (new file, 69 lines)

"""dsc_datatool.transformer.labler

See `man dsc-datatool-transformer labler`.

Part of dsc_datatool.

:copyright: 2024 OARC, Inc.
"""

import yaml

from dsc_datatool import Transformer, encoding


def _process(label, d):
    l = label.get(d.name, None)
    if d.values:
        if l is None:
            return

        values = d.values
        d.values = {}

        for k, v in values.items():
            nk = l.get(k, None)
            d.values[nk or k] = v

        return

    if l:
        v = l.get(d.value, None)
        if v:
            d.value = v
    for d2 in d.dimensions:
        _process(label, d2)


class Labler(Transformer):
    label = None


    def __init__(self, opts):
        Transformer.__init__(self, opts)
        if not 'yaml' in opts:
            raise Exception('yaml=file option required')
        f = open(opts.get('yaml'), 'r', encoding=encoding)
        try:
            self.label = yaml.full_load(f)
        except AttributeError:
            self.label = yaml.load(f)
        f.close()


    def process(self, datasets):
        if self.label is None:
            return

        for dataset in datasets:
            label = self.label.get(dataset.name, None)
            if label is None:
                continue

            for d in dataset.dimensions:
                _process(label, d)


import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5:  # pragma: no cover
    Transformer.__init_subclass__(Labler)
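
The YAML layout expected by the yaml= option, as inferred from the lookups in _process() above (dataset name, then dimension name, then original value mapped to its label); all names and labels below are hypothetical:

    pcap_stats:
      ifname:
        eth0: "external interface"
      pcap_stat:
        filter_received: "received by filter"
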
dsc_datatool/transformer/net_remap.py (new file, 77 lines)

"""dsc_datatool.transformer.net_remap

See `man dsc-datatool-transformer netremap`.

Part of dsc_datatool.

:copyright: 2024 OARC, Inc.
"""

import ipaddress

from dsc_datatool import Transformer, args


class NetRemap(Transformer):
    v4net = None
    v6net = None
    nonstrict = False


    def __init__(self, opts):
        Transformer.__init__(self, opts)
        net = opts.get('net', None)
        self.v4net = opts.get('v4net', net)
        self.v6net = opts.get('v6net', net)

        if not self.v4net:
            raise Exception('v4net (or net) must be given')
        if not self.v6net:
            raise Exception('v6net (or net) must be given')

        if opts.get('nonstrict', False):
            self.nonstrict = True


    def _process(self, dimension):
        if not dimension.values:
            for d2 in dimension.dimensions:
                self._process(d2)
            return

        values = dimension.values
        dimension.values = {}

        for k, v in values.items():
            if k == args.skipped_key:
                continue
            elif k == args.skipped_sum_key:
                dimension.values['0'] = v
                continue

            try:
                ip = ipaddress.ip_address(k)
            except Exception as e:
                if not self.nonstrict:
                    raise e
                continue
            if ip.version == 4:
                nkey = str(ipaddress.IPv4Network('%s/%s' % (ip, self.v4net), strict=False).network_address)
            else:
                nkey = str(ipaddress.IPv6Network('%s/%s' % (ip, self.v6net), strict=False).network_address)

            if not nkey in dimension.values:
                dimension.values[nkey] = v
            else:
                dimension.values[nkey] += v


    def process(self, datasets):
        for dataset in datasets:
            for dimension in dataset.dimensions:
                self._process(dimension)


import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5:  # pragma: no cover
    Transformer.__init_subclass__(NetRemap)
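
The remapping rule above simply truncates each client address to the configured prefix length; a quick sketch with documentation addresses and hypothetical v4net=24 / v6net=48 values:

    import ipaddress

    print(ipaddress.IPv4Network('192.0.2.123/24', strict=False).network_address)  # 192.0.2.0
    print(ipaddress.IPv6Network('2001:db8::1/48', strict=False).network_address)  # 2001:db8::
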
dsc_datatool/transformer/re_ranger.py (new file, 123 lines)

"""dsc_datatool.transformer.re_ranger

See `man dsc-datatool-transformer reranger`.

Part of dsc_datatool.

:copyright: 2024 OARC, Inc.
"""

import re

from dsc_datatool import Transformer, args


_key_re = re.compile(r'^(?:(\d+)|(\d+)-(\d+))$')


class ReRanger(Transformer):
    key = None
    func = None
    allow_invalid_keys = None
    range = None
    split_by = None


    def __init__(self, opts):
        Transformer.__init__(self, opts)
        self.key = opts.get('key', 'mid')
        self.func = opts.get('func', 'sum')
        self.allow_invalid_keys = opts.get('allow_invalid_keys', False)
        self.range = opts.get('range', None)

        if self.allow_invalid_keys != False:
            self.allow_invalid_keys = True

        if self.range is None:
            raise Exception('range must be given')
        m = re.match(r'^/(\d+)$', self.range)
        if m is None:
            raise Exception('invalid range')
        self.split_by = int(m.group(1))

        if self.key != 'low' and self.key != 'mid' and self.key != 'high':
            raise Exception('invalid key %r' % self.key)

        if self.func != 'sum':
            raise Exception('invalid func %r' % self.func)


    def _process(self, dimension):
        global _key_re

        if not dimension.values:
            for d2 in dimension.dimensions:
                self._process(d2)
            return

        values = dimension.values
        dimension.values = {}
        skipped = None

        for k, v in values.items():
            low = None
            high = None

            m = _key_re.match(k)
            if m:
                low, low2, high = m.group(1, 2, 3)
                if high is None:
                    low = int(low)
                    high = low
                else:
                    low = int(low2)
                    high = int(high)
            elif k == args.skipped_key:
                continue
            elif k == args.skipped_sum_key:
                if skipped is None:
                    skipped = v
                else:
                    skipped += v
                continue
            elif self.allow_invalid_keys:
                dimension.values[k] = v
                continue
            else:
                raise Exception('invalid key %r' % k)

            if self.key == 'low':
                nkey = low
            elif self.key == 'mid':
                nkey = int(low + ( (high - low) / 2 ))
            else:
                nkey = high

            nkey = int(nkey / self.split_by) * self.split_by
            low = nkey
            high = nkey + self.split_by - 1

            if self.func == 'sum':
                if low != high:
                    nkey = '%d-%d' % (low, high)
                else:
                    nkey = str(nkey)

            if nkey in dimension.values:
                dimension.values[nkey] += v
            else:
                dimension.values[nkey] = v

        if skipped:
            dimension.values['skipped'] = skipped


    def process(self, datasets):
        for dataset in datasets:
            for dimension in dataset.dimensions:
                self._process(dimension)


import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5:  # pragma: no cover
    Transformer.__init_subclass__(ReRanger)
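
A worked example of the bucket arithmetic above, using a hypothetical input key "0-255" with range=/64 and key=mid:

    low, high, split_by = 0, 255, 64
    mid = int(low + ((high - low) / 2))    # 127
    nkey = int(mid / split_by) * split_by  # 64
    low, high = nkey, nkey + split_by - 1
    print('%d-%d' % (low, high))           # 64-127
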