
Adding upstream version 1.4.2.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-04-22 13:23:23 +02:00
parent e344d0b8ae
commit 1ea3e103a7
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
77 changed files with 5760 additions and 0 deletions

484
dsc_datatool/__init__.py Normal file

@@ -0,0 +1,484 @@
"""dsc_datatool
The main Python module for the command-line tool `dsc-datatool`; see
`man dsc-datatool` for how to run it.
At runtime it loads all plugins under the following module paths:
- dsc_datatool.input
- dsc_datatool.output
- dsc_datatool.generator
- dsc_datatool.transformer
Each plugin should base its class on one of the following superclasses, matching its category:
- dsc_datatool.Input
- dsc_datatool.Output
- dsc_datatool.Generator
- dsc_datatool.Transformer
Doing so automatically registers the plugin as available and indexes it,
by class name, in the following public dicts:
- inputs
- outputs
- generators
- transformers
Example of an output:
from dsc_datatool import Output
class ExampleOutput(Output):
def process(self, datasets):
...
:copyright: 2024 OARC, Inc.
"""
__version__ = '1.4.2'
import argparse
import logging
import os
import importlib
import pkgutil
import sys
import traceback
import re
args = argparse.Namespace()
inputs = {}
outputs = {}
generators = {}
transformers = {}
process_dataset = {}
encoding = 'utf-8'
class Dataset(object):
"""A representation of a DSC dataset
A DSC dataset is a one- to two-dimensional structure where the last
dimension holds an array of values and counters.
It is based on the XML structure of DSC:
<array name="pcap_stats" dimensions="2" start_time="1563520560" stop_time="1563520620">
<dimension number="1" type="ifname"/>
<dimension number="2" type="pcap_stat"/>
<data>
<ifname val="eth0">
<pcap_stat val="filter_received" count="5625"/>
<pcap_stat val="pkts_captured" count="4894"/>
<pcap_stat val="kernel_dropped" count="731"/>
</ifname>
</data>
</array>
Attributes:
- name: The name of the dataset
- start_time: The start time of the dataset in seconds
- stop_time: The stop time of the dataset in seconds
- dimensions: An array of first-dimension `Dimension` objects; each holds its second dimension in its own `dimensions` attribute
"""
name = None
start_time = None
stop_time = None
dimensions = None
def __init__(self):
self.dimensions = []
def __repr__(self):
return '<Dataset name=%r dimension=%r>' % (self.name, self.dimensions)
class Dimension(object):
"""A representation of a DSC dimension
A DSC dataset dimension, which can be the first or second dimension;
see `Dataset` for more information.
Attributes:
- name: The name of the dimension
- value: Is set to the value of the dimension if it's the first dimension
- values: A dict of values with corresponding counters if it's the second dimension
"""
name = None
value = None
values = None
dimensions = None
def __init__(self, name):
self.name = name
self.values = {}
self.dimensions = []
def __repr__(self):
return '<Dimension name=%r value=%r dimension=%r>' % (self.name, self.values or self.value, self.dimensions)
class Input(object):
"""Base class of an input plugin"""
def process(self, file):
"""Input.process(...) -> [ Dataset, ... ]
Called to process a file and return an array of `Dataset`'s found in it.
"""
raise Exception('process() not overloaded')
def __init_subclass__(cls):
"""This method is called when a class is subclassed and it will
register the input plugin in `inputs`."""
global inputs
if cls.__name__ in inputs:
raise Exception('Duplicate input module: %s already exists' % cls.__name__)
inputs[cls.__name__] = cls
class Output(object):
"""Base class of an output plugin"""
def process(self, datasets):
"""Output.process([ Dataset, ... ])
Called to output the `Dataset`'s in the given array."""
raise Exception('process() not overloaded')
def __init__(self, opts):
"""instance = Output({ 'opt': value, ... })
Called to create an instance of the output plugin, will get a dict
with options provided on command line."""
pass
def __init_subclass__(cls):
"""This method is called when a class is subclassed and it will
register the output plugin in `outputs`."""
global outputs
if cls.__name__ in outputs:
raise Exception('Duplicate output module: %s already exists' % cls.__name__)
outputs[cls.__name__] = cls
class Generator(object):
"""Base class of a generator plugin"""
def process(self, datasets):
"""Generator.process([ Dataset, ... ]) -> [ Dataset, ... ]
Called to generate additional `Dataset`'s based on the given array
of `Dataset`'s."""
raise Exception('process() not overloaded')
def __init__(self, opts):
"""instance = Generator({ 'opt': value, ... })
Called to create an instance of the generator plugin, will get a dict
with options provided on command line."""
pass
def __init_subclass__(cls):
"""This method is called when a class is subclassed and it will
register the generator plugin in `generators`."""
global generators
if cls.__name__ in generators:
raise Exception('Duplicate generator module: %s already exists' % cls.__name__)
generators[cls.__name__] = cls
class Transformer(object):
"""Base class of a transformer plugin"""
def process(self, datasets):
"""Transformer.process([ Dataset, ... ])
Called to transform the given `Dataset`'s by modifying them in place."""
raise Exception('process() not overloaded')
def __init__(self, opts):
"""instance = Transformer({ 'opt': value, ... })
Called to create an instance of the transformer plugin, will get a dict
with options provided on command line."""
pass
def __init_subclass__(cls):
"""This method is called when a class is subclassed and it will
register the transformer plugin in `transformers`."""
global transformers
if cls.__name__ in transformers:
raise Exception('Duplicate transformer module: %s already exists' % cls.__name__)
transformers[cls.__name__] = cls
def main():
"""Called when running `dsc-datatool`."""
def iter_namespace(ns_pkg):
return pkgutil.iter_modules(ns_pkg.__path__, ns_pkg.__name__ + ".")
def split_arg(arg, num=1):
sep = arg[0]
p = arg.split(sep)
p.pop(0)
ret = ()
while num > 0:
ret += (p.pop(0),)
num -= 1
ret += (p,)
return ret
def parse_opts(opts):
ret = {}
for opt in opts:
p = opt.split('=', maxsplit=1)
if len(p) > 1:
if p[0] in ret:
if isinstance(ret[p[0]], list):
ret[p[0]].append(p[1])
else:
ret[p[0]] = [ ret[p[0]], p[1] ]
else:
ret[p[0]] = p[1]
elif len(p) > 0:
ret[p[0]] = True
return ret
def _process(datasets, generators, transformers, outputs):
gen_datasets = []
for generator in generators:
try:
gen_datasets += generator.process(datasets)
except Exception as e:
logging.warning('Generator %s failed: %s' % (generator, e))
exc_type, exc_value, exc_traceback = sys.exc_info()
for tb in traceback.format_tb(exc_traceback):
logging.warning(str(tb))
return 2
datasets += gen_datasets
if '*' in transformers:
for transformer in transformers['*']:
try:
transformer.process(datasets)
except Exception as e:
logging.warning('Transformer %s failed: %s' % (transformer, e))
exc_type, exc_value, exc_traceback = sys.exc_info()
for tb in traceback.format_tb(exc_traceback):
logging.warning(str(tb))
return 2
for dataset in datasets:
if dataset.name in transformers:
for transformer in transformers[dataset.name]:
try:
transformer.process([dataset])
except Exception as e:
logging.warning('Transformer %s failed: %s' % (transformer, e))
exc_type, exc_value, exc_traceback = sys.exc_info()
for tb in traceback.format_tb(exc_traceback):
logging.warning(str(tb))
return 2
for output in outputs:
try:
output.process(datasets)
except Exception as e:
logging.warning('Output %s failed: %s' % (output, e))
exc_type, exc_value, exc_traceback = sys.exc_info()
for tb in traceback.format_tb(exc_traceback):
logging.warning(str(tb))
return 2
return 0
global args, inputs, outputs, generators, transformers, process_dataset
parser = argparse.ArgumentParser(prog='dsc-datatool',
description='Export DSC data into various formats and databases.',
epilog='See man-page dsc-datatool(1) and dsc-datatool-[generator|transformer|output] <name>(5) for more information')
parser.add_argument('-c', '--conf', nargs=1,
help='Not implemented')
# help='Specify the YAML configuration file to use (defaults to ~/.dsc-datatool.conf); any command line option will override the options in the configuration file. See dsc-datatool.conf(5) for more information.')
parser.add_argument('-s', '--server', nargs=1,
help='Specify the server for where the data comes from. (required)')
parser.add_argument('-n', '--node', nargs=1,
help='Specify the node for where the data comes from. (required)')
parser.add_argument('-x', '--xml', action='append',
help='Read DSC data from the given file or directory, can be specified multiple times. If a directory is given then all files ending with .xml will be read.')
parser.add_argument('-d', '--dat', action='append',
help='Read DSC data from the given directory, can be specified multiple times. Note that the DAT format depends on the filename to know what type of data it is.')
parser.add_argument('--dataset', action='append',
help='Specify that only the listed datasets will be processed; the list is comma-separated and the option can be given multiple times.')
parser.add_argument('-o', '--output', action='append',
help='"<sep><output>[<sep>option=value...]>" Output data to <output> and use <separator> as an options separator.')
parser.add_argument('-t', '--transform', action='append',
help='"<sep><name><sep><datasets>[<sep>option=value...]>" Use the transformer <name> to change the list of datasets in <datasets>.')
parser.add_argument('-g', '--generator', action='append',
help='"<name>[,<name>,...]" or "<sep><name>[<sep>option=value...]>" Use the specified generators to generate additional datasets.')
parser.add_argument('--list', action='store_true',
help='List the available generators, transformers and outputs then exit.')
parser.add_argument('--skipped-key', nargs=1, default='-:SKIPPED:-',
help='Set the special DSC skipped key. (defaults to "-:SKIPPED:-")')
parser.add_argument('--skipped-sum-key', nargs=1, default='-:SKIPPED_SUM:-',
help='Set the special DSC skipped sum key. (defaults to "-:SKIPPED_SUM:-")')
parser.add_argument('--encoding', nargs=1, default='utf-8',
help='Encoding to use for all files, default utf-8.')
parser.add_argument('-v', '--verbose', action='count', default=0,
help='Increase the verbose level, can be given multiple times.')
parser.add_argument('-V', '--version', action='version', version='%(prog)s v'+__version__,
help='Display version and exit.')
args = parser.parse_args()
log_level = 30 - (args.verbose * 10)
if log_level < 0:
log_level = 0
logging.basicConfig(format='%(asctime)s %(levelname)s %(module)s: %(message)s', level=log_level, stream=sys.stderr)
import dsc_datatool.input
import dsc_datatool.output
import dsc_datatool.generator
import dsc_datatool.transformer
for finder, name, ispkg in iter_namespace(dsc_datatool.input):
importlib.import_module(name)
for finder, name, ispkg in iter_namespace(dsc_datatool.output):
importlib.import_module(name)
for finder, name, ispkg in iter_namespace(dsc_datatool.generator):
importlib.import_module(name)
for finder, name, ispkg in iter_namespace(dsc_datatool.transformer):
importlib.import_module(name)
if args.list:
print('Generators:')
for name in generators:
print('',name)
print('Transformers:')
for name in transformers:
print('',name)
print('Outputs:')
for name in outputs:
print('',name)
return 0
if not args.server or not args.node:
raise Exception('--server and --node must be given')
if isinstance(args.server, list):
args.server = ' '.join(args.server)
elif not isinstance(args.server, str):
raise Exception('Invalid argument for --server: %r' % args.server)
if isinstance(args.node, list):
args.node = ' '.join(args.node)
elif not isinstance(args.node, str):
raise Exception('Invalid argument for --node: %r' % args.node)
gens = []
if args.generator:
for arg in args.generator:
if not re.match(r'^\w', arg):
name, opts = split_arg(arg)
if not name in generators:
logging.critical('Generator %s does not exist' % name)
return 1
gens.append(generators[name](parse_opts(opts)))
continue
for name in arg.split(','):
if not name in generators:
logging.critical('Generator %s does not exist' % name)
return 1
gens.append(generators[name]({}))
trans = {}
if args.transform:
for arg in args.transform:
name, datasets, opts = split_arg(arg, num=2)
if not name in transformers:
logging.critical('Transformer %s does not exist' % name)
return 1
for dataset in datasets.split(','):
if not dataset in trans:
trans[dataset] = []
trans[dataset].append(transformers[name](parse_opts(opts)))
out = []
if args.output:
for arg in args.output:
name, opts = split_arg(arg)
if not name in outputs:
logging.critical('Output %s does not exist' % name)
return 1
out.append(outputs[name](parse_opts(opts)))
if args.dataset:
for dataset in args.dataset:
for p in dataset.split(','):
process_dataset[p] = True
xml = []
if args.xml:
for entry in args.xml:
if os.path.isfile(entry):
xml.append(entry)
elif os.path.isdir(entry):
with os.scandir(entry) as dir:
for file in dir:
if not file.name.startswith('.') and file.is_file() and file.name.lower().endswith('.xml'):
xml.append(file.path)
else:
logging.error('--xml %r is not a file or directory' % entry)
dat = []
if args.dat:
for entry in args.dat:
if os.path.isdir(entry):
dat.append(entry)
else:
logging.error('--dat %r is not a directory' % entry)
if not xml and not dat:
logging.error('No valid --xml or --dat given')
return 1
xml_input = inputs['XML']()
for file in xml:
try:
datasets = xml_input.process(file)
except Exception as e:
logging.critical('Unable to process XML file %s: %s' % (file, e))
return 1
ret = _process(datasets, gens, trans, out)
if ret > 0:
return ret
dat_input = inputs['DAT']()
for dir in dat:
try:
datasets = dat_input.process(dir)
except Exception as e:
logging.critical('Unable to process DAT files in %s: %s' % (dir, e))
return 1
ret = _process(datasets, gens, trans, out)
if ret > 0:
return ret
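
The registration machinery above means a plugin only needs to subclass one of the base classes and live under the matching module path for main() to discover it. A minimal sketch of a hypothetical transformer (the class name, module file and the dataset used on the command line are illustrative, not part of dsc-datatool):

# dsc_datatool/transformer/upper_case.py (hypothetical module)
from dsc_datatool import Transformer

class UpperCase(Transformer):
    """Hypothetical transformer that upper-cases every value key in place."""

    def process(self, datasets):
        for dataset in datasets:
            for d1 in dataset.dimensions:
                for d2 in d1.dimensions:
                    # rebuild the values dict with upper-cased keys
                    d2.values = {k.upper(): v for k, v in d2.values.items()}

Because Transformer.__init_subclass__() runs at class-definition time, the class is registered as transformers['UpperCase'] and can be selected with, for example, -t ',UpperCase,qtype'.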

181
dsc_datatool/generator/client_subnet_authority.py Normal file

@@ -0,0 +1,181 @@
"""dsc_datatool.generator.client_subnet_authority
See `man dsc-datatool-generator client_subnet_authority`.
Part of dsc_datatool.
:copyright: 2024 OARC, Inc.
"""
import csv
import ipaddress
import logging
from urllib.request import Request, urlopen
from io import StringIO
from dsc_datatool import Generator, Dataset, Dimension, args, encoding
_whois2rir = {
'whois.apnic.net': 'APNIC',
'whois.arin.net': 'ARIN',
'whois.ripe.net': 'RIPE NCC',
'whois.lacnic.net': 'LACNIC',
'whois.afrinic.net': 'AFRINIC',
}
_desig2rir = {
'apnic': 'APNIC',
'arin': 'ARIN',
'ripe ncc': 'RIPE NCC',
'lacnic': 'LACNIC',
'afrinic': 'AFRINIC',
'iana': 'IANA',
'6to4': 'IANA',
}
class client_subnet_authority(Generator):
auth = None
nonstrict = False
def _read(self, input):
global _whois2rir, _desig2rir
for row in csv.reader(input):
prefix, designation, date, whois, rdap, status, note = row
if prefix == 'Prefix':
continue
rir = designation.replace('Administered by ', '').lower()
whois = whois.lower()
if whois in _whois2rir:
rir = _whois2rir[whois]
else:
if rir in _desig2rir:
rir = _desig2rir[rir]
else:
found = None
for k, v in _desig2rir.items():
if k in rir:
found = v
break
if found:
rir = found
else:
if status == 'RESERVED':
rir = 'IANA'
elif designation == 'Segment Routing (SRv6) SIDs':
# TODO: How to better handle this weird allocation?
rir = 'IANA'
else:
raise Exception('Unknown whois/designation: %r/%r' % (whois, designation))
try:
net = ipaddress.ip_network(prefix)
except Exception:
ip, net = prefix.split('/')
net = ipaddress.ip_network('%s.0.0.0/%s' % (int(ip), net))
if net.version == 4:
idx = ipaddress.ip_network('%s/8' % net.network_address, strict=False)
else:
idx = ipaddress.ip_network('%s/24' % net.network_address, strict=False)
if idx.network_address in self.auth:
self.auth[idx.network_address].append({'net': net, 'auth': rir})
else:
self.auth[idx.network_address] = [{'net': net, 'auth': rir}]
def __init__(self, opts):
Generator.__init__(self, opts)
self.auth = {}
csvs = opts.get('csv', None)
urlv4 = opts.get('urlv4', 'https://www.iana.org/assignments/ipv4-address-space/ipv4-address-space.csv')
urlv6 = opts.get('urlv6', 'https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.csv')
if opts.get('nonstrict', False):
self.nonstrict = True
if csvs:
if not isinstance(csvs, list):
csvs = [ csvs ]
for file in csvs:
with open(file, newline='', encoding=encoding) as csvfile:
self._read(csvfile)
elif opts.get('fetch', 'no').lower() == 'yes':
urls = opts.get('url', [ urlv4, urlv6 ])
if urls and not isinstance(urls, list):
urls = [ urls ]
logging.info('bootstrapping client subnet authority using URLs')
for url in urls:
logging.info('fetching %s' % url)
self._read(StringIO(urlopen(Request(url)).read().decode('utf-8')))
else:
raise Exception('No authorities bootstrapped, please specify csv= or fetch=yes')
def process(self, datasets):
gen_datasets = []
for dataset in datasets:
if dataset.name != 'client_subnet':
continue
subnets = {}
for d1 in dataset.dimensions:
for d2 in d1.dimensions:
for k, v in d2.values.items():
if k == args.skipped_key:
continue
elif k == args.skipped_sum_key:
continue
if k in subnets:
subnets[k] += v
else:
subnets[k] = v
auth = {}
for subnet in subnets:
try:
ip = ipaddress.ip_address(subnet)
except Exception as e:
if not self.nonstrict:
raise e
continue
if ip.version == 4:
idx = ipaddress.ip_network('%s/8' % ip, strict=False)
ip = ipaddress.ip_network('%s/32' % ip)
else:
idx = ipaddress.ip_network('%s/24' % ip, strict=False)
ip = ipaddress.ip_network('%s/128' % ip)
if not idx.network_address in self.auth:
idx = '??'
else:
for entry in self.auth[idx.network_address]:
if entry['net'].overlaps(ip):
idx = entry['auth']
break
if idx in auth:
auth[idx] += subnets[subnet]
else:
auth[idx] = subnets[subnet]
if auth:
authd = Dataset()
authd.name = 'client_subnet_authority'
authd.start_time = dataset.start_time
authd.stop_time = dataset.stop_time
gen_datasets.append(authd)
authd1 = Dimension('ClientAuthority')
authd1.values = auth
authd.dimensions.append(authd1)
return gen_datasets
import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5: # pragma: no cover
Generator.__init_subclass__(client_subnet_authority)
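
For context, _read() above expects rows in the column layout of IANA's address-space registry CSVs (Prefix, Designation, Date, WHOIS, RDAP, Status, Note); an illustrative IPv4 row would be:

001/8,APNIC,2010-01,whois.apnic.net,https://rdap.apnic.net/,ALLOCATED,

On the command line the generator can be selected with, for example, -g ';client_subnet_authority;fetch=yes' (fetch the registries over HTTP) or -g ';client_subnet_authority;csv=ipv4-address-space.csv;csv=ipv6-unicast-address-assignments.csv' (read local copies; the file names are placeholders).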

98
dsc_datatool/generator/client_subnet_country.py Normal file

@@ -0,0 +1,98 @@
"""dsc_datatool.generator.client_subnet_country
See `man dsc-datatool-generator client_subnet_country`.
Part of dsc_datatool.
:copyright: 2024 OARC, Inc.
"""
import maxminddb
import os
import logging
from dsc_datatool import Generator, Dataset, Dimension, args
class client_subnet_country(Generator):
reader = None
nonstrict = False
def __init__(self, opts):
Generator.__init__(self, opts)
paths = opts.get('path', ['/var/lib/GeoIP', '/usr/share/GeoIP', '/usr/local/share/GeoIP'])
if not isinstance(paths, list):
paths = [ paths ]
filename = opts.get('filename', 'GeoLite2-Country.mmdb')
db = opts.get('db', None)
if db is None:
for path in paths:
db = '%s/%s' % (path, filename)
if os.path.isfile(db) and os.access(db, os.R_OK):
break
db = None
if db is None:
raise Exception('Please specify valid Maxmind database with path=,filename= or db=')
logging.info('Using %s' % db)
self.reader = maxminddb.open_database(db)
if opts.get('nonstrict', False):
self.nonstrict = True
def process(self, datasets):
gen_datasets = []
for dataset in datasets:
if dataset.name != 'client_subnet':
continue
subnets = {}
for d1 in dataset.dimensions:
for d2 in d1.dimensions:
for k, v in d2.values.items():
if k == args.skipped_key:
continue
elif k == args.skipped_sum_key:
continue
if k in subnets:
subnets[k] += v
else:
subnets[k] = v
cc = {}
for subnet in subnets:
try:
c = self.reader.get(subnet)
except Exception as e:
if not self.nonstrict:
raise e
continue
if c:
iso_code = c.get('country', {}).get('iso_code', '??')
if iso_code in cc:
cc[iso_code] += subnets[subnet]
else:
cc[iso_code] = subnets[subnet]
if cc:
ccd = Dataset()
ccd.name = 'client_subnet_country'
ccd.start_time = dataset.start_time
ccd.stop_time = dataset.stop_time
gen_datasets.append(ccd)
ccd1 = Dimension('ClientCountry')
ccd1.values = cc
ccd.dimensions.append(ccd1)
return gen_datasets
import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5: # pragma: no cover
Generator.__init_subclass__(client_subnet_country)
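
A rough sketch of what the maxminddb lookup above returns (the database path and address are placeholders; the structure follows the GeoLite2 country schema):

import maxminddb

reader = maxminddb.open_database('/var/lib/GeoIP/GeoLite2-Country.mmdb')  # placeholder path
c = reader.get('192.0.2.1')  # placeholder address; returns None if not in the database
# For a covered address, c is a nested dict, roughly:
#   {'country': {'iso_code': 'SE', 'names': {...}}, ...}
# which is why the generator reads c.get('country', {}).get('iso_code', '??').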

177
dsc_datatool/input/dat.py Normal file

@@ -0,0 +1,177 @@
"""dsc_datatool.input.dat
Input plugin to generate `Dataset`'s from DSC DAT files.
Part of dsc_datatool.
:copyright: 2024 OARC, Inc.
"""
import re
from dsc_datatool import Input, Dataset, Dimension, process_dataset, encoding
_dataset1d = [
'client_subnet_count',
'ipv6_rsn_abusers_count',
]
_dataset2d = {
'qtype': 'Qtype',
'rcode': 'Rcode',
'do_bit': 'D0',
'rd_bit': 'RD',
'opcode': 'Opcode',
'dnssec_qtype': 'Qtype',
'edns_version': 'EDNSVersion',
'client_subnet2_count': 'Class',
'client_subnet2_trace': 'Class',
'edns_bufsiz': 'EDNSBufSiz',
'idn_qname': 'IDNQname',
'client_port_range': 'PortRange',
'priming_responses': 'ReplyLen',
}
_dataset3d = {
'chaos_types_and_names': [ 'Qtype', 'Qname' ],
'certain_qnames_vs_qtype': [ 'CertainQnames', 'Qtype' ],
'direction_vs_ipproto': [ 'Direction', 'IPProto' ],
'pcap_stats': [ 'pcap_stat', 'ifname' ],
'transport_vs_qtype': [ 'Transport', 'Qtype' ],
'dns_ip_version': [ 'IPVersion', 'Qtype' ],
'priming_queries': [ 'Transport', 'EDNSBufSiz' ],
'qr_aa_bits': [ 'Direction', 'QRAABits' ],
}
class DAT(Input):
def process(self, dir):
global _dataset1d, _dataset2d, _dataset3d
datasets = []
for d in _dataset1d:
if process_dataset and not d in process_dataset:
continue
try:
datasets += self.process1d('%s/%s.dat' % (dir, d), d)
except FileNotFoundError:
pass
for k, v in _dataset2d.items():
if process_dataset and not k in process_dataset:
continue
try:
datasets += self.process2d('%s/%s.dat' % (dir, k), k, v)
except FileNotFoundError:
pass
for k, v in _dataset3d.items():
if process_dataset and not k in process_dataset:
continue
try:
datasets += self.process3d('%s/%s.dat' % (dir, k), k, v[0], v[1])
except FileNotFoundError:
pass
return datasets
def process1d(self, file, name):
datasets = []
with open(file, 'r', encoding=encoding) as f:
for l in f.readlines():
if re.match(r'^#', l):
continue
l = re.sub(r'[\r\n]+$', '', l)
dat = re.split(r'\s+', l)
if len(dat) != 2:
raise Exception('DAT %r dataset %r: invalid number of elements for a 1d dataset' % (file, name))
dataset = Dataset()
dataset.name = name
dataset.start_time = int(dat.pop(0))
dataset.stop_time = dataset.start_time + 60
d1 = Dimension('All')
d1.values = { 'ALL': int(dat[0]) }
dataset.dimensions.append(d1)
datasets.append(dataset)
return datasets
def process2d(self, file, name, field):
datasets = []
with open(file, 'r', encoding=encoding) as f:
for l in f.readlines():
if re.match(r'^#', l):
continue
l = re.sub(r'[\r\n]+$', '', l)
dat = re.split(r'\s+', l)
dataset = Dataset()
dataset.name = name
dataset.start_time = int(dat.pop(0))
dataset.stop_time = dataset.start_time + 60
d1 = Dimension('All')
d1.value = 'ALL'
dataset.dimensions.append(d1)
d2 = Dimension(field)
while dat:
if len(dat) < 2:
raise Exception('DAT %r dataset %r: invalid number of elements for a 2d dataset' % (file, name))
k = dat.pop(0)
v = dat.pop(0)
d2.values[k] = int(v)
d1.dimensions.append(d2)
datasets.append(dataset)
return datasets
def process3d(self, file, name, first, second):
datasets = []
with open(file, 'r', encoding=encoding) as f:
for l in f.readlines():
if re.match(r'^#', l):
continue
l = re.sub(r'[\r\n]+$', '', l)
dat = re.split(r'\s+', l)
dataset = Dataset()
dataset.name = name
dataset.start_time = int(dat.pop(0))
dataset.stop_time = dataset.start_time + 60
while dat:
if len(dat) < 2:
raise Exception('DAT %r dataset %r: invalid number of elements for a 3d dataset' % (file, name))
k = dat.pop(0)
v = dat.pop(0)
d1 = Dimension(first)
d1.value = k
dataset.dimensions.append(d1)
d2 = Dimension(second)
dat2 = v.split(':')
while dat2:
if len(dat2) < 2:
raise Exception('DAT %r dataset %r: invalid number of elements for a 3d dataset' % (file, name))
k2 = dat2.pop(0)
v2 = dat2.pop(0)
d2.values[k2] = int(v2)
d1.dimensions.append(d2)
datasets.append(dataset)
return datasets
import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5: # pragma: no cover
Input.__init_subclass__(DAT)
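
For orientation, the three parsers above expect whitespace-separated DAT lines of the following shapes; each line describes a 60-second window starting at the timestamp. The counters below are made up and the trailing # comments are annotations, not part of the file format:

1563520560 1234                                  # 1d: <start_time> <count>
1563520560 A 5625 AAAA 731 MX 12                 # 2d: <start_time> <key> <count> ...
1563520560 filter_received eth0:5625:eth1:100    # 3d: <start_time> <key1> <key2>:<count>:<key2>:<count>...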

71
dsc_datatool/input/xml.py Normal file

@@ -0,0 +1,71 @@
"""dsc_datatool.input.xml
Input plugin to generate `Dataset`'s from DSC XML files.
Part of dsc_datatool.
:copyright: 2024 OARC, Inc.
"""
import logging
from xml.dom import minidom
import base64
from dsc_datatool import Input, Dataset, Dimension, process_dataset
class XML(Input):
def process(self, file):
dom = minidom.parse(file)
datasets = []
for array in dom.getElementsByTagName('array'):
if process_dataset and not array.getAttribute('name') in process_dataset:
continue
dataset = Dataset()
dataset.name = array.getAttribute('name')
dataset.start_time = int(array.getAttribute('start_time'))
dataset.stop_time = int(array.getAttribute('stop_time'))
dimensions = [None, None]
for dimension in array.getElementsByTagName('dimension'):
if dimension.getAttribute('number') == '1':
if dimensions[0]:
logging.warning('Overwriting dimension 1 for %s' % dataset.name)
dimensions[0] = dimension.getAttribute('type')
elif dimension.getAttribute('number') == '2':
if dimensions[1]:
logging.warning('Overwriting dimension 2 for %s' % dataset.name)
dimensions[1] = dimension.getAttribute('type')
else:
logging.warning('Invalid dimension number %r for %s' % (dimension.getAttribute('number'), dataset.name))
for node1 in array.getElementsByTagName(dimensions[0]):
d1 = Dimension(dimensions[0])
d1.value = node1.getAttribute('val')
try:
if node1.getAttribute('base64'):
d1.value = base64.b64decode(d1.value).decode('utf-8')
except Exception as e:
pass
dataset.dimensions.append(d1)
d2 = Dimension(dimensions[1])
d1.dimensions.append(d2)
for node2 in node1.getElementsByTagName(dimensions[1]):
val = node2.getAttribute('val')
try:
if node2.getAttribute('base64'):
val = base64.b64decode(val).decode('utf-8')
except Exception as e:
pass
d2.values[val] = int(node2.getAttribute('count'))
datasets.append(dataset)
return datasets
import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5: # pragma: no cover
Input.__init_subclass__(XML)
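
A quick sketch of driving this input directly from Python rather than via main() (the file name is a placeholder):

from dsc_datatool.input.xml import XML

datasets = XML().process('20190719-063600.xml')
for dataset in datasets:
    print(dataset)  # Dataset.__repr__, e.g. <Dataset name='pcap_stats' dimension=[...]>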

103
dsc_datatool/output/influxdb.py Normal file

@@ -0,0 +1,103 @@
"""dsc_datatool.output.influxdb
See `man dsc-datatool-output influxdb`.
Part of dsc_datatool.
:copyright: 2024 OARC, Inc.
"""
import re
import sys
import atexit
from dsc_datatool import Output, args, encoding
_re = re.compile(r'([,=\\\s])')
def _key(key):
return re.sub(_re, r'\\\1', key)
def _val(val):
ret = re.sub(_re, r'\\\1', val)
if ret == '':
return '""'
return ret
def _process(tags, timestamp, dimension, fh):
if dimension.dimensions is None:
return
if len(dimension.dimensions) > 0:
if not (dimension.name == 'All' and dimension.value == 'ALL'):
tags += ',%s=%s' % (_key(dimension.name.lower()), _val(dimension.value))
for d2 in dimension.dimensions:
_process(tags, timestamp, d2, fh)
return
if dimension.values is None:
return
if len(dimension.values) > 0:
tags += ',%s=' % _key(dimension.name.lower())
for k, v in dimension.values.items():
print('%s%s value=%s %s' % (tags, _val(k), v, timestamp), file=fh)
class InfluxDB(Output):
start_timestamp = True
fh = None
def __init__(self, opts):
Output.__init__(self, opts)
timestamp = opts.get('timestamp', 'start')
if timestamp == 'start':
pass
elif timestamp == 'stop':
self.start_timestamp = False
else:
raise Exception('timestamp option invalid')
file = opts.get('file', None)
append = opts.get('append', False)
if file:
if append:
self.fh = open(file, 'a', encoding=encoding)
else:
self.fh = open(file, 'w', encoding=encoding)
atexit.register(self.close)
else:
self.fh = sys.stdout
if opts.get('dml', False):
print('# DML', file=self.fh)
database = opts.get('database', None)
if database:
print('# CONTEXT-DATABASE: %s' % database, file=self.fh)
def close(self):
if self.fh:
self.fh.close()
self.fh = None
def process(self, datasets):
for dataset in datasets:
tags = '%s,server=%s,node=%s' % (_key(dataset.name.lower()), args.server, args.node)
if self.start_timestamp:
timestamp = dataset.start_time * 1000000000
else:
timestamp = dataset.stop_time * 1000000000
for d in dataset.dimensions:
_process(tags, timestamp, d, self.fh)
if sys.version_info[0] == 3 and sys.version_info[1] == 5: # pragma: no cover
Output.__init_subclass__(InfluxDB)
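
Putting the pieces above together, every counter becomes one InfluxDB line-protocol point. For the pcap_stats example from the module docstring in dsc_datatool/__init__.py (server and node names made up), the output would look roughly like:

pcap_stats,server=test-server,node=test-node,ifname=eth0,pcap_stat=filter_received value=5625 1563520560000000000
pcap_stats,server=test-server,node=test-node,ifname=eth0,pcap_stat=pkts_captured value=4894 1563520560000000000
pcap_stats,server=test-server,node=test-node,ifname=eth0,pcap_stat=kernel_dropped value=731 1563520560000000000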

112
dsc_datatool/output/prometheus.py Normal file

@@ -0,0 +1,112 @@
"""dsc_datatool.output.prometheus
See `man dsc-datatool-output prometheus`.
Part of dsc_datatool.
:copyright: 2024 OARC, Inc.
"""
import re
import sys
import atexit
from dsc_datatool import Output, args, encoding
_re = re.compile(r'([\\\n"])')
def _key(key):
return re.sub(_re, r'\\\1', key)
def _val(val):
ret = re.sub(_re, r'\\\1', val)
if ret == '':
return '""'
return '"%s"' % ret
class Prometheus(Output):
show_timestamp = True
start_timestamp = True
fh = None
type_def = ''
type_printed = False
prefix = ''
def __init__(self, opts):
Output.__init__(self, opts)
timestamp = opts.get('timestamp', 'start')
if timestamp == 'hide':
self.show_timestamp = False
elif timestamp == 'start':
pass
elif timestamp == 'stop':
self.start_timestamp = False
else:
raise Exception('timestamp option invalid')
file = opts.get('file', None)
append = opts.get('append', False)
if file:
if append:
self.fh = open(file, 'a', encoding=encoding)
else:
self.fh = open(file, 'w', encoding=encoding)
atexit.register(self.close)
else:
self.fh = sys.stdout
self.prefix = opts.get('prefix', '')
def close(self):
if self.fh:
self.fh.close()
self.fh = None
def _process(self, tags, timestamp, dimension, fh):
if dimension.dimensions is None:
return
if len(dimension.dimensions) > 0:
if not (dimension.name == 'All' and dimension.value == 'ALL'):
tags += ',%s=%s' % (_key(dimension.name.lower()), _val(dimension.value))
for d2 in dimension.dimensions:
self._process(tags, timestamp, d2, fh)
return
if dimension.values is None:
return
if len(dimension.values) > 0:
tags += ',%s=' % _key(dimension.name.lower())
for k, v in dimension.values.items():
if not self.type_printed:
print(self.type_def, file=fh)
self.type_printed = True
if self.show_timestamp:
print('%s%s} %s %s' % (tags, _val(k), v, timestamp), file=fh)
else:
print('%s%s} %s' % (tags, _val(k), v), file=fh)
def process(self, datasets):
for dataset in datasets:
self.type_def = '# TYPE %s gauge' % _key(dataset.name.lower())
self.type_printed = False
tags = '%s%s{server=%s,node=%s' % (self.prefix, _key(dataset.name.lower()), _val(args.server), _val(args.node))
if self.start_timestamp:
timestamp = dataset.start_time * 1000
else:
timestamp = dataset.stop_time * 1000
for d in dataset.dimensions:
self._process(tags, timestamp, d, self.fh)
if sys.version_info[0] == 3 and sys.version_info[1] == 5: # pragma: no cover
Output.__init_subclass__(Prometheus)
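
The same pcap_stats example rendered by this output (again with made-up server and node names; note the timestamp is in milliseconds here) would look roughly like:

# TYPE pcap_stats gauge
pcap_stats{server="test-server",node="test-node",ifname="eth0",pcap_stat="filter_received"} 5625 1563520560000
pcap_stats{server="test-server",node="test-node",ifname="eth0",pcap_stat="pkts_captured"} 4894 1563520560000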

69
dsc_datatool/transformer/labler.py Normal file

@@ -0,0 +1,69 @@
"""dsc_datatool.transformer.labler
See `man dsc-datatool-transformer labler`.
Part of dsc_datatool.
:copyright: 2024 OARC, Inc.
"""
import yaml
from dsc_datatool import Transformer, encoding
def _process(label, d):
l = label.get(d.name, None)
if d.values:
if l is None:
return
values = d.values
d.values = {}
for k, v in values.items():
nk = l.get(k, None)
d.values[nk or k] = v
return
if l:
v = l.get(d.value, None)
if v:
d.value = v
for d2 in d.dimensions:
_process(label, d2)
class Labler(Transformer):
label = None
def __init__(self, opts):
Transformer.__init__(self, opts)
if not 'yaml' in opts:
raise Exception('yaml=file option required')
f = open(opts.get('yaml'), 'r', encoding=encoding)
try:
self.label = yaml.full_load(f)
except AttributeError:
self.label = yaml.load(f)
f.close()
def process(self, datasets):
if self.label is None:
return
for dataset in datasets:
label = self.label.get(dataset.name, None)
if label is None:
continue
for d in dataset.dimensions:
_process(label, d)
import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5: # pragma: no cover
Transformer.__init_subclass__(Labler)
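
An illustrative YAML file for this transformer, following the structure _process() walks (dataset name, then dimension name, then value-to-label mapping; the rcode mapping below is one common use):

rcode:
  Rcode:
    "0": NOERROR
    "2": SERVFAIL
    "3": NXDOMAIN

With this loaded, numeric Rcode values in the rcode dataset are rewritten to their textual names; the transformer would be selected with something like -t ';Labler;rcode;yaml=labels.yaml' (the YAML file name is a placeholder).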

77
dsc_datatool/transformer/net_remap.py Normal file

@@ -0,0 +1,77 @@
"""dsc_datatool.transformer.net_remap
See `man dsc-datatool-transformer netremap`.
Part of dsc_datatool.
:copyright: 2024 OARC, Inc.
"""
import ipaddress
from dsc_datatool import Transformer, args
class NetRemap(Transformer):
v4net = None
v6net = None
nonstrict = False
def __init__(self, opts):
Transformer.__init__(self, opts)
net = opts.get('net', None)
self.v4net = opts.get('v4net', net)
self.v6net = opts.get('v6net', net)
if not self.v4net:
raise Exception('v4net (or net) must be given')
if not self.v6net:
raise Exception('v6net (or net) must be given')
if opts.get('nonstrict', False):
self.nonstrict = True
def _process(self, dimension):
if not dimension.values:
for d2 in dimension.dimensions:
self._process(d2)
return
values = dimension.values
dimension.values = {}
for k, v in values.items():
if k == args.skipped_key:
continue
elif k == args.skipped_sum_key:
dimension.values['0'] = v
continue
try:
ip = ipaddress.ip_address(k)
except Exception as e:
if not self.nonstrict:
raise e
continue
if ip.version == 4:
nkey = str(ipaddress.IPv4Network('%s/%s' % (ip, self.v4net), strict=False).network_address)
else:
nkey = str(ipaddress.IPv6Network('%s/%s' % (ip, self.v6net), strict=False).network_address)
if not nkey in dimension.values:
dimension.values[nkey] = v
else:
dimension.values[nkey] += v
def process(self, datasets):
for dataset in datasets:
for dimension in dataset.dimensions:
self._process(dimension)
import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5: # pragma: no cover
Transformer.__init_subclass__(NetRemap)
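
As a concrete illustration of the remapping above: selected with, say, -t ';NetRemap;client_subnet;v4net=24;v6net=64', an IPv4 value key of 192.0.2.123 becomes 192.0.2.0 and an IPv6 key of 2001:db8::1 becomes 2001:db8::, with the counters of keys that now collide summed together; the special skipped-sum key is folded into the key "0".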

123
dsc_datatool/transformer/re_ranger.py Normal file

@@ -0,0 +1,123 @@
"""dsc_datatool.transformer.re_ranger
See `man dsc-datatool-transformer reranger`.
Part of dsc_datatool.
:copyright: 2024 OARC, Inc.
"""
import re
from dsc_datatool import Transformer, args
_key_re = re.compile(r'^(?:(\d+)|(\d+)-(\d+))$')
class ReRanger(Transformer):
key = None
func = None
allow_invalid_keys = None
range = None
split_by = None
def __init__(self, opts):
Transformer.__init__(self, opts)
self.key = opts.get('key', 'mid')
self.func = opts.get('func', 'sum')
self.allow_invalid_keys = opts.get('allow_invalid_keys', False)
self.range = opts.get('range', None)
if self.allow_invalid_keys != False:
self.allow_invalid_keys = True
if self.range is None:
raise Exception('range must be given')
m = re.match(r'^/(\d+)$', self.range)
if m is None:
raise Exception('invalid range')
self.split_by = int(m.group(1))
if self.key != 'low' and self.key != 'mid' and self.key != 'high':
raise Exception('invalid key %r' % self.key)
if self.func != 'sum':
raise Exception('invalid func %r' % self.func)
def _process(self, dimension):
global _key_re
if not dimension.values:
for d2 in dimension.dimensions:
self._process(d2)
return
values = dimension.values
dimension.values = {}
skipped = None
for k, v in values.items():
low = None
high = None
m = _key_re.match(k)
if m:
low, low2, high = m.group(1, 2, 3)
if high is None:
low = int(low)
high = low
else:
low = int(low2)
high = int(high)
elif k == args.skipped_key:
continue
elif k == args.skipped_sum_key:
if skipped is None:
skipped = v
else:
skipped += v
continue
elif self.allow_invalid_keys:
dimension.values[k] = v
continue
else:
raise Exception('invalid key %r' % k)
if self.key == 'low':
nkey = low
elif self.key == 'mid':
nkey = int(low + ( (high - low) / 2 ))
else:
nkey = high
nkey = int(nkey / self.split_by) * self.split_by
low = nkey
high = nkey + self.split_by - 1
if self.func == 'sum':
if low != high:
nkey = '%d-%d' % (low, high)
else:
nkey = str(nkey)
if nkey in dimension.values:
dimension.values[nkey] += v
else:
dimension.values[nkey] = v
if skipped:
dimension.values['skipped'] = skipped
def process(self, datasets):
for dataset in datasets:
for dimension in dataset.dimensions:
self._process(dimension)
import sys
if sys.version_info[0] == 3 and sys.version_info[1] == 5: # pragma: no cover
Transformer.__init_subclass__(ReRanger)
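
A worked example of the bucketing above: with -t ';ReRanger;edns_bufsiz;key=mid;range=/512' (dataset and values illustrative), a key "512-527" has mid point 519, which falls into the 512 bucket and is re-emitted as "512-1023"; a single-number key such as "4096" becomes "4096-4607"; counts under the skipped-sum key are collected under the key "skipped".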