Adding upstream version 1.4.2.
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent e344d0b8ae, commit 1ea3e103a7
77 changed files with 5760 additions and 0 deletions
dsc_datatool/__init__.py (new file, 484 lines)
@@ -0,0 +1,484 @@
"""dsc_datatool
|
||||
|
||||
The main Python module for the command line tool `dsc-datatool`, see
|
||||
`man dsc-datatool` on how to run it.
|
||||
|
||||
On runtime it will load all plugins under the following module path:
|
||||
- dsc_datatool.input
|
||||
- dsc_datatool.output
|
||||
- dsc_datatool.generator
|
||||
- dsc_datatool.transformer
|
||||
|
||||
Each plugin category should base it class on one of the follow superclasses:
|
||||
- dsc_datatool.Input
|
||||
- dsc_datatool.Output
|
||||
- dsc_datatool.Generator
|
||||
- dsc_datatool.Transformer
|
||||
|
||||
Doing so it will be automatically registered as available and indexed in
|
||||
the following public dicts using the class name:
|
||||
- inputs
|
||||
- outputs
|
||||
- generators
|
||||
- transformers
|
||||
|
||||
Example of an output:
|
||||
|
||||
from dsc_datatool import Output
|
||||
class ExampleOutput(Output):
|
||||
def process(self, datasets)
|
||||
...
|
||||
|
||||
:copyright: 2024 OARC, Inc.
|
||||
"""
|
||||
|
||||
__version__ = '1.4.2'

import argparse
import logging
import os
import importlib
import pkgutil
import sys
import traceback
import re

args = argparse.Namespace()
inputs = {}
outputs = {}
generators = {}
transformers = {}
process_dataset = {}
encoding = 'utf-8'


class Dataset(object):
    """A representation of a DSC dataset

    A DSC dataset is a one to two dimensional structure where the last
    dimension holds an array of values and counters.

    It is based on the XML structure of DSC:

        <array name="pcap_stats" dimensions="2" start_time="1563520560" stop_time="1563520620">
          <dimension number="1" type="ifname"/>
          <dimension number="2" type="pcap_stat"/>
          <data>
            <ifname val="eth0">
              <pcap_stat val="filter_received" count="5625"/>
              <pcap_stat val="pkts_captured" count="4894"/>
              <pcap_stat val="kernel_dropped" count="731"/>
            </ifname>
          </data>
        </array>

    Attributes:
    - name: The name of the dataset
    - start_time: The start time of the dataset in seconds
    - stop_time: The stop time of the dataset in seconds
    - dimensions: An array of first `Dimension`s; each may hold second
      dimensions in its own `dimensions` attribute
    """
    name = None
    start_time = None
    stop_time = None
    dimensions = None


    def __init__(self):
        self.dimensions = []


    def __repr__(self):
        return '<Dataset name=%r dimensions=%r>' % (self.name, self.dimensions)


class Dimension(object):
    """A representation of a DSC dimension

    A DSC dataset dimension which can be the first or second dimension,
    see `Dataset` for more information.

    Attributes:
    - name: The name of the dimension
    - value: Is set to the value of the dimension if it's the first dimension
    - values: A dict of values with corresponding counters if it's the second dimension
    - dimensions: An array with the second `Dimension`s if this is the first dimension
    """
    name = None
    value = None
    values = None
    dimensions = None


    def __init__(self, name):
        self.name = name
        self.values = {}
        self.dimensions = []


    def __repr__(self):
        return '<Dimension name=%r value=%r dimensions=%r>' % (self.name, self.values or self.value, self.dimensions)


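# Illustrative sketch (added commentary, not part of the upstream source):
# building the "pcap_stats" sample from the Dataset docstring by hand shows
# how the two classes nest; names and counts come straight from that XML.
def _example_pcap_stats():
    """Return a Dataset mirroring the docstring's pcap_stats XML sample."""
    dataset = Dataset()
    dataset.name = 'pcap_stats'
    dataset.start_time = 1563520560
    dataset.stop_time = 1563520620
    ifname = Dimension('ifname')  # first dimension, carries a single value
    ifname.value = 'eth0'
    pcap_stat = Dimension('pcap_stat')  # second dimension, values and counters
    pcap_stat.values = {
        'filter_received': 5625,
        'pkts_captured': 4894,
        'kernel_dropped': 731,
    }
    ifname.dimensions.append(pcap_stat)
    dataset.dimensions.append(ifname)
    return dataset

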
class Input(object):
    """Base class of an input plugin"""


    def process(self, file):
        """Input.process(...) -> [ Dataset, ... ]

        Called to process a file and return an array of `Dataset`'s found in it.
        """
        raise Exception('process() not overloaded')


    def __init_subclass__(cls):
        """This method is called when a class is subclassed and it will
        register the input plugin in `inputs`."""
        global inputs
        if cls.__name__ in inputs:
            raise Exception('Duplicate input module: %s already exists' % cls.__name__)
        inputs[cls.__name__] = cls


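# Sketch of an input plugin (illustrative only, and left commented out because
# merely defining a subclass registers it in `inputs` via __init_subclass__;
# this hypothetical plugin does not ship with the tool):
#
#     class CSVInput(Input):
#         def process(self, file):
#             dataset = Dataset()
#             ...  # parse `file` and fill in name, times and dimensions
#             return [dataset]
#
# After such a definition, inputs['CSVInput'] is the class itself.

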
class Output(object):
    """Base class of an output plugin"""


    def process(self, datasets):
        """Output.process([ Dataset, ... ])

        Called to output the `Dataset`'s in the given array."""
        raise Exception('process() not overloaded')


    def __init__(self, opts):
        """instance = Output({ 'opt': value, ... })

        Called to create an instance of the output plugin, it will get a dict
        with options provided on the command line."""
        pass


    def __init_subclass__(cls):
        """This method is called when a class is subclassed and it will
        register the output plugin in `outputs`."""
        global outputs
        if cls.__name__ in outputs:
            raise Exception('Duplicate output module: %s already exists' % cls.__name__)
        outputs[cls.__name__] = cls


class Generator(object):
    """Base class of a generator plugin"""


    def process(self, datasets):
        """Generator.process([ Dataset, ... ]) -> [ Dataset, ... ]

        Called to generate additional `Dataset`'s based on the given array
        of `Dataset`'s."""
        raise Exception('process() not overloaded')


    def __init__(self, opts):
        """instance = Generator({ 'opt': value, ... })

        Called to create an instance of the generator plugin, it will get a dict
        with options provided on the command line."""
        pass


    def __init_subclass__(cls):
        """This method is called when a class is subclassed and it will
        register the generator plugin in `generators`."""
        global generators
        if cls.__name__ in generators:
            raise Exception('Duplicate generator module: %s already exists' % cls.__name__)
        generators[cls.__name__] = cls


class Transformer(object):
    """Base class of a transformer plugin"""


    def process(self, datasets):
        """Transformer.process([ Dataset, ... ])

        Called to transform the given `Dataset`'s, modifying them in place."""
        raise Exception('process() not overloaded')


    def __init__(self, opts):
        """instance = Transformer({ 'opt': value, ... })

        Called to create an instance of the transformer plugin, it will get a dict
        with options provided on the command line."""
        pass


    def __init_subclass__(cls):
        """This method is called when a class is subclassed and it will
        register the transformer plugin in `transformers`."""
        global transformers
        if cls.__name__ in transformers:
            raise Exception('Duplicate transformer module: %s already exists' % cls.__name__)
        transformers[cls.__name__] = cls


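# Sketch of a transformer plugin (illustrative only, commented out so it is
# not actually registered in `transformers`); unlike a Generator it returns
# nothing and mutates the given datasets in place:
#
#     class UpperCaseNames(Transformer):
#         def process(self, datasets):
#             for dataset in datasets:
#                 dataset.name = dataset.name.upper()

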
def main():
    """Called when running `dsc-datatool`."""
    def iter_namespace(ns_pkg):
        return pkgutil.iter_modules(ns_pkg.__path__, ns_pkg.__name__ + ".")

    def split_arg(arg, num=1):
        sep = arg[0]
        p = arg.split(sep)
        p.pop(0)
        ret = ()
        while num > 0:
            ret += (p.pop(0),)
            num -= 1
        ret += (p,)
        return ret

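    # How split_arg reads its input (illustrative note, not in the original):
    # the first character is taken as the separator, the next `num` fields are
    # returned as-is and the rest as a list of raw option strings, e.g.
    #   split_arg(',Name,opt=1')                  -> ('Name', ['opt=1'])
    #   split_arg(';Name;set1,set2;opt=1', num=2) -> ('Name', 'set1,set2', ['opt=1'])
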
    def parse_opts(opts):
        ret = {}
        for opt in opts:
            p = opt.split('=', maxsplit=1)
            if len(p) > 1:
                if p[0] in ret:
                    if isinstance(ret[p[0]], list):
                        ret[p[0]].append(p[1])
                    else:
                        ret[p[0]] = [ ret[p[0]], p[1] ]
                else:
                    ret[p[0]] = p[1]
            elif len(p) > 0:
                ret[p[0]] = True
        return ret

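    # Behaviour sketch (illustrative note, not in the original): repeated
    # options collect into a list, value-less options become flags, e.g.
    #   parse_opts(['host=a', 'host=b', 'quiet'])
    #   -> {'host': ['a', 'b'], 'quiet': True}
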
    def _process(datasets, generators, transformers, outputs):
        gen_datasets = []
        for generator in generators:
            try:
                gen_datasets += generator.process(datasets)
            except Exception as e:
                logging.warning('Generator %s failed: %s' % (generator, e))
                exc_type, exc_value, exc_traceback = sys.exc_info()
                for tb in traceback.format_tb(exc_traceback):
                    logging.warning(str(tb))
                return 2

        datasets += gen_datasets

        if '*' in transformers:
            for transformer in transformers['*']:
                try:
                    transformer.process(datasets)
                except Exception as e:
                    logging.warning('Transformer %s failed: %s' % (transformer, e))
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    for tb in traceback.format_tb(exc_traceback):
                        logging.warning(str(tb))
                    return 2
        for dataset in datasets:
            if dataset.name in transformers:
                for transformer in transformers[dataset.name]:
                    try:
                        transformer.process([dataset])
                    except Exception as e:
                        logging.warning('Transformer %s failed: %s' % (transformer, e))
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        for tb in traceback.format_tb(exc_traceback):
                            logging.warning(str(tb))
                        return 2

        for output in outputs:
            try:
                output.process(datasets)
            except Exception as e:
                logging.warning('Output %s failed: %s' % (output, e))
                exc_type, exc_value, exc_traceback = sys.exc_info()
                for tb in traceback.format_tb(exc_traceback):
                    logging.warning(str(tb))
                return 2

        return 0

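    # Pipeline order (descriptive note, not in the original source): _process
    # first runs every generator over the whole batch, appends the generated
    # datasets, then applies '*' transformers to everything, then per-dataset
    # transformers, and finally hands the combined batch to every output.
    # Any failure logs a warning plus traceback and aborts with return code 2.
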
    global args, inputs, outputs, generators, transformers, process_dataset

    parser = argparse.ArgumentParser(prog='dsc-datatool',
        description='Export DSC data into various formats and databases.',
        epilog='See man-page dsc-datatool(1) and dsc-datatool-[generator|transformer|output] <name>(5) for more information')
    parser.add_argument('-c', '--conf', nargs=1,
        help='Not implemented')
        # help='Specify the YAML configuration file to use (defaults to ~/.dsc-datatool.conf), any command line option will override the options in the configuration file. See dsc-datatool.conf(5) for more information.')
    parser.add_argument('-s', '--server', nargs=1,
        help='Specify the server for where the data comes from. (required)')
    parser.add_argument('-n', '--node', nargs=1,
        help='Specify the node for where the data comes from. (required)')
    parser.add_argument('-x', '--xml', action='append',
        help='Read DSC data from the given file or directory, can be specified multiple times. If a directory is given then all files ending with .xml will be read.')
    parser.add_argument('-d', '--dat', action='append',
        help='Read DSC data from the given directory, can be specified multiple times. Note that the DAT format depends on the filename to know what type of data it is.')
    parser.add_argument('--dataset', action='append',
        help='Specify that only the listed datasets will be processed, the list is comma separated and the option can be given multiple times.')
    parser.add_argument('-o', '--output', action='append',
        help='"<sep><output>[<sep>option=value...]" Output data to <output> and use <sep> as an options separator.')
    parser.add_argument('-t', '--transform', action='append',
        help='"<sep><name><sep><datasets>[<sep>option=value...]" Use the transformer <name> to change the list of datasets in <datasets>.')
    parser.add_argument('-g', '--generator', action='append',
        help='"<name>[,<name>,...]" or "<sep><name>[<sep>option=value...]" Use the specified generators to generate additional datasets.')
    parser.add_argument('--list', action='store_true',
        help='List the available generators, transformers and outputs, then exit.')
    parser.add_argument('--skipped-key', nargs=1, default='-:SKIPPED:-',
        help='Set the special DSC skipped key. (defaults to "-:SKIPPED:-")')
    parser.add_argument('--skipped-sum-key', nargs=1, default='-:SKIPPED_SUM:-',
        help='Set the special DSC skipped sum key. (defaults to "-:SKIPPED_SUM:-")')
    parser.add_argument('--encoding', nargs=1, default='utf-8',
        help='Encoding to use for all files, defaults to utf-8.')
    parser.add_argument('-v', '--verbose', action='count', default=0,
        help='Increase the verbose level, can be given multiple times.')
    parser.add_argument('-V', '--version', action='version', version='%(prog)s v'+__version__,
        help='Display version and exit.')

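    # Hypothetical invocation sketch (names and options are placeholders, not
    # taken from the original source; real plugin names come from --list):
    #   dsc-datatool -s example.com -n ns1.example.com -x /path/to/dsc-xml \
    #       -t ';SomeTransformer;dataset1,dataset2;opt=value' \
    #       -o ';SomeOutput;opt=value'
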
    args = parser.parse_args()

    log_level = 30 - (args.verbose * 10)
    if log_level < 0:
        log_level = 0
    logging.basicConfig(format='%(asctime)s %(levelname)s %(module)s: %(message)s', level=log_level, stream=sys.stderr)

    import dsc_datatool.input
    import dsc_datatool.output
    import dsc_datatool.generator
    import dsc_datatool.transformer

    for finder, name, ispkg in iter_namespace(dsc_datatool.input):
        importlib.import_module(name)
    for finder, name, ispkg in iter_namespace(dsc_datatool.output):
        importlib.import_module(name)
    for finder, name, ispkg in iter_namespace(dsc_datatool.generator):
        importlib.import_module(name)
    for finder, name, ispkg in iter_namespace(dsc_datatool.transformer):
        importlib.import_module(name)

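    # Discovery note (added commentary): importing every module found under
    # the four plugin namespaces triggers each plugin class's
    # __init_subclass__, which fills the inputs/outputs/generators/transformers
    # dicts; presumably third-party plugins can hook in by installing modules
    # under these namespace paths.
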
    if args.list:
        print('Generators:')
        for name in generators:
            print('', name)
        print('Transformers:')
        for name in transformers:
            print('', name)
        print('Outputs:')
        for name in outputs:
            print('', name)
        return 0

    if not args.server or not args.node:
        raise Exception('--server and --node must be given')

    if isinstance(args.server, list):
        args.server = ' '.join(args.server)
    elif not isinstance(args.server, str):
        raise Exception('Invalid argument for --server: %r' % args.server)
    if isinstance(args.node, list):
        args.node = ' '.join(args.node)
    elif not isinstance(args.node, str):
        raise Exception('Invalid argument for --node: %r' % args.node)

    gens = []
    if args.generator:
        for arg in args.generator:
            if not re.match(r'^\w', arg):
                name, opts = split_arg(arg)
                if name not in generators:
                    logging.critical('Generator %s does not exist' % name)
                    return 1
                gens.append(generators[name](parse_opts(opts)))
                continue
            for name in arg.split(','):
                if name not in generators:
                    logging.critical('Generator %s does not exist' % name)
                    return 1
                gens.append(generators[name]({}))

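    # Parsing note (added commentary): a -g argument that does not start with
    # a word character is treated as the "<sep><name>[<sep>option=value...]"
    # form and split with split_arg(); otherwise it is a plain comma-separated
    # list of generator names instantiated without options.
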
    trans = {}
    if args.transform:
        for arg in args.transform:
            name, datasets, opts = split_arg(arg, num=2)
            if name not in transformers:
                logging.critical('Transformer %s does not exist' % name)
                return 1
            for dataset in datasets.split(','):
                if dataset not in trans:
                    trans[dataset] = []
                trans[dataset].append(transformers[name](parse_opts(opts)))

    out = []
    if args.output:
        for arg in args.output:
            name, opts = split_arg(arg)
            if name not in outputs:
                logging.critical('Output %s does not exist' % name)
                return 1
            out.append(outputs[name](parse_opts(opts)))

    if args.dataset:
        for dataset in args.dataset:
            for p in dataset.split(','):
                process_dataset[p] = True

    xml = []
    if args.xml:
        for entry in args.xml:
            if os.path.isfile(entry):
                xml.append(entry)
            elif os.path.isdir(entry):
                with os.scandir(entry) as dir:
                    for file in dir:
                        if not file.name.startswith('.') and file.is_file() and file.name.lower().endswith('.xml'):
                            xml.append(file.path)
            else:
                logging.error('--xml %r is not a file or directory' % entry)

    dat = []
    if args.dat:
        for entry in args.dat:
            if os.path.isdir(entry):
                dat.append(entry)
            else:
                logging.error('--dat %r is not a directory' % entry)

    if not xml and not dat:
        logging.error('No valid --xml or --dat given')
        return 1

    xml_input = inputs['XML']()
    for file in xml:
        try:
            datasets = xml_input.process(file)
        except Exception as e:
            logging.critical('Unable to process XML file %s: %s' % (file, e))
            return 1

        ret = _process(datasets, gens, trans, out)
        if ret > 0:
            return ret

    dat_input = inputs['DAT']()
    for dir in dat:
        try:
            datasets = dat_input.process(dir)
        except Exception as e:
            logging.critical('Unable to process DAT files in %s: %s' % (dir, e))
            return 1

        ret = _process(datasets, gens, trans, out)
        if ret > 0:
            return ret
Loading…