#!/usr/bin/python3
import glob
from lxml import etree

# Files in the current directory matching standard-*.xml.  They are skipped
# as inputs by the main loop and are the only hrefs that extract_data()
# reports as dependencies.
exclude_list = glob.glob('standard-*.xml')

# Parser handed to every etree.parse() call in this script.
PARSER = etree.XMLParser(remove_blank_text=True)


def extract_data(fname):
    """Return the manual section and include dependencies of *fname*.

    The file is parsed with the module-level ``PARSER``.

    The section is the text of the ``./refmeta/manvolnum`` element, or the
    string ``'0'`` when that element is missing or has no text, so the
    first item of the result is always a ``str``.

    Dependencies are the ``href`` values of every element that carries both
    an ``href`` and an ``xpointer`` attribute, restricted to names present
    in the module-level ``exclude_list``.  Duplicates are removed and the
    result is sorted so the output is stable from run to run.

    Returns:
        A ``(manvolnum, deps)`` tuple: a string and a list of strings.
    """
    et = etree.parse(fname, PARSER)

    node = et.find('./refmeta/manvolnum')
    # Always return a string: the caller joins this value with other strings,
    # and an int or None there would raise TypeError.
    if node is not None and node.text is not None:
        manvolnum = node.text
    else:
        manvolnum = '0'

    deps = set()
    for elem in et.iter():
        # Look the attributes up by name; relying on the first attribute
        # value would pick the wrong one whenever href is not written first.
        href = elem.get('href')
        if href is None or elem.get('xpointer') is None:
            continue
        if href in exclude_list:
            deps.add(href)

    return manvolnum, sorted(deps)


# Build one record per *.xml file in the current directory that is not in
# exclude_list.  A record is "stem,manvolnum,fname,deps", where deps is a
# ':'-separated list or the literal 'None' when there are no dependencies.
# All records are printed on a single line, separated by ';'.
output = []
excluded = set(exclude_list)  # built once for constant-time membership tests
file_list = glob.glob('*.xml')
for fname in file_list:
    if fname in excluded:
        continue
    stem = fname[:-4]  # the '*.xml' pattern guarantees a 4-character suffix
    manvolnum, deps = extract_data(fname)
    deps = ':'.join(deps) if deps else 'None'
    # Coerce the section to str so a non-string value cannot make join()
    # raise TypeError.
    output.append(','.join([stem, str(manvolnum), fname, deps]))

print(';'.join(output))