#!/usr/bin/env python3
"""Summarise the structure of an XML file.

For every tag found in the document, report which attributes and which
child tags were seen, and whether each one appeared on every occurrence
of the tag ("mandatory") or only on some of them ("optional").
"""

import xml.etree.ElementTree as xml
import sys


class Tagdata:
    """Accumulated observations about a single tag name.

    Attributes:
        attrs: maps attribute name -> (optional, samples), where
            ``optional`` is a bool and ``samples`` is the set of values
            seen for that attribute.
        elems: maps child tag name -> optional (bool).
        content: set built up by ``add_content``.
    """

    def __init__(self):
        self.attrs = {}
        self.elems = {}
        self.content = set()

    def add_attr(self, k, v, *, optional=False):
        """Record value ``v`` as a sample for attribute ``k``.

        ``optional`` is only used when ``k`` has not been recorded yet;
        an already-known attribute keeps its current optional flag.
        """
        (known_optional, samples) = self.attrs.get(k, (optional, set()))
        self.attrs[k] = (known_optional, samples | {v})

    def add_elem(self, elem, *, optional=False):
        """Record ``elem.tag`` as a child tag.

        ``optional`` is only used when the child tag has not been
        recorded yet; an already-known child keeps its current flag.
        """
        self.elems[elem.tag] = self.elems.get(elem.tag, optional)

    def add_content(self, stuff):
        """Merge the items of the iterable ``stuff`` into ``content``."""
        self.content |= set(stuff)

    def make_attrs_optional(self, attrs):
        """Mark every known attribute that is absent from ``attrs`` optional."""
        for name in set(self.attrs) - set(attrs):
            (_, samples) = self.attrs[name]
            self.attrs[name] = (True, samples)

    def make_elems_optional(self, elems):
        """Mark every known child tag that is absent from ``elems`` optional."""
        for name in set(self.elems) - set(elems):
            self.elems[name] = True

    def __repr__(self):
        return 'Tagdata(attrs={}, elems={})'.format(
            self.attrs, self.elems)


class Traverse:
    """Walk an element tree and collect one ``Tagdata`` per tag name.

    Attributes:
        cache: maps tag name -> ``Tagdata`` for every tag visited so far.
    """

    def __init__(self):
        self.cache = {}

    def add_first_element(self, elem):
        """Register the first occurrence of ``elem.tag`` and recurse.

        Everything seen on the first occurrence starts out mandatory.
        """
        self.cache[elem.tag] = tag = Tagdata()
        for k, v in elem.attrib.items():
            tag.add_attr(k, v)
        for child in elem:
            tag.add_elem(child)
            self.add_element(child)

    def add_subsequent_element(self, elem):
        """Merge a further occurrence of an already-known tag and recurse.

        Attributes/children that are new on this occurrence are recorded
        as optional, because an earlier occurrence of the tag lacked
        them.  Attributes/children known from earlier occurrences but
        missing here are downgraded to optional.
        """
        tag = self.cache[elem.tag]
        for k, v in elem.attrib.items():
            tag.add_attr(k, v, optional=k not in tag.attrs)
        for child in elem:
            # Record the child (not the parent itself) on the parent's data.
            tag.add_elem(child, optional=child.tag not in tag.elems)
            self.add_element(child)
        tag.make_attrs_optional(elem.attrib.keys())
        tag.make_elems_optional([e.tag for e in elem])

    def add_element(self, elem):
        """Dispatch ``elem`` depending on whether its tag was seen before."""
        if elem.tag not in self.cache:
            self.add_first_element(elem)
        else:
            self.add_subsequent_element(elem)


def main(path):
    """Parse the XML file at ``path`` and print a per-tag summary."""
    t = Traverse()
    t.add_element(xml.parse(path).getroot())
    for k, v in t.cache.items():
        print('tag {0}'.format(k))
        for (attr, (optional, _samples)) in v.attrs.items():
            print(' - attr {0} ({1})'.format(
                attr, 'optional' if optional else 'mandatory'
            ))
        for (elem, optional) in v.elems.items():
            print(' - child {0} ({1})'.format(
                elem, 'optional' if optional else 'mandatory'
            ))


if __name__ == '__main__':
    if sys.argv[1:]:
        main(sys.argv[1])
    else:
        sys.stderr.write(
            'usage: {0} [file.xml]\n'.format(sys.argv[0]))