1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- #!/usr/bin/env python3
- import xml.etree.ElementTree as xml
- import sys
class Tagdata:
    """Accumulated observations about one tag name.

    Attributes:
        attrs: maps attribute name -> ``(optional, samples)`` where
            ``optional`` is a bool and ``samples`` is the set of values
            recorded for that attribute.
        elems: maps child tag name -> bool, ``True`` when the child has
            been marked optional.
        content: set that is only ever extended by ``add_content``.
    """

    def __init__(self):
        self.attrs = {}
        self.elems = {}
        self.content = set()

    def add_attr(self, k, v):
        """Record value ``v`` for attribute ``k``.

        A previously unseen attribute starts out as not optional; an
        existing attribute keeps whatever optional flag it already has.
        """
        if k in self.attrs:
            flag, seen = self.attrs[k]
        else:
            flag, seen = False, set()
        # Build a new set rather than mutating the stored one in place.
        self.attrs[k] = (flag, seen.union((v,)))

    def add_elem(self, elem):
        """Record ``elem.tag`` as a child, not optional if it is new."""
        self.elems.setdefault(elem.tag, False)

    def add_content(self, stuff):
        """Merge every item obtained by iterating ``stuff`` into ``content``."""
        incoming = set(stuff)
        self.content.update(incoming)

    def make_attrs_optional(self, attrs):
        """Flag every recorded attribute that is not in ``attrs`` as optional."""
        present = set(attrs)
        absent = [name for name in self.attrs if name not in present]
        for name in absent:
            _, seen = self.attrs[name]
            self.attrs[name] = (True, seen)

    def make_elems_optional(self, elems):
        """Flag every recorded child tag that is not in ``elems`` as optional."""
        present = set(elems)
        absent = [name for name in self.elems if name not in present]
        for name in absent:
            self.elems[name] = True

    def __repr__(self):
        template = 'Tagdata(attrs={}, elems={})'
        return template.format(self.attrs, self.elems)
class Traverse:
    """Walk an element tree and build one record per distinct tag name.

    ``cache`` maps tag name -> the record created for that tag. Each
    record tracks the attributes and child tags seen on elements with
    that tag and whether each one was missing from some occurrence.
    """

    def __init__(self):
        self.cache = {}

    def add_first_element(self, elem):
        """Create the record for ``elem.tag`` from its first occurrence.

        Everything seen on the first occurrence starts out mandatory.
        """
        self.cache[elem.tag] = tag = Tagdata()
        for k, v in elem.attrib.items():
            tag.add_attr(k, v)
        for child in elem:
            tag.add_elem(child)
        # Recurse only after this element's own children are all recorded:
        # a descendant with the same tag shares this record and must be
        # compared against the complete picture, not a half-built one.
        for child in elem:
            self.add_element(child)

    def add_subsequent_element(self, elem):
        """Merge a later occurrence of ``elem.tag`` into its record.

        Attributes/children known before but absent here become optional,
        and attributes/children first seen here are optional as well,
        because every earlier occurrence lacked them.
        """
        tag = self.cache[elem.tag]
        # Snapshot what earlier occurrences had before merging this one.
        known_attrs = set(tag.attrs)
        known_elems = set(tag.elems)
        for k, v in elem.attrib.items():
            tag.add_attr(k, v)
        for child in elem:
            # Record the child, not the element itself.
            tag.add_elem(child)
        # Known earlier but missing from this occurrence -> optional.
        tag.make_attrs_optional(elem.attrib.keys())
        tag.make_elems_optional([child.tag for child in elem])
        # New in this occurrence (not in the snapshot) -> optional.
        tag.make_attrs_optional(known_attrs)
        tag.make_elems_optional(known_elems)
        # Same ordering rule as in add_first_element: recurse last.
        for child in elem:
            self.add_element(child)

    def add_element(self, elem):
        """Dispatch ``elem`` depending on whether its tag was seen before."""
        if elem.tag not in self.cache:
            self.add_first_element(elem)
        else:
            self.add_subsequent_element(elem)
def main(path):
    """Parse the XML file at ``path`` and print a per-tag summary.

    For each tag collected by ``Traverse`` this prints the tag name,
    then one line per recorded attribute and one line per recorded
    child tag, each labelled ``optional`` or ``mandatory``.
    """
    def label(is_optional):
        return 'optional' if is_optional else 'mandatory'

    walker = Traverse()
    root = xml.parse(path).getroot()
    walker.add_element(root)

    for tag_name, data in walker.cache.items():
        print('tag {0}'.format(tag_name))
        # Looping over an empty dict prints nothing, so no guard is needed.
        for attr_name, (attr_optional, _samples) in data.attrs.items():
            print(' - attr {0} ({1})'.format(attr_name, label(attr_optional)))
        for child_name, child_optional in data.elems.items():
            print(' - child {0} ({1})'.format(child_name, label(child_optional)))
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the XML file.
    cli_args = sys.argv[1:]
    if not cli_args:
        # No file given: print the usage line on stderr.
        usage = 'usage: {0} [file.xml]\n'.format(sys.argv[0])
        sys.stderr.write(usage)
    else:
        main(cli_args[0])
|