infer-xml.py 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. #!/usr/bin/env python3
  2. import xml.etree.ElementTree as xml
  3. import sys
  4. class Tagdata:
  5. def __init__(self):
  6. self.attrs = {}
  7. self.elems = {}
  8. self.content = set()
  9. def add_attr(self, k, v):
  10. (optional, samples) = self.attrs.get(k, (False, set()))
  11. self.attrs[k] = (optional, samples | set([v]))
  12. def add_elem(self, elem):
  13. self.elems[elem.tag] = self.elems.get(elem.tag, False)
  14. def add_content(self, stuff):
  15. self.content |= set(stuff)
  16. def make_attrs_optional(self, attrs):
  17. missing = set(self.attrs) - set(attrs)
  18. for m in missing:
  19. (_, samples) = self.attrs[m]
  20. self.attrs[m] = (True, samples)
  21. def make_elems_optional(self, elems):
  22. missing = set(self.elems) - set(elems)
  23. for m in missing:
  24. self.elems[m] = True
  25. def __repr__(self):
  26. return 'Tagdata(attrs={}, elems={})'.format(
  27. self.attrs, self.elems)
  28. class Traverse:
  29. def __init__(self):
  30. self.cache = {}
  31. def add_first_element(self, elem):
  32. self.cache[elem.tag] = tag = Tagdata()
  33. for k, v in elem.attrib.items():
  34. tag.add_attr(k, v)
  35. for child in elem:
  36. tag.add_elem(child)
  37. self.add_element(child)
  38. def add_subsequent_element(self, elem):
  39. tag = self.cache[elem.tag]
  40. for k, v in elem.attrib.items():
  41. tag.add_attr(k, v)
  42. for child in elem:
  43. tag.add_elem(elem)
  44. self.add_element(child)
  45. tag.make_attrs_optional(elem.attrib.keys())
  46. tag.make_elems_optional([e.tag for e in elem])
  47. def add_element(self, elem):
  48. if elem.tag not in self.cache:
  49. self.add_first_element(elem)
  50. else:
  51. self.add_subsequent_element(elem)
  52. def main(path):
  53. t = Traverse()
  54. t.add_element(xml.parse(path).getroot())
  55. for k, v in t.cache.items():
  56. print('tag {0}'.format(k))
  57. if v.attrs:
  58. for (attr, (optional, sample)) in v.attrs.items():
  59. print(' - attr {0} ({1})'.format(
  60. attr, 'optional' if optional else 'mandatory'
  61. ))
  62. if v.elems:
  63. for (elem, optional) in v.elems.items():
  64. print(' - child {0} ({1})'.format(
  65. elem, 'optional' if optional else 'mandatory'
  66. ))
  67. if __name__ == '__main__':
  68. if sys.argv[1:]:
  69. main(sys.argv[1])
  70. else:
  71. sys.stderr.write(
  72. 'usage: {0} [file.xml]\n'.format(sys.argv[0]))