Coverage for portality/lib/xmlutil.py: 19%
42 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-22 15:59 +0100
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-22 15:59 +0100
1from lxml import etree
2import re
# Matches an XML declaration that names an encoding, anchored at the very start
# of the text; group 1 captures the encoding name.
# - written as a raw string so the regex escapes (\?, \s) are not interpreted
#   as (invalid) Python string escapes
# - [^>] keeps the match inside the declaration itself, so substituting the
#   match away can never remove document content that follows it, and a
#   declaration that is wrapped over several lines is still recognised
encoding_rx = re.compile(r"""^<\?xml\s[^>]*?encoding=["']([^"']+)["'][^>]*?\?>""")
def detect_encoding(s):
    """Return the encoding named by the XML declaration at the start of ``s``.

    :param s: the XML text to inspect
    :return: the encoding name captured by ``encoding_rx``, or None when the
        pattern does not match at the start of ``s``
    """
    declaration = encoding_rx.match(s)
    return declaration.group(1) if declaration is not None else None
def fromstring(s):
    """Parse ``s`` with ``etree.fromstring``, retrying when it raises ValueError.

    The attempts, in order:

    1. parse ``s`` exactly as given
    2. if ``s`` is a ``str`` with a detectable encoding declaration, encode it
       with that encoding and parse the resulting bytes
    3. remove the encoding declaration from ``s`` and parse what is left

    :param s: the XML document to parse
    :return: whatever ``etree.fromstring`` returns for the first attempt that
        succeeds
    :raises ValueError: the error from the first attempt, when ``s`` is not a
        ``str`` and so no retry is possible
    """
    # attempt 1: hand the input to the parser untouched
    try:
        return etree.fromstring(s)
    except ValueError as exc:
        direct_failure = exc

    # the retries below only make sense for text input; for anything else
    # surface the original failure
    if not isinstance(s, str):
        raise direct_failure

    # attempt 2: turn the text into bytes using the encoding it declares
    declared = detect_encoding(s)
    if declared is not None:
        try:
            return etree.fromstring(s.encode(declared))
        except (LookupError, ValueError):
            # LookupError: the declared encoding is not one Python knows
            # ValueError: encoding or parsing with that encoding failed
            # either way, fall through to the last resort
            pass

    # attempt 3: drop the encoding declaration and let the parser work it out;
    # any error raised here is deliberately left to propagate
    undeclared = encoding_rx.sub("", s).strip()
    return etree.fromstring(undeclared)
def xp_first_text(element, xpath, default=None):
    """Return the ``text`` of the first result of ``xpath`` run on ``element``.

    :param element: object providing an ``xpath`` method
    :param xpath: the expression handed to ``element.xpath``
    :param default: value to return when the expression yields no results
    :return: the ``text`` attribute of the first result, returned as-is, or
        ``default`` when there are no results
    """
    matches = element.xpath(xpath)
    if len(matches) == 0:
        return default
    return matches[0].text
def xp_texts(element, xpath):
    """Collect the ``text`` of every result of ``xpath`` run on ``element``.

    :param element: object providing an ``xpath`` method
    :param xpath: the expression handed to ``element.xpath``
    :return: list of the results' ``text`` values in result order, leaving out
        any result whose ``text`` is None
    """
    texts = []
    for match in element.xpath(xpath):
        value = match.text
        if value is not None:
            texts.append(value)
    return texts
def objectify(element):
    """Convert the children of ``element`` into a nested dict keyed by tag.

    A child that itself has children becomes a nested dict (built by a
    recursive call); any other child contributes its ``text``. When several
    children share a tag, the last one wins because each assignment overwrites
    the previous entry for that key.

    :param element: the element whose direct children are converted
    :return: dict mapping each child's tag to its text or to a nested dict
    """
    obj = {}
    # iterate the element itself rather than calling getchildren(): that method
    # is deprecated in lxml and no longer exists on the standard library's
    # ElementTree elements, whereas iteration and len() work on both
    for child in element:
        # FIXME: does not currently handle attributes (one option would be to
        # store each attribute under the key "@" + attribute name)
        if len(child) > 0:
            obj[child.tag] = objectify(child)
        else:
            obj[child.tag] = child.text
    return obj