from __future__ import absolute_import, division, unicode_literals

from types import ModuleType

from six import text_type

try:
    import xml.etree.cElementTree as default_etree
except ImportError:
    import xml.etree.ElementTree as default_etree


__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
           "surrogatePairToCodepoint", "moduleFactoryFactory",
           "supports_lone_surrogates"]


# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
# Jython. This is because UTF-16 itself is based on the use of such
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
    _x = eval('"\\uD800"')
    if not isinstance(_x, text_type):
        # We need this with u"" because of http://bugs.jython.org/issue2039
        _x = eval('u"\\uD800"')
        assert isinstance(_x, text_type)
except Exception:
    # Any ordinary failure of the probe means "unsupported".  Exception
    # (rather than a bare except) lets KeyboardInterrupt and SystemExit
    # propagate instead of being mistaken for a platform limitation.
    supports_lone_surrogates = False
else:
    supports_lone_surrogates = True


class MethodDispatcher(dict):
    """Dict with 2 special properties:

    On initiation, keys that are lists, sets or tuples are converted to
    multiple keys so accessing any one of the items in the original
    list-like object returns the matching value

    md = MethodDispatcher([(("foo", "bar"), "baz")])
    md["foo"] == "baz"

    A default value which can be set through the default attribute.
    """

    def __init__(self, items=()):
        """Build the dispatcher from an iterable of ``(key, value)`` pairs.

        ``items`` is iterated directly and each element is unpacked into
        a key and a value.  A key whose exact type is list, tuple,
        frozenset or set is expanded so that each of its members maps to
        the value.
        """
        # Using _dictEntries instead of directly assigning to self is about
        # twice as fast. Please do careful performance testing before changing
        # anything here.
        _dictEntries = []
        for name, value in items:
            if type(name) in (list, tuple, frozenset, set):
                for item in name:
                    _dictEntries.append((item, value))
            else:
                _dictEntries.append((name, value))
        dict.__init__(self, _dictEntries)
        self.default = None

    def __getitem__(self, key):
        """Return the value for ``key``, or ``self.default`` if absent."""
        return dict.get(self, key, self.default)


# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds

def isSurrogatePair(data):
    """Return True if ``data`` is a high surrogate followed by a low one.

    ``data`` must have length 2, with its first character in the range
    U+D800-U+DBFF and its second in the range U+DC00-U+DFFF.
    """
    return (len(data) == 2 and
            0xD800 <= ord(data[0]) <= 0xDBFF and
            0xDC00 <= ord(data[1]) <= 0xDFFF)


def surrogatePairToCodepoint(data):
    """Return the integer code point encoded by a surrogate pair.

    ``data[0]`` is taken as the high surrogate and ``data[1]`` as the low
    surrogate; no validation is done here (see ``isSurrogatePair``).
    """
    char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
                (ord(data[1]) - 0xDC00))
    return char_val

# Module Factory Factory (no, this isn't Java, I know)
# Here to stop this being duplicated all over the place.


def moduleFactoryFactory(factory):
    """Wrap ``factory`` in a caching function that builds modules.

    The returned ``moduleFactory(baseModule, *args, **kwargs)`` calls
    ``factory(baseModule, *args, **kwargs)``, which must return a mapping
    of names to objects, and installs that mapping into a fresh module
    named ``"_<baseModule.__name__>_factory"``.

    Modules are cached per base-module name *and* arguments, so calls with
    different ``args``/``kwargs`` get different modules rather than the
    module built for whichever arguments were seen first.  If any argument
    is unhashable the cache is skipped and a new module is built.
    """
    moduleCache = {}

    def moduleFactory(baseModule, *args, **kwargs):
        # ModuleType wants a native str name: text on Python 3, bytes on
        # Python 2 (where unicode_literals makes "" a unicode string).
        if isinstance(ModuleType.__name__, type("")):
            name = "_%s_factory" % baseModule.__name__
        else:
            name = b"_%s_factory" % baseModule.__name__

        try:
            cacheKey = (name, args, frozenset(kwargs.items()))
            return moduleCache[cacheKey]
        except KeyError:
            # Not built yet for this combination of arguments.
            pass
        except TypeError:
            # Unhashable argument: cannot be used as a cache key.
            cacheKey = None

        mod = ModuleType(name)
        objs = factory(baseModule, *args, **kwargs)
        mod.__dict__.update(objs)
        if cacheKey is not None:
            moduleCache[cacheKey] = mod
        return mod

    return moduleFactory