""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> codecs.CodecInfo object
    The getregentry() API must return a CodecInfo object with encoder, decoder,
    incrementalencoder, incrementaldecoder, streamwriter and streamreader
    attributes which adhere to the Python Codec Interface Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"

import codecs
import sys
from . import aliases

# Maps the encoding name exactly as passed to search_function() to either a
# CodecInfo object or None (a cached miss).
_cache = {}
# Sentinel distinguishing "not in the cache" from a cached None.
_unknown = '--unknown--'
# Non-empty fromlist so that __import__ returns the submodule itself.
_import_tail = ['*']
_aliases = aliases.aliases


class CodecRegistryError(LookupError, SystemError):
    """Raised when a codec module returns an unusable registry entry."""


def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Normalization works as follows: all non-alphanumeric
        characters except the dot used for Python package names are
        collapsed and replaced with a single underscore, e.g. '  -;#'
        becomes '_'. Leading and trailing underscores are removed.

        Note that encoding names should be ASCII only.  Non-ASCII
        alphanumeric characters are dropped from the result; they
        neither produce an underscore nor interrupt a run of
        punctuation.

        encoding may be a str or a bytes object; bytes are decoded as
        ASCII first (a non-ASCII byte raises UnicodeDecodeError).

        Returns the normalized name as a str.

    """
    if isinstance(encoding, bytes):
        encoding = str(encoding, "ascii")

    chars = []
    punct = False
    for c in encoding:
        if c.isalnum() or c == '.':
            if not c.isascii():
                # Dropped character: leave the separator state alone.
                # Flushing the pending '_' here would emit an underscore
                # with nothing after it, giving doubled or trailing
                # underscores (e.g. 'a-\xe9-b' -> 'a__b').
                continue
            if punct and chars:
                chars.append('_')
            chars.append(c)
            punct = False
        else:
            punct = True
    return ''.join(chars)


def search_function(encoding):
    """Codec search function for the modules of the encodings package.

    encoding is the encoding name to look up.  The name is normalized
    with normalize_encoding(), resolved through the alias table, and
    the matching 'encodings.<name>' module is imported and asked for
    its registry entry via getregentry().

    Returns a codecs.CodecInfo object, or None if no usable codec
    module was found.  Both hits and misses are cached under the name
    as it was passed in.

    Raises CodecRegistryError if the module's getregentry() returns a
    legacy tuple of the wrong length or with non-callable members.

    """

    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding
    # name and lookup the module using the aliased name, then try to
    # lookup the module using the standard import scheme, i.e. first
    # try in the encodings package, then at top-level.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        # Names containing a dot would reach outside the package.
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import of a
            # module with side-effects that is not in the 'encodings' package.
            mod = __import__('encodings.' + modname, fromlist=_import_tail,
                             level=0)
        except ImportError:
            # ImportError may occur because 'encodings.(modname)' does not exist,
            # or because it imports a name that does not exist (see mbcs and oem)
            pass
        else:
            break
    else:
        mod = None

    try:
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        # Legacy interface: a sequence of 4 to 7 items in CodecInfo
        # argument order.  The first two must be callable, the next four
        # may be None or callable, the seventh is the codec name.
        if not 4 <= len(entry) <= 7:
            raise CodecRegistryError('module "%s" (%s) failed to register'
                                     % (mod.__name__, mod.__file__))
        if not callable(entry[0]) or not callable(entry[1]) or \
           (entry[2] is not None and not callable(entry[2])) or \
           (entry[3] is not None and not callable(entry[3])) or \
           (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
           (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
            raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
                                     % (mod.__name__, mod.__file__))
        if len(entry) < 7 or entry[6] is None:
            # Pad the optional slots with None and default the name to the
            # module name without its package prefix.  Only the first six
            # items are kept: a 7-item entry whose name is None would
            # otherwise grow to eight items, one more positional argument
            # than CodecInfo accepts.
            head = tuple(entry[:6])
            entry = head + (None,) * (6 - len(head)) + \
                    (mod.__name__.split(".", 1)[1],)
        entry = codecs.CodecInfo(*entry)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                _aliases[alias] = modname

    # Return the registry entry
    return entry


# Register the search_function in the Python codec registry
codecs.register(search_function)

if sys.platform == 'win32':
    def _alias_mbcs(encoding):
        """Resolve the name of the ANSI code page to the mbcs codec.

        Returns the registry entry of encodings.mbcs when encoding
        equals 'cp<N>', N being the value of _winapi.GetACP();
        returns None otherwise, or when a required import fails.

        """
        try:
            import _winapi
            ansi_code_page = "cp%s" % _winapi.GetACP()
            if encoding == ansi_code_page:
                import encodings.mbcs
                return encodings.mbcs.getregentry()
        except ImportError:
            # Imports may fail while we are shutting down
            pass
        return None

    codecs.register(_alias_mbcs)