""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> codecs.CodecInfo object
    The getregentry() API must return a CodecInfo object with encoder, decoder,
    incrementalencoder, incrementaldecoder, streamwriter and streamreader
    attributes which adhere to the Python Codec Interface Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"

import codecs
import sys
from . import aliases

# Maps the encoding name exactly as passed to search_function() to either
# a codecs.CodecInfo object or None (a remembered failed lookup).
_cache = {}
# Sentinel used to tell "not in the cache" apart from a cached None.
_unknown = '--unknown--'
# A non-empty fromlist makes __import__() return the submodule itself
# instead of the top-level package.
_import_tail = ['*']
# Shared with aliases.aliases: entries added here by search_function() are
# visible through that module as well.
_aliases = aliases.aliases


class CodecRegistryError(LookupError, SystemError):
    """Raised when a codec module returns a malformed registry entry."""
    pass


def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Normalization works as follows: all non-alphanumeric
        characters except the dot used for Python package names are
        collapsed and replaced with a single underscore, e.g. '  -;#'
        becomes '_'. Leading and trailing underscores are removed.

        Note that encoding names should be ASCII only.

        A bytes argument is decoded as ASCII first; the result is
        always a str.

    """
    if isinstance(encoding, bytes):
        encoding = str(encoding, "ascii")

    chars = []
    # True while we are inside a run of separator characters.
    punct = False
    for c in encoding:
        if c.isalnum() or c == '.':
            # Emit one underscore per separator run, but never a leading one
            # (chars is still empty then); trailing runs are never flushed.
            if punct and chars:
                chars.append('_')
            chars.append(c)
            punct = False
        else:
            punct = True
    return ''.join(chars)


def search_function(encoding):

    """ Codec search function for the modules of this package.

        Looks up *encoding* in the module-level cache, otherwise
        normalizes the name, resolves it through the alias table and
        tries to import the matching 'encodings.<name>' module.

        Returns the codecs.CodecInfo object for the encoding, or None
        if no suitable codec module was found.  Both outcomes are
        cached under the name exactly as it was passed in.

        Raises CodecRegistryError if the module's getregentry() returns
        something that is neither a CodecInfo object nor a well-formed
        legacy tuple.

    """
    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding
    # name and lookup the module using the aliased name, then try to
    # lookup the module using the standard import scheme, i.e. first
    # try in the encodings package, then at top-level.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        # Skip empty names and names containing a dot: the latter would
        # address a module outside of the 'encodings' package namespace.
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import of a
            # module with side-effects that is not in the 'encodings' package.
            mod = __import__('encodings.' + modname, fromlist=_import_tail,
                             level=0)
        except ImportError:
            # ImportError may occur because 'encodings.(modname)' does not exist,
            # or because it imports a name that does not exist (see mbcs and oem)
            pass
        else:
            break
    else:
        # No candidate could be imported.
        mod = None

    try:
        # Also covers mod being None: None has no getregentry attribute.
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        # Legacy protocol: a sequence of 4 to 7 items laid out like the
        # positional arguments of codecs.CodecInfo.
        if not 4 <= len(entry) <= 7:
            raise CodecRegistryError('module "%s" (%s) failed to register'
                                     % (mod.__name__, mod.__file__))
        if not callable(entry[0]) or not callable(entry[1]) or \
           (entry[2] is not None and not callable(entry[2])) or \
           (entry[3] is not None and not callable(entry[3])) or \
           (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
           (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
            raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
                                     % (mod.__name__, mod.__file__))
        if len(entry) < 7 or entry[6] is None:
            # Default the codec name to the module name without the leading
            # package component.  Rebuild the tuple from the first six slots
            # instead of appending: a 7-item entry whose name is None would
            # otherwise grow to 8 items and make CodecInfo(*entry) fail.
            entry = (tuple(entry[:6])
                     + (None,) * (6 - len(entry))
                     + (mod.__name__.split(".", 1)[1],))
        entry = codecs.CodecInfo(*entry)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        # modname is still bound to the name whose import succeeded above.
        for alias in codecaliases:
            if alias not in _aliases:
                _aliases[alias] = modname

    # Return the registry entry
    return entry


# Register the search_function in the Python codec registry
codecs.register(search_function)

if sys.platform == 'win32':
    def _alias_mbcs(encoding):
        """ Resolve the name of the current ANSI code page to the mbcs codec.

            Returns the registry entry of encodings.mbcs when *encoding*
            equals 'cp' followed by the value of _winapi.GetACP(), and
            None otherwise (including when the required imports fail).

        """
        try:
            import _winapi
            ansi_code_page = "cp%s" % _winapi.GetACP()
            if encoding == ansi_code_page:
                import encodings.mbcs
                return encodings.mbcs.getregentry()
        except ImportError:
            # Imports may fail while we are shutting down
            pass
        return None

    codecs.register(_alias_mbcs)