""" Encoding Aliases Support

    This module is used by the encodings package search function to
    map encoding names to module names.

    Note that the search function normalizes the encoding names before
    doing the lookup, so the mapping will have to map normalized
    encoding names to module names.  In practice this means every key
    must be written in lower case, because a key containing upper case
    letters can never be matched by a normalized name.

    Contents:

        The following aliases dictionary contains mappings of all IANA
        character set names for which the Python core library provides
        codecs. In addition to these, a few Python specific codec
        aliases have also been added.

"""
aliases = {

    # Please keep this list sorted alphabetically by value !

    # ascii codec
    '646': 'ascii',
    'ansi_x3.4_1968': 'ascii',
    'ansi_x3_4_1968': 'ascii',  # some email headers use this non-standard name
    'ansi_x3.4_1986': 'ascii',
    'cp367': 'ascii',
    'csascii': 'ascii',
    'ibm367': 'ascii',
    'iso646_us': 'ascii',
    'iso_646.irv_1991': 'ascii',
    'iso_ir_6': 'ascii',
    'us': 'ascii',
    'us_ascii': 'ascii',

    # base64_codec codec
    'base64': 'base64_codec',
    'base_64': 'base64_codec',

    # big5 codec
    'big5_tw': 'big5',
    'csbig5': 'big5',

    # big5hkscs codec
    'big5_hkscs': 'big5hkscs',
    'hkscs': 'big5hkscs',

    # bz2_codec codec
    'bz2': 'bz2_codec',

    # cp037 codec
    '037': 'cp037',
    'csibm037': 'cp037',
    'ebcdic_cp_ca': 'cp037',
    'ebcdic_cp_nl': 'cp037',
    'ebcdic_cp_us': 'cp037',
    'ebcdic_cp_wt': 'cp037',
    'ibm037': 'cp037',
    'ibm039': 'cp037',

    # cp1026 codec
    '1026': 'cp1026',
    'csibm1026': 'cp1026',
    'ibm1026': 'cp1026',

    # cp1125 codec
    '1125': 'cp1125',
    'ibm1125': 'cp1125',
    'cp866u': 'cp1125',
    'ruscii': 'cp1125',

    # cp1140 codec
    '1140': 'cp1140',
    'ibm1140': 'cp1140',

    # cp1250 codec
    '1250': 'cp1250',
    'windows_1250': 'cp1250',

    # cp1251 codec
    '1251': 'cp1251',
    'windows_1251': 'cp1251',

    # cp1252 codec
    '1252': 'cp1252',
    'windows_1252': 'cp1252',

    # cp1253 codec
    '1253': 'cp1253',
    'windows_1253': 'cp1253',

    # cp1254 codec
    '1254': 'cp1254',
    'windows_1254': 'cp1254',

    # cp1255 codec
    '1255': 'cp1255',
    'windows_1255': 'cp1255',

    # cp1256 codec
    '1256': 'cp1256',
    'windows_1256': 'cp1256',

    # cp1257 codec
    '1257': 'cp1257',
    'windows_1257': 'cp1257',

    # cp1258 codec
    '1258': 'cp1258',
    'windows_1258': 'cp1258',

    # cp273 codec
    '273': 'cp273',
    'ibm273': 'cp273',
    'csibm273': 'cp273',

    # cp424 codec
    '424': 'cp424',
    'csibm424': 'cp424',
    'ebcdic_cp_he': 'cp424',
    'ibm424': 'cp424',

    # cp437 codec
    '437': 'cp437',
    'cspc8codepage437': 'cp437',
    'ibm437': 'cp437',

    # cp500 codec
    '500': 'cp500',
    'csibm500': 'cp500',
    'ebcdic_cp_be': 'cp500',
    'ebcdic_cp_ch': 'cp500',
    'ibm500': 'cp500',

    # cp775 codec
    '775': 'cp775',
    'cspc775baltic': 'cp775',
    'ibm775': 'cp775',

    # cp850 codec
    '850': 'cp850',
    'cspc850multilingual': 'cp850',
    'ibm850': 'cp850',

    # cp852 codec
    '852': 'cp852',
    'cspcp852': 'cp852',
    'ibm852': 'cp852',

    # cp855 codec
    '855': 'cp855',
    'csibm855': 'cp855',
    'ibm855': 'cp855',

    # cp857 codec
    '857': 'cp857',
    'csibm857': 'cp857',
    'ibm857': 'cp857',

    # cp858 codec
    '858': 'cp858',
    'csibm858': 'cp858',
    'ibm858': 'cp858',

    # cp860 codec
    '860': 'cp860',
    'csibm860': 'cp860',
    'ibm860': 'cp860',

    # cp861 codec
    '861': 'cp861',
    'cp_is': 'cp861',
    'csibm861': 'cp861',
    'ibm861': 'cp861',

    # cp862 codec
    '862': 'cp862',
    'cspc862latinhebrew': 'cp862',
    'ibm862': 'cp862',

    # cp863 codec
    '863': 'cp863',
    'csibm863': 'cp863',
    'ibm863': 'cp863',

    # cp864 codec
    '864': 'cp864',
    'csibm864': 'cp864',
    'ibm864': 'cp864',

    # cp865 codec
    '865': 'cp865',
    'csibm865': 'cp865',
    'ibm865': 'cp865',

    # cp866 codec
    '866': 'cp866',
    'csibm866': 'cp866',
    'ibm866': 'cp866',

    # cp869 codec
    '869': 'cp869',
    'cp_gr': 'cp869',
    'csibm869': 'cp869',
    'ibm869': 'cp869',

    # cp932 codec
    '932': 'cp932',
    'ms932': 'cp932',
    'mskanji': 'cp932',
    'ms_kanji': 'cp932',
    'windows_31j': 'cp932',

    # cp949 codec
    '949': 'cp949',
    'ms949': 'cp949',
    'uhc': 'cp949',

    # cp950 codec
    '950': 'cp950',
    'ms950': 'cp950',

    # euc_jis_2004 codec
    'jisx0213': 'euc_jis_2004',
    'eucjis2004': 'euc_jis_2004',
    'euc_jis2004': 'euc_jis_2004',

    # euc_jisx0213 codec
    'eucjisx0213': 'euc_jisx0213',

    # euc_jp codec
    'eucjp': 'euc_jp',
    'ujis': 'euc_jp',
    'u_jis': 'euc_jp',

    # euc_kr codec
    'euckr': 'euc_kr',
    'korean': 'euc_kr',
    'ksc5601': 'euc_kr',
    'ks_c_5601': 'euc_kr',
    'ks_c_5601_1987': 'euc_kr',
    'ksx1001': 'euc_kr',
    'ks_x_1001': 'euc_kr',

    # gb18030 codec
    'gb18030_2000': 'gb18030',

    # gb2312 codec
    'chinese': 'gb2312',
    'csiso58gb231280': 'gb2312',
    'euc_cn': 'gb2312',
    'euccn': 'gb2312',
    'eucgb2312_cn': 'gb2312',
    'gb2312_1980': 'gb2312',
    'gb2312_80': 'gb2312',
    'iso_ir_58': 'gb2312',

    # gbk codec
    '936': 'gbk',
    'cp936': 'gbk',
    'ms936': 'gbk',

    # hex_codec codec
    'hex': 'hex_codec',

    # hp_roman8 codec
    'roman8': 'hp_roman8',
    'r8': 'hp_roman8',
    # Lower-cased spelling of 'csHPRoman8': lookups use normalized
    # (lower case) names, so a mixed-case key would never be matched.
    'cshproman8': 'hp_roman8',
    'cp1051': 'hp_roman8',
    'ibm1051': 'hp_roman8',

    # hz codec
    'hzgb': 'hz',
    'hz_gb': 'hz',
    'hz_gb_2312': 'hz',

    # iso2022_jp codec
    'csiso2022jp': 'iso2022_jp',
    'iso2022jp': 'iso2022_jp',
    'iso_2022_jp': 'iso2022_jp',

    # iso2022_jp_1 codec
    'iso2022jp_1': 'iso2022_jp_1',
    'iso_2022_jp_1': 'iso2022_jp_1',

    # iso2022_jp_2 codec
    'iso2022jp_2': 'iso2022_jp_2',
    'iso_2022_jp_2': 'iso2022_jp_2',

    # iso2022_jp_2004 codec
    'iso_2022_jp_2004': 'iso2022_jp_2004',
    'iso2022jp_2004': 'iso2022_jp_2004',

    # iso2022_jp_3 codec
    'iso2022jp_3': 'iso2022_jp_3',
    'iso_2022_jp_3': 'iso2022_jp_3',

    # iso2022_jp_ext codec
    'iso2022jp_ext': 'iso2022_jp_ext',
    'iso_2022_jp_ext': 'iso2022_jp_ext',

    # iso2022_kr codec
    'csiso2022kr': 'iso2022_kr',
    'iso2022kr': 'iso2022_kr',
    'iso_2022_kr': 'iso2022_kr',

    # iso8859_10 codec
    'csisolatin6': 'iso8859_10',
    'iso_8859_10': 'iso8859_10',
    'iso_8859_10_1992': 'iso8859_10',
    'iso_ir_157': 'iso8859_10',
    'l6': 'iso8859_10',
    'latin6': 'iso8859_10',

    # iso8859_11 codec
    'thai': 'iso8859_11',
    'iso_8859_11': 'iso8859_11',
    'iso_8859_11_2001': 'iso8859_11',

    # iso8859_13 codec
    'iso_8859_13': 'iso8859_13',
    'l7': 'iso8859_13',
    'latin7': 'iso8859_13',

    # iso8859_14 codec
    'iso_8859_14': 'iso8859_14',
    'iso_8859_14_1998': 'iso8859_14',
    'iso_celtic': 'iso8859_14',
    'iso_ir_199': 'iso8859_14',
    'l8': 'iso8859_14',
    'latin8': 'iso8859_14',

    # iso8859_15 codec
    'iso_8859_15': 'iso8859_15',
    'l9': 'iso8859_15',
    'latin9': 'iso8859_15',

    # iso8859_16 codec
    'iso_8859_16': 'iso8859_16',
    'iso_8859_16_2001': 'iso8859_16',
    'iso_ir_226': 'iso8859_16',
    'l10': 'iso8859_16',
    'latin10': 'iso8859_16',

    # iso8859_2 codec
    'csisolatin2': 'iso8859_2',
    'iso_8859_2': 'iso8859_2',
    'iso_8859_2_1987': 'iso8859_2',
    'iso_ir_101': 'iso8859_2',
    'l2': 'iso8859_2',
    'latin2': 'iso8859_2',

    # iso8859_3 codec
    'csisolatin3': 'iso8859_3',
    'iso_8859_3': 'iso8859_3',
    'iso_8859_3_1988': 'iso8859_3',
    'iso_ir_109': 'iso8859_3',
    'l3': 'iso8859_3',
    'latin3': 'iso8859_3',

    # iso8859_4 codec
    'csisolatin4': 'iso8859_4',
    'iso_8859_4': 'iso8859_4',
    'iso_8859_4_1988': 'iso8859_4',
    'iso_ir_110': 'iso8859_4',
    'l4': 'iso8859_4',
    'latin4': 'iso8859_4',

    # iso8859_5 codec
    'csisolatincyrillic': 'iso8859_5',
    'cyrillic': 'iso8859_5',
    'iso_8859_5': 'iso8859_5',
    'iso_8859_5_1988': 'iso8859_5',
    'iso_ir_144': 'iso8859_5',

    # iso8859_6 codec
    'arabic': 'iso8859_6',
    'asmo_708': 'iso8859_6',
    'csisolatinarabic': 'iso8859_6',
    'ecma_114': 'iso8859_6',
    'iso_8859_6': 'iso8859_6',
    'iso_8859_6_1987': 'iso8859_6',
    'iso_ir_127': 'iso8859_6',

    # iso8859_7 codec
    'csisolatingreek': 'iso8859_7',
    'ecma_118': 'iso8859_7',
    'elot_928': 'iso8859_7',
    'greek': 'iso8859_7',
    'greek8': 'iso8859_7',
    'iso_8859_7': 'iso8859_7',
    'iso_8859_7_1987': 'iso8859_7',
    'iso_ir_126': 'iso8859_7',

    # iso8859_8 codec
    'csisolatinhebrew': 'iso8859_8',
    'hebrew': 'iso8859_8',
    'iso_8859_8': 'iso8859_8',
    'iso_8859_8_1988': 'iso8859_8',
    'iso_ir_138': 'iso8859_8',

    # iso8859_9 codec
    'csisolatin5': 'iso8859_9',
    'iso_8859_9': 'iso8859_9',
    'iso_8859_9_1989': 'iso8859_9',
    'iso_ir_148': 'iso8859_9',
    'l5': 'iso8859_9',
    'latin5': 'iso8859_9',

    # johab codec
    'cp1361': 'johab',
    'ms1361': 'johab',

    # koi8_r codec
    'cskoi8r': 'koi8_r',

    # kz1048 codec
    'kz_1048': 'kz1048',
    'rk1048': 'kz1048',
    'strk1048_2002': 'kz1048',

    # latin_1 codec
    #
    # Note that the latin_1 codec is implemented internally in C and a
    # lot faster than the charmap codec iso8859_1 which uses the same
    # encoding. This is why we discourage the use of the iso8859_1
    # codec and alias it to latin_1 instead.
    #
    '8859': 'latin_1',
    'cp819': 'latin_1',
    'csisolatin1': 'latin_1',
    'ibm819': 'latin_1',
    'iso8859': 'latin_1',
    'iso8859_1': 'latin_1',
    'iso_8859_1': 'latin_1',
    'iso_8859_1_1987': 'latin_1',
    'iso_ir_100': 'latin_1',
    'l1': 'latin_1',
    'latin': 'latin_1',
    'latin1': 'latin_1',

    # mac_cyrillic codec
    'maccyrillic': 'mac_cyrillic',

    # mac_greek codec
    'macgreek': 'mac_greek',

    # mac_iceland codec
    'maciceland': 'mac_iceland',

    # mac_latin2 codec
    'maccentraleurope': 'mac_latin2',
    'mac_centeuro': 'mac_latin2',
    'maclatin2': 'mac_latin2',

    # mac_roman codec
    'macintosh': 'mac_roman',
    'macroman': 'mac_roman',

    # mac_turkish codec
    'macturkish': 'mac_turkish',

    # mbcs codec
    'ansi': 'mbcs',
    'dbcs': 'mbcs',

    # ptcp154 codec
    'csptcp154': 'ptcp154',
    'pt154': 'ptcp154',
    'cp154': 'ptcp154',
    'cyrillic_asian': 'ptcp154',

    # quopri_codec codec
    'quopri': 'quopri_codec',
    'quoted_printable': 'quopri_codec',
    'quotedprintable': 'quopri_codec',

    # rot_13 codec
    'rot13': 'rot_13',

    # shift_jis codec
    'csshiftjis': 'shift_jis',
    'shiftjis': 'shift_jis',
    'sjis': 'shift_jis',
    's_jis': 'shift_jis',

    # shift_jis_2004 codec
    'shiftjis2004': 'shift_jis_2004',
    'sjis_2004': 'shift_jis_2004',
    's_jis_2004': 'shift_jis_2004',

    # shift_jisx0213 codec
    'shiftjisx0213': 'shift_jisx0213',
    'sjisx0213': 'shift_jisx0213',
    's_jisx0213': 'shift_jisx0213',

    # tis_620 codec
    'tis620': 'tis_620',
    'tis_620_0': 'tis_620',
    'tis_620_2529_0': 'tis_620',
    'tis_620_2529_1': 'tis_620',
    'iso_ir_166': 'tis_620',

    # utf_16 codec
    'u16': 'utf_16',
    'utf16': 'utf_16',

    # utf_16_be codec
    'unicodebigunmarked': 'utf_16_be',
    'utf_16be': 'utf_16_be',

    # utf_16_le codec
    'unicodelittleunmarked': 'utf_16_le',
    'utf_16le': 'utf_16_le',

    # utf_32 codec
    'u32': 'utf_32',
    'utf32': 'utf_32',

    # utf_32_be codec
    'utf_32be': 'utf_32_be',

    # utf_32_le codec
    'utf_32le': 'utf_32_le',

    # utf_7 codec
    'u7': 'utf_7',
    'utf7': 'utf_7',
    'unicode_1_1_utf_7': 'utf_7',

    # utf_8 codec
    'u8': 'utf_8',
    'utf': 'utf_8',
    'utf8': 'utf_8',
    'utf8_ucs2': 'utf_8',
    'utf8_ucs4': 'utf_8',
    'cp65001': 'utf_8',

    # uu_codec codec
    'uu': 'uu_codec',

    # zlib_codec codec
    'zip': 'zlib_codec',
    'zlib': 'zlib_codec',

    # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
    'x_mac_japanese': 'shift_jis',
    'x_mac_korean': 'euc_kr',
    'x_mac_simp_chinese': 'gb2312',
    'x_mac_trad_chinese': 'big5',
}