• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
""" Encoding Aliases Support

    This module is used by the encodings package search function to
    map encoding names to module names.

    Note that the search function normalizes the encoding names before
    doing the lookup, so the mapping will have to map normalized
    encoding names to module names.

    Contents:

        The following aliases dictionary contains mappings of all IANA
        character set names for which the Python core library provides
        codecs. In addition to these, a few Python-specific codec
        aliases have also been added.

"""
# Mapping of normalized encoding alias -> codec module name.
#
# Keys must be written in their normalized form: codec search functions
# receive the encoding name in lower case, so a key containing uppercase
# letters cannot be reached through codecs.lookup().
aliases = {

    # Please keep this list sorted alphabetically by value !

    # ascii codec
    '646'                : 'ascii',
    'ansi_x3.4_1968'     : 'ascii',
    'ansi_x3_4_1968'     : 'ascii', # some email headers use this non-standard name
    'ansi_x3.4_1986'     : 'ascii',
    'cp367'              : 'ascii',
    'csascii'            : 'ascii',
    'ibm367'             : 'ascii',
    'iso646_us'          : 'ascii',
    'iso_646.irv_1991'   : 'ascii',
    'iso_ir_6'           : 'ascii',
    'us'                 : 'ascii',
    'us_ascii'           : 'ascii',

    # base64_codec codec
    'base64'             : 'base64_codec',
    'base_64'            : 'base64_codec',

    # big5 codec
    'big5_tw'            : 'big5',
    'csbig5'             : 'big5',

    # big5hkscs codec
    'big5_hkscs'         : 'big5hkscs',
    'hkscs'              : 'big5hkscs',

    # bz2_codec codec
    'bz2'                : 'bz2_codec',

    # cp037 codec
    '037'                : 'cp037',
    'csibm037'           : 'cp037',
    'ebcdic_cp_ca'       : 'cp037',
    'ebcdic_cp_nl'       : 'cp037',
    'ebcdic_cp_us'       : 'cp037',
    'ebcdic_cp_wt'       : 'cp037',
    'ibm037'             : 'cp037',
    'ibm039'             : 'cp037',

    # cp1026 codec
    '1026'               : 'cp1026',
    'csibm1026'          : 'cp1026',
    'ibm1026'            : 'cp1026',

    # cp1125 codec
    '1125'               : 'cp1125',
    'ibm1125'            : 'cp1125',
    'cp866u'             : 'cp1125',
    'ruscii'             : 'cp1125',

    # cp1140 codec
    '1140'               : 'cp1140',
    'ibm1140'            : 'cp1140',

    # cp1250 codec
    '1250'               : 'cp1250',
    'windows_1250'       : 'cp1250',

    # cp1251 codec
    '1251'               : 'cp1251',
    'windows_1251'       : 'cp1251',

    # cp1252 codec
    '1252'               : 'cp1252',
    'windows_1252'       : 'cp1252',

    # cp1253 codec
    '1253'               : 'cp1253',
    'windows_1253'       : 'cp1253',

    # cp1254 codec
    '1254'               : 'cp1254',
    'windows_1254'       : 'cp1254',

    # cp1255 codec
    '1255'               : 'cp1255',
    'windows_1255'       : 'cp1255',

    # cp1256 codec
    '1256'               : 'cp1256',
    'windows_1256'       : 'cp1256',

    # cp1257 codec
    '1257'               : 'cp1257',
    'windows_1257'       : 'cp1257',

    # cp1258 codec
    '1258'               : 'cp1258',
    'windows_1258'       : 'cp1258',

    # cp273 codec
    '273'                : 'cp273',
    'ibm273'             : 'cp273',
    'csibm273'           : 'cp273',

    # cp424 codec
    '424'                : 'cp424',
    'csibm424'           : 'cp424',
    'ebcdic_cp_he'       : 'cp424',
    'ibm424'             : 'cp424',

    # cp437 codec
    '437'                : 'cp437',
    'cspc8codepage437'   : 'cp437',
    'ibm437'             : 'cp437',

    # cp500 codec
    '500'                : 'cp500',
    'csibm500'           : 'cp500',
    'ebcdic_cp_be'       : 'cp500',
    'ebcdic_cp_ch'       : 'cp500',
    'ibm500'             : 'cp500',

    # cp775 codec
    '775'                : 'cp775',
    'cspc775baltic'      : 'cp775',
    'ibm775'             : 'cp775',

    # cp850 codec
    '850'                : 'cp850',
    'cspc850multilingual' : 'cp850',
    'ibm850'             : 'cp850',

    # cp852 codec
    '852'                : 'cp852',
    'cspcp852'           : 'cp852',
    'ibm852'             : 'cp852',

    # cp855 codec
    '855'                : 'cp855',
    'csibm855'           : 'cp855',
    'ibm855'             : 'cp855',

    # cp857 codec
    '857'                : 'cp857',
    'csibm857'           : 'cp857',
    'ibm857'             : 'cp857',

    # cp858 codec
    '858'                : 'cp858',
    'csibm858'           : 'cp858',
    'ibm858'             : 'cp858',

    # cp860 codec
    '860'                : 'cp860',
    'csibm860'           : 'cp860',
    'ibm860'             : 'cp860',

    # cp861 codec
    '861'                : 'cp861',
    'cp_is'              : 'cp861',
    'csibm861'           : 'cp861',
    'ibm861'             : 'cp861',

    # cp862 codec
    '862'                : 'cp862',
    'cspc862latinhebrew' : 'cp862',
    'ibm862'             : 'cp862',

    # cp863 codec
    '863'                : 'cp863',
    'csibm863'           : 'cp863',
    'ibm863'             : 'cp863',

    # cp864 codec
    '864'                : 'cp864',
    'csibm864'           : 'cp864',
    'ibm864'             : 'cp864',

    # cp865 codec
    '865'                : 'cp865',
    'csibm865'           : 'cp865',
    'ibm865'             : 'cp865',

    # cp866 codec
    '866'                : 'cp866',
    'csibm866'           : 'cp866',
    'ibm866'             : 'cp866',

    # cp869 codec
    '869'                : 'cp869',
    'cp_gr'              : 'cp869',
    'csibm869'           : 'cp869',
    'ibm869'             : 'cp869',

    # cp932 codec
    '932'                : 'cp932',
    'ms932'              : 'cp932',
    'mskanji'            : 'cp932',
    'ms_kanji'           : 'cp932',
    'windows_31j'        : 'cp932',

    # cp949 codec
    '949'                : 'cp949',
    'ms949'              : 'cp949',
    'uhc'                : 'cp949',

    # cp950 codec
    '950'                : 'cp950',
    'ms950'              : 'cp950',

    # euc_jis_2004 codec
    'jisx0213'           : 'euc_jis_2004',
    'eucjis2004'         : 'euc_jis_2004',
    'euc_jis2004'        : 'euc_jis_2004',

    # euc_jisx0213 codec
    'eucjisx0213'        : 'euc_jisx0213',

    # euc_jp codec
    'eucjp'              : 'euc_jp',
    'ujis'               : 'euc_jp',
    'u_jis'              : 'euc_jp',

    # euc_kr codec
    'euckr'              : 'euc_kr',
    'korean'             : 'euc_kr',
    'ksc5601'            : 'euc_kr',
    'ks_c_5601'          : 'euc_kr',
    'ks_c_5601_1987'     : 'euc_kr',
    'ksx1001'            : 'euc_kr',
    'ks_x_1001'          : 'euc_kr',

    # gb18030 codec
    'gb18030_2000'       : 'gb18030',

    # gb2312 codec
    'chinese'            : 'gb2312',
    'csiso58gb231280'    : 'gb2312',
    'euc_cn'             : 'gb2312',
    'euccn'              : 'gb2312',
    'eucgb2312_cn'       : 'gb2312',
    'gb2312_1980'        : 'gb2312',
    'gb2312_80'          : 'gb2312',
    'iso_ir_58'          : 'gb2312',

    # gbk codec
    '936'                : 'gbk',
    'cp936'              : 'gbk',
    'ms936'              : 'gbk',

    # hex_codec codec
    'hex'                : 'hex_codec',

    # hp_roman8 codec
    'roman8'             : 'hp_roman8',
    'r8'                 : 'hp_roman8',
    # Lowercase spelling is the one reachable through codecs.lookup(),
    # which passes lower-cased names to the search function.
    'cshproman8'         : 'hp_roman8',
    # Mixed-case spelling retained only so existing code that indexes
    # this dictionary directly keeps working.
    'csHPRoman8'         : 'hp_roman8',
    'cp1051'             : 'hp_roman8',
    'ibm1051'            : 'hp_roman8',

    # hz codec
    'hzgb'               : 'hz',
    'hz_gb'              : 'hz',
    'hz_gb_2312'         : 'hz',

    # iso2022_jp codec
    'csiso2022jp'        : 'iso2022_jp',
    'iso2022jp'          : 'iso2022_jp',
    'iso_2022_jp'        : 'iso2022_jp',

    # iso2022_jp_1 codec
    'iso2022jp_1'        : 'iso2022_jp_1',
    'iso_2022_jp_1'      : 'iso2022_jp_1',

    # iso2022_jp_2 codec
    'iso2022jp_2'        : 'iso2022_jp_2',
    'iso_2022_jp_2'      : 'iso2022_jp_2',

    # iso2022_jp_2004 codec
    'iso_2022_jp_2004'   : 'iso2022_jp_2004',
    'iso2022jp_2004'     : 'iso2022_jp_2004',

    # iso2022_jp_3 codec
    'iso2022jp_3'        : 'iso2022_jp_3',
    'iso_2022_jp_3'      : 'iso2022_jp_3',

    # iso2022_jp_ext codec
    'iso2022jp_ext'      : 'iso2022_jp_ext',
    'iso_2022_jp_ext'    : 'iso2022_jp_ext',

    # iso2022_kr codec
    'csiso2022kr'        : 'iso2022_kr',
    'iso2022kr'          : 'iso2022_kr',
    'iso_2022_kr'        : 'iso2022_kr',

    # iso8859_10 codec
    'csisolatin6'        : 'iso8859_10',
    'iso_8859_10'        : 'iso8859_10',
    'iso_8859_10_1992'   : 'iso8859_10',
    'iso_ir_157'         : 'iso8859_10',
    'l6'                 : 'iso8859_10',
    'latin6'             : 'iso8859_10',

    # iso8859_11 codec
    'thai'               : 'iso8859_11',
    'iso_8859_11'        : 'iso8859_11',
    'iso_8859_11_2001'   : 'iso8859_11',

    # iso8859_13 codec
    'iso_8859_13'        : 'iso8859_13',
    'l7'                 : 'iso8859_13',
    'latin7'             : 'iso8859_13',

    # iso8859_14 codec
    'iso_8859_14'        : 'iso8859_14',
    'iso_8859_14_1998'   : 'iso8859_14',
    'iso_celtic'         : 'iso8859_14',
    'iso_ir_199'         : 'iso8859_14',
    'l8'                 : 'iso8859_14',
    'latin8'             : 'iso8859_14',

    # iso8859_15 codec
    'iso_8859_15'        : 'iso8859_15',
    'l9'                 : 'iso8859_15',
    'latin9'             : 'iso8859_15',

    # iso8859_16 codec
    'iso_8859_16'        : 'iso8859_16',
    'iso_8859_16_2001'   : 'iso8859_16',
    'iso_ir_226'         : 'iso8859_16',
    'l10'                : 'iso8859_16',
    'latin10'            : 'iso8859_16',

    # iso8859_2 codec
    'csisolatin2'        : 'iso8859_2',
    'iso_8859_2'         : 'iso8859_2',
    'iso_8859_2_1987'    : 'iso8859_2',
    'iso_ir_101'         : 'iso8859_2',
    'l2'                 : 'iso8859_2',
    'latin2'             : 'iso8859_2',

    # iso8859_3 codec
    'csisolatin3'        : 'iso8859_3',
    'iso_8859_3'         : 'iso8859_3',
    'iso_8859_3_1988'    : 'iso8859_3',
    'iso_ir_109'         : 'iso8859_3',
    'l3'                 : 'iso8859_3',
    'latin3'             : 'iso8859_3',

    # iso8859_4 codec
    'csisolatin4'        : 'iso8859_4',
    'iso_8859_4'         : 'iso8859_4',
    'iso_8859_4_1988'    : 'iso8859_4',
    'iso_ir_110'         : 'iso8859_4',
    'l4'                 : 'iso8859_4',
    'latin4'             : 'iso8859_4',

    # iso8859_5 codec
    'csisolatincyrillic' : 'iso8859_5',
    'cyrillic'           : 'iso8859_5',
    'iso_8859_5'         : 'iso8859_5',
    'iso_8859_5_1988'    : 'iso8859_5',
    'iso_ir_144'         : 'iso8859_5',

    # iso8859_6 codec
    'arabic'             : 'iso8859_6',
    'asmo_708'           : 'iso8859_6',
    'csisolatinarabic'   : 'iso8859_6',
    'ecma_114'           : 'iso8859_6',
    'iso_8859_6'         : 'iso8859_6',
    'iso_8859_6_1987'    : 'iso8859_6',
    'iso_ir_127'         : 'iso8859_6',

    # iso8859_7 codec
    'csisolatingreek'    : 'iso8859_7',
    'ecma_118'           : 'iso8859_7',
    'elot_928'           : 'iso8859_7',
    'greek'              : 'iso8859_7',
    'greek8'             : 'iso8859_7',
    'iso_8859_7'         : 'iso8859_7',
    'iso_8859_7_1987'    : 'iso8859_7',
    'iso_ir_126'         : 'iso8859_7',

    # iso8859_8 codec
    'csisolatinhebrew'   : 'iso8859_8',
    'hebrew'             : 'iso8859_8',
    'iso_8859_8'         : 'iso8859_8',
    'iso_8859_8_1988'    : 'iso8859_8',
    'iso_ir_138'         : 'iso8859_8',

    # iso8859_9 codec
    'csisolatin5'        : 'iso8859_9',
    'iso_8859_9'         : 'iso8859_9',
    'iso_8859_9_1989'    : 'iso8859_9',
    'iso_ir_148'         : 'iso8859_9',
    'l5'                 : 'iso8859_9',
    'latin5'             : 'iso8859_9',

    # johab codec
    'cp1361'             : 'johab',
    'ms1361'             : 'johab',

    # koi8_r codec
    'cskoi8r'            : 'koi8_r',

    # kz1048 codec
    'kz_1048'            : 'kz1048',
    'rk1048'             : 'kz1048',
    'strk1048_2002'      : 'kz1048',

    # latin_1 codec
    #
    # Note that the latin_1 codec is implemented internally in C and a
    # lot faster than the charmap codec iso8859_1 which uses the same
    # encoding. This is why we discourage the use of the iso8859_1
    # codec and alias it to latin_1 instead.
    #
    '8859'               : 'latin_1',
    'cp819'              : 'latin_1',
    'csisolatin1'        : 'latin_1',
    'ibm819'             : 'latin_1',
    'iso8859'            : 'latin_1',
    'iso8859_1'          : 'latin_1',
    'iso_8859_1'         : 'latin_1',
    'iso_8859_1_1987'    : 'latin_1',
    'iso_ir_100'         : 'latin_1',
    'l1'                 : 'latin_1',
    'latin'              : 'latin_1',
    'latin1'             : 'latin_1',

    # mac_cyrillic codec
    'maccyrillic'        : 'mac_cyrillic',

    # mac_greek codec
    'macgreek'           : 'mac_greek',

    # mac_iceland codec
    'maciceland'         : 'mac_iceland',

    # mac_latin2 codec
    'maccentraleurope'   : 'mac_latin2',
    'mac_centeuro'       : 'mac_latin2',
    'maclatin2'          : 'mac_latin2',

    # mac_roman codec
    'macintosh'          : 'mac_roman',
    'macroman'           : 'mac_roman',

    # mac_turkish codec
    'macturkish'         : 'mac_turkish',

    # mbcs codec
    'ansi'               : 'mbcs',
    'dbcs'               : 'mbcs',

    # ptcp154 codec
    'csptcp154'          : 'ptcp154',
    'pt154'              : 'ptcp154',
    'cp154'              : 'ptcp154',
    'cyrillic_asian'     : 'ptcp154',

    # quopri_codec codec
    'quopri'             : 'quopri_codec',
    'quoted_printable'   : 'quopri_codec',
    'quotedprintable'    : 'quopri_codec',

    # rot_13 codec
    'rot13'              : 'rot_13',

    # shift_jis codec
    'csshiftjis'         : 'shift_jis',
    'shiftjis'           : 'shift_jis',
    'sjis'               : 'shift_jis',
    's_jis'              : 'shift_jis',

    # shift_jis_2004 codec
    'shiftjis2004'       : 'shift_jis_2004',
    'sjis_2004'          : 'shift_jis_2004',
    's_jis_2004'         : 'shift_jis_2004',

    # shift_jisx0213 codec
    'shiftjisx0213'      : 'shift_jisx0213',
    'sjisx0213'          : 'shift_jisx0213',
    's_jisx0213'         : 'shift_jisx0213',

    # tis_620 codec
    'tis620'             : 'tis_620',
    'tis_620_0'          : 'tis_620',
    'tis_620_2529_0'     : 'tis_620',
    'tis_620_2529_1'     : 'tis_620',
    'iso_ir_166'         : 'tis_620',

    # utf_16 codec
    'u16'                : 'utf_16',
    'utf16'              : 'utf_16',

    # utf_16_be codec
    'unicodebigunmarked' : 'utf_16_be',
    'utf_16be'           : 'utf_16_be',

    # utf_16_le codec
    'unicodelittleunmarked' : 'utf_16_le',
    'utf_16le'           : 'utf_16_le',

    # utf_32 codec
    'u32'                : 'utf_32',
    'utf32'              : 'utf_32',

    # utf_32_be codec
    'utf_32be'           : 'utf_32_be',

    # utf_32_le codec
    'utf_32le'           : 'utf_32_le',

    # utf_7 codec
    'u7'                 : 'utf_7',
    'utf7'               : 'utf_7',
    'unicode_1_1_utf_7'  : 'utf_7',

    # utf_8 codec
    'u8'                 : 'utf_8',
    'utf'                : 'utf_8',
    'utf8'               : 'utf_8',
    'utf8_ucs2'          : 'utf_8',
    'utf8_ucs4'          : 'utf_8',
    'cp65001'            : 'utf_8',

    # uu_codec codec
    'uu'                 : 'uu_codec',

    # zlib_codec codec
    'zip'                : 'zlib_codec',
    'zlib'               : 'zlib_codec',

    # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
    'x_mac_japanese'     : 'shift_jis',
    'x_mac_korean'       : 'euc_kr',
    'x_mac_simp_chinese' : 'gb2312',
    'x_mac_trad_chinese' : 'big5',
}
553