• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Locale support module.
2
3The module provides low-level access to the C lib's locale APIs and adds high
4level number formatting APIs as well as a locale aliasing engine to complement
5these.
6
7The aliasing engine includes support for many commonly used locale names and
8maps them to values suitable for passing to the C lib's setlocale() function. It
9also includes default encodings for all supported locale names.
10
11"""
12
13import sys
14import encodings
15import encodings.aliases
16import re
17import _collections_abc
18from builtins import str as _builtin_str
19import functools
20
21# Try importing the _locale module.
22#
23# If this fails, fall back on a basic 'C' locale emulation.
24
# Yuck:  LC_MESSAGES is non-standard, so whether it exists is only known
# after the _locale import has been attempted.  It is therefore left out
# of this list, and __all__ is also fiddled at the end of the file.
__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
           "str", "atof", "atoi", "format", "format_string", "currency",
           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
           "LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
32
33def _strcoll(a,b):
34    """ strcoll(string,string) -> int.
35        Compares two strings according to the locale.
36    """
37    return (a > b) - (a < b)
38
def _strxfrm(s):
    """ strxfrm(string) -> string.
        Returns a string whose ordinary comparison gives a locale-aware
        ordering.  This fallback implementation returns the argument
        unchanged.
    """
    return s
44
try:

    # Use the C implementation when it can be imported.  It is expected to
    # provide the names that the except branch below emulates.
    from _locale import *

except ImportError:

    # Locale emulation: only the 'C' locale is available.

    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.

            The emulation always returns a fresh dict with fixed values.
        """
        # 'C' locale default values.  The numeric entries set to 127 equal
        # the CHAR_MAX constant defined above in this branch.
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation accepts only None, '' and 'C' as value and
            raises Error for anything else.  The category argument is
            not examined, and the result is always 'C'.
        """
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'
94
# _locale may or may not have supplied these two; fall back on the pure
# Python versions for whichever one is missing.
try:
    strxfrm
except NameError:
    strxfrm = _strxfrm
try:
    strcoll
except NameError:
    strcoll = _strcoll
100
101
102_localeconv = localeconv
103
104# With this dict, you can override some items of localeconv's return value.
105# This is useful for testing purposes.
106_override_localeconv = {}
107
108@functools.wraps(_localeconv)
109def localeconv():
110    d = _localeconv()
111    if _override_localeconv:
112        d.update(_override_localeconv)
113    return d
114
115
116### Number formatting APIs
117
118# Author: Martin von Loewis
119# improved by Georg Brandl
120
121# Iterate over grouping intervals
122def _grouping_intervals(grouping):
123    last_interval = None
124    for interval in grouping:
125        # if grouping is -1, we are done
126        if interval == CHAR_MAX:
127            return
128        # 0: re-use last group ad infinitum
129        if interval == 0:
130            if last_interval is None:
131                raise ValueError("invalid grouping")
132            while True:
133                yield last_interval
134        yield interval
135        last_interval = interval
136
137#perform the grouping from right to left
138def _group(s, monetary=False):
139    conv = localeconv()
140    thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
141    grouping = conv[monetary and 'mon_grouping' or 'grouping']
142    if not grouping:
143        return (s, 0)
144    if s[-1] == ' ':
145        stripped = s.rstrip()
146        right_spaces = s[len(stripped):]
147        s = stripped
148    else:
149        right_spaces = ''
150    left_spaces = ''
151    groups = []
152    for interval in _grouping_intervals(grouping):
153        if not s or s[-1] not in "0123456789":
154            # only non-digit characters remain (sign, spaces)
155            left_spaces = s
156            s = ''
157            break
158        groups.append(s[-interval:])
159        s = s[:-interval]
160    if s:
161        groups.append(s)
162    groups.reverse()
163    return (
164        left_spaces + thousands_sep.join(groups) + right_spaces,
165        len(thousands_sep) * (len(groups) - 1)
166    )
167
168# Strip a given amount of excess padding from the given string
169def _strip_padding(s, amount):
170    lpos = 0
171    while amount and s[lpos] == ' ':
172        lpos += 1
173        amount -= 1
174    rpos = len(s) - 1
175    while amount and s[rpos] == ' ':
176        rpos -= 1
177        amount -= 1
178    return s[lpos:rpos+1]
179
# Matches one %-style conversion specifier: an optional "(key)" mapping
# key, an optional run of flag/width/precision/length characters (the
# "modifiers" group) and the conversion character.  The conversion
# character set includes '%', so "%%" is matched as well.
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
182
183def _format(percent, value, grouping=False, monetary=False, *additional):
184    if additional:
185        formatted = percent % ((value,) + additional)
186    else:
187        formatted = percent % value
188    # floats and decimal ints need special action!
189    if percent[-1] in 'eEfFgG':
190        seps = 0
191        parts = formatted.split('.')
192        if grouping:
193            parts[0], seps = _group(parts[0], monetary=monetary)
194        decimal_point = localeconv()[monetary and 'mon_decimal_point'
195                                              or 'decimal_point']
196        formatted = decimal_point.join(parts)
197        if seps:
198            formatted = _strip_padding(formatted, seps)
199    elif percent[-1] in 'diu':
200        seps = 0
201        if grouping:
202            formatted, seps = _group(formatted, monetary=monetary)
203        if seps:
204            formatted = _strip_padding(formatted, seps)
205    return formatted
206
def format_string(f, val, grouping=False, monetary=False):
    """Formats a string in the same way that the % formatting would use,
    but takes the current locale into account.

    Grouping is applied if the third parameter is true.
    Conversion uses monetary thousands separator and grouping strings if
    fourth parameter monetary is true."""
    specifiers = list(_percent_re.finditer(f))
    # every specifier becomes a plain %s that receives the localized text
    template = _percent_re.sub('%s', f)

    pieces = []
    if isinstance(val, _collections_abc.Mapping):
        # mapping: every specifier looks up its own key in val
        for spec in specifiers:
            text = spec.group()
            if text[-1] == '%':
                pieces.append('%')
            else:
                pieces.append(_format(text, val, grouping, monetary))
    else:
        args = val if isinstance(val, tuple) else (val,)
        pos = 0
        for spec in specifiers:
            text = spec.group()
            if text[-1] == '%':
                pieces.append('%')
                continue
            # each '*' modifier consumes one extra positional argument
            stars = spec.group('modifiers').count('*')
            following = pos + 1 + stars
            pieces.append(_format(text,
                                  args[pos],
                                  grouping,
                                  monetary,
                                  *args[pos + 1:following]))
            pos = following

    return template % tuple(pieces)
243
def format(percent, value, grouping=False, monetary=False, *additional):
    """Deprecated, use format_string instead."""
    import warnings
    warnings.warn(
        "This method will be removed in a future version of Python. "
        "Use 'locale.format_string()' instead.",
        DeprecationWarning, stacklevel=2
    )

    # percent must consist of exactly one specifier and nothing else
    found = _percent_re.match(percent)
    if found is None or found.end() != len(percent):
        message = ("format() must be given exactly one %%char "
                   "format specifier, %s not valid")
        raise ValueError(message % repr(percent))
    return _format(percent, value, grouping, monetary, *additional)
258
def currency(val, symbol=True, grouping=False, international=False):
    """Formats val according to the currency settings
    in the current locale.

    symbol         -- if true, add the currency symbol.
    grouping       -- if true, group the digits using the monetary settings.
    international  -- if true, use the international currency symbol and
                      number of fractional digits.

    The sign of val selects between the n_* and p_* localeconv() entries.
    Raises ValueError if the locale does not define the number of
    fractional digits, which is the case for the 'C' locale."""
    conv = localeconv()

    # check for illegal values: CHAR_MAX marks "not available"; the
    # literal 127 is kept because callers historically relied on it
    digits = conv['int_frac_digits' if international else 'frac_digits']
    if digits in (127, CHAR_MAX):
        raise ValueError("Currency formatting is not possible using "
                         "the 'C' locale.")

    s = _format('%%.%if' % digits, abs(val), grouping, monetary=True)
    # '<' and '>' are markers if the sign must be inserted between symbol and value
    s = '<' + s + '>'

    negative = val < 0

    if symbol:
        smb = conv['int_curr_symbol' if international else 'currency_symbol']
        precedes = conv['n_cs_precedes' if negative else 'p_cs_precedes']
        separated = conv['n_sep_by_space' if negative else 'p_sep_by_space']
        space = ' ' if separated else ''

        if precedes:
            s = smb + space + s
        else:
            # endswith() is safe for an empty symbol, unlike smb[-1]
            if international and smb.endswith(' '):
                smb = smb[:-1]
            s = s + space + smb

    sign_pos = conv['n_sign_posn' if negative else 'p_sign_posn']
    sign = conv['negative_sign' if negative else 'positive_sign']

    if sign_pos == 0:
        s = '(' + s + ')'
    elif sign_pos == 1:
        s = sign + s
    elif sign_pos == 2:
        s = s + sign
    elif sign_pos == 3:
        s = s.replace('<', sign)
    elif sign_pos == 4:
        s = s.replace('>', sign)
    else:
        # the default if nothing specified;
        # this should be the most fitting sign position
        s = sign + s

    # drop whichever markers were not replaced by the sign
    return s.replace('<', '').replace('>', '')
305
def str(val):
    """Convert float to string, taking the locale into account.

    The value is formatted with "%.12g" and without grouping."""
    return _format("%.12g", val)
309
def delocalize(string):
    "Parses a string as a normalized number according to the locale settings."

    conv = localeconv()

    # First drop the grouping separator, then turn the locale's decimal
    # point into a dot.  An empty setting leaves the string untouched.
    for key, replacement in (('thousands_sep', ''), ('decimal_point', '.')):
        token = conv[key]
        if token:
            string = string.replace(token, replacement)
    return string
325
def atof(string, func=float):
    """Parses a string as a float according to the locale settings.

    The string is passed through delocalize() and the result is converted
    with func, which defaults to float."""
    return func(delocalize(string))
329
def atoi(string):
    """Converts a string to an integer according to the locale settings.

    The string is passed through delocalize() and then to int()."""
    return int(delocalize(string))
333
def _test():
    # Manual smoke test: format two numbers with the user's default locale
    # and print them next to the values parsed back from the text.
    setlocale(LC_ALL, "")
    # with grouping
    grouped = format_string("%d", 123456789, 1)
    print(grouped, "is", atoi(grouped))
    # standard formatting
    plain = str(3.14)
    print(plain, "is", atof(plain))
342
343### Locale name aliasing engine
344
345# Author: Marc-Andre Lemburg, mal@lemburg.com
346# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
347
# Store away the low-level version of setlocale: the public name is
# overridden further below by a wrapper that calls this one.
_setlocale = setlocale
351
def _replace_encoding(code, encoding):
    # Return code with its encoding part (if any) replaced by the C lib
    # spelling of encoding.
    langname = code.partition('.')[0]

    # Convert the encoding to a C lib compatible encoding string: first
    # through Python's codec aliases ...
    canonical = encodings.normalize_encoding(encoding)
    canonical = encodings.aliases.aliases.get(canonical.lower(), canonical)

    # ... then through the locale specific table, retrying without '_' and
    # '-' when the plain lowercase name is not listed.  Names unknown to
    # the table are used as they are.
    key = canonical.lower()
    if key not in locale_encoding_alias:
        key = key.replace('_', '').replace('-', '')
    return langname + '.' + locale_encoding_alias.get(key, canonical)
374
375def _append_modifier(code, modifier):
376    if modifier == 'euro':
377        if '.' not in code:
378            return code + '.ISO8859-15'
379        _, _, encoding = code.partition('.')
380        if encoding in ('ISO8859-15', 'UTF-8'):
381            return code
382        if encoding == 'ISO8859-1':
383            return _replace_encoding(code, 'ISO8859-15')
384    return code + '@' + modifier
385
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

        The name is looked up in the locale_alias table up to four
        times, each time with a less specific key: the full name, the
        name without modifier, the name without encoding, and the bare
        language name.

    """
    # Normalize the locale name and extract the encoding and modifier
    code = localename.lower()
    if ':' in code:
        # ':' is sometimes used as encoding delimiter.
        code = code.replace(':', '.')
    if '@' in code:
        code, modifier = code.split('@', 1)
    else:
        modifier = ''
    if '.' in code:
        # anything after a second '.' is discarded
        langname, encoding = code.split('.')[:2]
    else:
        langname = code
        encoding = ''

    # First lookup: fullname (possibly with encoding and modifier)
    lang_enc = langname
    if encoding:
        # table keys carry the encoding without '-' and '_'
        norm_encoding = encoding.replace('-', '')
        norm_encoding = norm_encoding.replace('_', '')
        lang_enc += '.' + norm_encoding
    lookup_name = lang_enc
    if modifier:
        lookup_name += '@' + modifier
    code = locale_alias.get(lookup_name, None)
    if code is not None:
        return code
    # first lookup failed

    if modifier:
        # Second try: fullname without modifier (possibly with encoding)
        code = locale_alias.get(lang_enc, None)
        if code is not None:
            # lookup without modifier succeeded
            if '@' not in code:
                return _append_modifier(code, modifier)
            # the alias brings its own modifier: accept it only if it
            # matches the requested one
            if code.split('@', 1)[1].lower() == modifier:
                return code
        # second lookup failed

    if encoding:
        # Third try: langname (without encoding, possibly with modifier)
        lookup_name = langname
        if modifier:
            lookup_name += '@' + modifier
        code = locale_alias.get(lookup_name, None)
        if code is not None:
            # lookup without encoding succeeded
            if '@' not in code:
                return _replace_encoding(code, encoding)
            # swap the encoding in front of the alias' own modifier
            code, modifier = code.split('@', 1)
            return _replace_encoding(code, encoding) + '@' + modifier

        if modifier:
            # Fourth try: langname (without encoding and modifier)
            code = locale_alias.get(langname, None)
            if code is not None:
                # lookup without modifier and encoding succeeded
                if '@' not in code:
                    code = _replace_encoding(code, encoding)
                    return _append_modifier(code, modifier)
                code, defmod = code.split('@', 1)
                if defmod.lower() == modifier:
                    return _replace_encoding(code, encoding) + '@' + defmod

    # nothing matched: hand back the caller's string untouched
    return localename
468
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is first run through normalize(), i.e. through
        the locale alias engine. A ValueError is raised if the
        normalized name cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)

    # Deal with locale modifiers
    base, at, modifier = code.partition('@')
    if at:
        code = base
        if modifier == 'euro' and '.' not in code:
            # Assume Latin-9 for @euro locales. This is bogus,
            # since some systems may use other encodings for these
            # locales. Also, we ignore other modifiers.
            return code, 'iso-8859-15'

    if '.' in code:
        language, encoding = code.split('.')[:2]
        return language, encoding
    if code == 'C':
        return None, None
    if code == 'UTF-8':
        # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
        # for getting UTF-8 handling for text.
        return None, 'UTF-8'
    raise ValueError('unknown locale: %s' % localename)
502
503def _build_localename(localetuple):
504
505    """ Builds a locale code from the given tuple (language code,
506        encoding).
507
508        No aliasing or normalizing takes place.
509
510    """
511    try:
512        language, encoding = localetuple
513
514        if language is None:
515            language = 'C'
516        if encoding is None:
517            return language
518        else:
519            return language + '.' + encoding
520    except (TypeError, ValueError):
521        raise TypeError('Locale must be None, a string, or an iterable of '
522                        'two strings -- language code, encoding.') from None
523
def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined will be
        used. envvars defaults to the search path used in GNU gettext;
        it must always contain the variable name 'LANG'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        # not supported: use the environment instead
        pass
    else:
        # make sure the code/encoding values are valid
        windows_identifier = (sys.platform == "win32" and code
                              and code[:2] == "0x")
        if windows_identifier:
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    environ_get = os.environ.get
    localename = 'C'
    for variable in envvars:
        candidate = environ_get(variable, None)
        if not candidate:
            continue
        if variable == 'LANGUAGE':
            # LANGUAGE may hold a ':' separated list; use its first entry
            candidate = candidate.split(':')[0]
        localename = candidate
        break
    return _parse_localename(localename)
575
576
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* values except LC_ALL. It
        defaults to LC_CTYPE.  A TypeError is raised for LC_ALL when
        the current setting contains a ';'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    current = _setlocale(category)
    composite = category == LC_ALL and ';' in current
    if composite:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)
594
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, an iterable of two strings (language code and encoding),
        or None.

        Iterables are converted to strings using the locale aliasing
        engine.  Locale strings are passed directly to the C lib.

        category may be given as one of the LC_* values.

    """
    # None, '' and real strings go to the low-level function unchanged
    if not locale or isinstance(locale, _builtin_str):
        return _setlocale(category, locale)
    # anything else is taken as (language code, encoding)
    return _setlocale(category, normalize(_build_localename(locale)))
611
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is determined by calling
        getdefaultlocale(). category defaults to LC_ALL.

        The resulting name is passed to the low-level setlocale
        without further normalization.

    """
    _setlocale(category, _build_localename(getdefaultlocale()))
621
# getpreferredencoding() is defined in one of four ways, chosen at import
# time.  In every variant UTF-8 mode (sys.flags.utf8_mode) takes
# precedence where it is checked.
if sys.platform.startswith("win"):
    # On Win32, this will return the ANSI code page
    def getpreferredencoding(do_setlocale = True):
        """Return the charset that the user is likely using."""
        if sys.flags.utf8_mode:
            return 'UTF-8'
        import _bootlocale
        return _bootlocale.getpreferredencoding(False)
else:
    # On Unix, if CODESET is available, use that.
    try:
        CODESET
    except NameError:
        if hasattr(sys, 'getandroidapilevel'):
            # On Android langinfo.h and CODESET are missing, and UTF-8 is
            # always used in mbstowcs() and wcstombs().
            def getpreferredencoding(do_setlocale = True):
                """Return the charset that the user is likely using
                (always UTF-8 here)."""
                return 'UTF-8'
        else:
            # Fall back to parsing environment variables :-(
            def getpreferredencoding(do_setlocale = True):
                """Return the charset that the user is likely using,
                by looking at environment variables."""
                if sys.flags.utf8_mode:
                    return 'UTF-8'
                res = getdefaultlocale()[1]
                if res is None:
                    # LANG not set, default conservatively to ASCII
                    res = 'ascii'
                return res
    else:
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            according to the system configuration.

            If do_setlocale is true, LC_CTYPE is temporarily switched to
            the user's default locale for the query and restored
            afterwards, even when the query raises."""
            if sys.flags.utf8_mode:
                return 'UTF-8'
            import _bootlocale
            if not do_setlocale:
                return _bootlocale.getpreferredencoding(False)
            oldloc = setlocale(LC_CTYPE)
            try:
                try:
                    setlocale(LC_CTYPE, "")
                except Error:
                    # best effort: query with whatever locale is active
                    pass
                return _bootlocale.getpreferredencoding(False)
            finally:
                # never leave the process-wide locale modified
                setlocale(LC_CTYPE, oldloc)
669
670
671### Database
672#
673# The following data was extracted from the locale.alias file which
674# comes with X11 and then hand edited removing the explicit encoding
675# definitions and adding some more aliases. The file is usually
676# available as /usr/lib/X11/locale/locale.alias.
677#
678
679#
# The locale_encoding_alias table maps lowercase encoding alias names
# to C locale encoding names (case-sensitive). Note that normalize()
# first looks up the encoding in the encodings.aliases dictionary and
# then applies this mapping to find the correct C lib name for the
# encoding.
685#
# Keys are written in lowercase; the values are the spellings that end up
# in the locale names built by this module.
locale_encoding_alias = {

    # Mappings for non-standard encoding names used in locale names
    '437':                          'C',
    'c':                            'C',
    'en':                           'ISO8859-1',
    'jis':                          'JIS7',
    'jis7':                         'JIS7',
    'ajec':                         'eucJP',
    'koi8c':                        'KOI8-C',
    'microsoftcp1251':              'CP1251',
    'microsoftcp1255':              'CP1255',
    'microsoftcp1256':              'CP1256',
    '88591':                        'ISO8859-1',
    '88592':                        'ISO8859-2',
    '88595':                        'ISO8859-5',
    '885915':                       'ISO8859-15',

    # Mappings from Python codec names to C lib encoding names
    'ascii':                        'ISO8859-1',
    'latin_1':                      'ISO8859-1',
    'iso8859_1':                    'ISO8859-1',
    'iso8859_10':                   'ISO8859-10',
    'iso8859_11':                   'ISO8859-11',
    'iso8859_13':                   'ISO8859-13',
    'iso8859_14':                   'ISO8859-14',
    'iso8859_15':                   'ISO8859-15',
    'iso8859_16':                   'ISO8859-16',
    'iso8859_2':                    'ISO8859-2',
    'iso8859_3':                    'ISO8859-3',
    'iso8859_4':                    'ISO8859-4',
    'iso8859_5':                    'ISO8859-5',
    'iso8859_6':                    'ISO8859-6',
    'iso8859_7':                    'ISO8859-7',
    'iso8859_8':                    'ISO8859-8',
    'iso8859_9':                    'ISO8859-9',
    'iso2022_jp':                   'JIS7',
    'shift_jis':                    'SJIS',
    'tactis':                       'TACTIS',
    'euc_jp':                       'eucJP',
    'euc_kr':                       'eucKR',
    'utf_8':                        'UTF-8',
    'koi8_r':                       'KOI8-R',
    'koi8_t':                       'KOI8-T',
    'koi8_u':                       'KOI8-U',
    'kz1048':                       'RK1048',
    'cp1251':                       'CP1251',
    'cp1255':                       'CP1255',
    'cp1256':                       'CP1256',

    # XXX This list is still incomplete. If you know more
    # mappings, please file a bug report. Thanks.
}
739
# Also register every alias under its key with the underscores removed.
# setdefault() leaves entries that already exist untouched, and sorted()
# iterates over a snapshot, so the dict can be extended inside the loop.
for k, v in sorted(locale_encoding_alias.items()):
    k = k.replace('_', '')
    locale_encoding_alias.setdefault(k, v)
743
744#
745# The locale_alias table maps lowercase alias names to C locale names
746# (case-sensitive). Encodings are always separated from the locale
747# name using a dot ('.'); they should only be given in case the
748# language name is needed to interpret the given encoding alias
749# correctly (CJK codes often have this need).
750#
# Note that the normalize() function, which uses this table,
# removes '_' and '-' characters from the encoding part of the
# locale name before doing the lookup. This saves a lot of
# space in the table.
755#
756# MAL 2004-12-10:
757# Updated alias mapping to most recent locale.alias file
758# from X.org distribution using makelocalealias.py.
759#
760# These are the differences compared to the old mapping (Python 2.4
761# and older):
762#
763#    updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
764#    updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
765#    updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
766#    updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
767#    updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
768#    updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
769#    updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
770#    updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
771#    updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
772#    updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
773#    updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
774#    updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
775#    updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
776#    updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
777#    updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
778#    updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
779#    updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
780#    updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
781#    updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
782#    updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
783#    updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
784#    updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
785#
786# MAL 2008-05-30:
787# Updated alias mapping to most recent locale.alias file
788# from X.org distribution using makelocalealias.py.
789#
790# These are the differences compared to the old mapping (Python 2.5
791# and older):
792#
793#    updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
794#    updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
795#    updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
796#    updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
797#    updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
798#    updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
799#    updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
800#    updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
801#    updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
802#    updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
803#    updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
804#    updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
805#    updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
806#    updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
807#    updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
808#    updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
809#    updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
810#    updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
811#    updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
812#
813# AP 2010-04-12:
814# Updated alias mapping to most recent locale.alias file
815# from X.org distribution using makelocalealias.py.
816#
817# These are the differences compared to the old mapping (Python 2.6.5
818# and older):
819#
820#    updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
821#    updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
822#    updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
823#    updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
824#    updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
825#    updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
826#    updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
827#    updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
828#    updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
829#    updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
830#    updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
831#    updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
832#    updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
833#
834# SS 2013-12-20:
835# Updated alias mapping to most recent locale.alias file
836# from X.org distribution using makelocalealias.py.
837#
838# These are the differences compared to the old mapping (Python 3.3.3
839# and older):
840#
841#    updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
842#    updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
843#    updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
844#    updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
845#    updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
846#    updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
847#    updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
848#    updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
849#    updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
850#    updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
851#    updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
852#
853# SS 2014-10-01:
854# Updated alias mapping with glibc 2.19 supported locales.
855#
856# SS 2018-05-05:
857# Updated alias mapping with glibc 2.27 supported locales.
858#
859# These are the differences compared to the old mapping (Python 3.6.5
860# and older):
861#
862#    updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia'
863#    updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154'
864#    updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R'
865
866locale_alias = {
867    'a3':                                   'az_AZ.KOI8-C',
868    'a3_az':                                'az_AZ.KOI8-C',
869    'a3_az.koic':                           'az_AZ.KOI8-C',
870    'aa_dj':                                'aa_DJ.ISO8859-1',
871    'aa_er':                                'aa_ER.UTF-8',
872    'aa_et':                                'aa_ET.UTF-8',
873    'af':                                   'af_ZA.ISO8859-1',
874    'af_za':                                'af_ZA.ISO8859-1',
875    'agr_pe':                               'agr_PE.UTF-8',
876    'ak_gh':                                'ak_GH.UTF-8',
877    'am':                                   'am_ET.UTF-8',
878    'am_et':                                'am_ET.UTF-8',
879    'american':                             'en_US.ISO8859-1',
880    'an_es':                                'an_ES.ISO8859-15',
881    'anp_in':                               'anp_IN.UTF-8',
882    'ar':                                   'ar_AA.ISO8859-6',
883    'ar_aa':                                'ar_AA.ISO8859-6',
884    'ar_ae':                                'ar_AE.ISO8859-6',
885    'ar_bh':                                'ar_BH.ISO8859-6',
886    'ar_dz':                                'ar_DZ.ISO8859-6',
887    'ar_eg':                                'ar_EG.ISO8859-6',
888    'ar_in':                                'ar_IN.UTF-8',
889    'ar_iq':                                'ar_IQ.ISO8859-6',
890    'ar_jo':                                'ar_JO.ISO8859-6',
891    'ar_kw':                                'ar_KW.ISO8859-6',
892    'ar_lb':                                'ar_LB.ISO8859-6',
893    'ar_ly':                                'ar_LY.ISO8859-6',
894    'ar_ma':                                'ar_MA.ISO8859-6',
895    'ar_om':                                'ar_OM.ISO8859-6',
896    'ar_qa':                                'ar_QA.ISO8859-6',
897    'ar_sa':                                'ar_SA.ISO8859-6',
898    'ar_sd':                                'ar_SD.ISO8859-6',
899    'ar_ss':                                'ar_SS.UTF-8',
900    'ar_sy':                                'ar_SY.ISO8859-6',
901    'ar_tn':                                'ar_TN.ISO8859-6',
902    'ar_ye':                                'ar_YE.ISO8859-6',
903    'arabic':                               'ar_AA.ISO8859-6',
904    'as':                                   'as_IN.UTF-8',
905    'as_in':                                'as_IN.UTF-8',
906    'ast_es':                               'ast_ES.ISO8859-15',
907    'ayc_pe':                               'ayc_PE.UTF-8',
908    'az':                                   'az_AZ.ISO8859-9E',
909    'az_az':                                'az_AZ.ISO8859-9E',
910    'az_az.iso88599e':                      'az_AZ.ISO8859-9E',
911    'az_ir':                                'az_IR.UTF-8',
912    'be':                                   'be_BY.CP1251',
913    'be@latin':                             'be_BY.UTF-8@latin',
914    'be_bg.utf8':                           'bg_BG.UTF-8',
915    'be_by':                                'be_BY.CP1251',
916    'be_by@latin':                          'be_BY.UTF-8@latin',
917    'bem_zm':                               'bem_ZM.UTF-8',
918    'ber_dz':                               'ber_DZ.UTF-8',
919    'ber_ma':                               'ber_MA.UTF-8',
920    'bg':                                   'bg_BG.CP1251',
921    'bg_bg':                                'bg_BG.CP1251',
922    'bhb_in.utf8':                          'bhb_IN.UTF-8',
923    'bho_in':                               'bho_IN.UTF-8',
924    'bho_np':                               'bho_NP.UTF-8',
925    'bi_vu':                                'bi_VU.UTF-8',
926    'bn_bd':                                'bn_BD.UTF-8',
927    'bn_in':                                'bn_IN.UTF-8',
928    'bo_cn':                                'bo_CN.UTF-8',
929    'bo_in':                                'bo_IN.UTF-8',
930    'bokmal':                               'nb_NO.ISO8859-1',
931    'bokm\xe5l':                            'nb_NO.ISO8859-1',
932    'br':                                   'br_FR.ISO8859-1',
933    'br_fr':                                'br_FR.ISO8859-1',
934    'brx_in':                               'brx_IN.UTF-8',
935    'bs':                                   'bs_BA.ISO8859-2',
936    'bs_ba':                                'bs_BA.ISO8859-2',
937    'bulgarian':                            'bg_BG.CP1251',
938    'byn_er':                               'byn_ER.UTF-8',
939    'c':                                    'C',
940    'c-french':                             'fr_CA.ISO8859-1',
941    'c.ascii':                              'C',
942    'c.en':                                 'C',
943    'c.iso88591':                           'en_US.ISO8859-1',
944    'c.utf8':                               'en_US.UTF-8',
945    'c_c':                                  'C',
946    'c_c.c':                                'C',
947    'ca':                                   'ca_ES.ISO8859-1',
948    'ca_ad':                                'ca_AD.ISO8859-1',
949    'ca_es':                                'ca_ES.ISO8859-1',
950    'ca_es@valencia':                       'ca_ES.UTF-8@valencia',
951    'ca_fr':                                'ca_FR.ISO8859-1',
952    'ca_it':                                'ca_IT.ISO8859-1',
953    'catalan':                              'ca_ES.ISO8859-1',
954    'ce_ru':                                'ce_RU.UTF-8',
955    'cextend':                              'en_US.ISO8859-1',
956    'chinese-s':                            'zh_CN.eucCN',
957    'chinese-t':                            'zh_TW.eucTW',
958    'chr_us':                               'chr_US.UTF-8',
959    'ckb_iq':                               'ckb_IQ.UTF-8',
960    'cmn_tw':                               'cmn_TW.UTF-8',
961    'crh_ua':                               'crh_UA.UTF-8',
962    'croatian':                             'hr_HR.ISO8859-2',
963    'cs':                                   'cs_CZ.ISO8859-2',
964    'cs_cs':                                'cs_CZ.ISO8859-2',
965    'cs_cz':                                'cs_CZ.ISO8859-2',
966    'csb_pl':                               'csb_PL.UTF-8',
967    'cv_ru':                                'cv_RU.UTF-8',
968    'cy':                                   'cy_GB.ISO8859-1',
969    'cy_gb':                                'cy_GB.ISO8859-1',
970    'cz':                                   'cs_CZ.ISO8859-2',
971    'cz_cz':                                'cs_CZ.ISO8859-2',
972    'czech':                                'cs_CZ.ISO8859-2',
973    'da':                                   'da_DK.ISO8859-1',
974    'da_dk':                                'da_DK.ISO8859-1',
975    'danish':                               'da_DK.ISO8859-1',
976    'dansk':                                'da_DK.ISO8859-1',
977    'de':                                   'de_DE.ISO8859-1',
978    'de_at':                                'de_AT.ISO8859-1',
979    'de_be':                                'de_BE.ISO8859-1',
980    'de_ch':                                'de_CH.ISO8859-1',
981    'de_de':                                'de_DE.ISO8859-1',
982    'de_it':                                'de_IT.ISO8859-1',
983    'de_li.utf8':                           'de_LI.UTF-8',
984    'de_lu':                                'de_LU.ISO8859-1',
985    'deutsch':                              'de_DE.ISO8859-1',
986    'doi_in':                               'doi_IN.UTF-8',
987    'dutch':                                'nl_NL.ISO8859-1',
988    'dutch.iso88591':                       'nl_BE.ISO8859-1',
989    'dv_mv':                                'dv_MV.UTF-8',
990    'dz_bt':                                'dz_BT.UTF-8',
991    'ee':                                   'ee_EE.ISO8859-4',
992    'ee_ee':                                'ee_EE.ISO8859-4',
993    'eesti':                                'et_EE.ISO8859-1',
994    'el':                                   'el_GR.ISO8859-7',
995    'el_cy':                                'el_CY.ISO8859-7',
996    'el_gr':                                'el_GR.ISO8859-7',
997    'el_gr@euro':                           'el_GR.ISO8859-15',
998    'en':                                   'en_US.ISO8859-1',
999    'en_ag':                                'en_AG.UTF-8',
1000    'en_au':                                'en_AU.ISO8859-1',
1001    'en_be':                                'en_BE.ISO8859-1',
1002    'en_bw':                                'en_BW.ISO8859-1',
1003    'en_ca':                                'en_CA.ISO8859-1',
1004    'en_dk':                                'en_DK.ISO8859-1',
1005    'en_dl.utf8':                           'en_DL.UTF-8',
1006    'en_gb':                                'en_GB.ISO8859-1',
1007    'en_hk':                                'en_HK.ISO8859-1',
1008    'en_ie':                                'en_IE.ISO8859-1',
1009    'en_il':                                'en_IL.UTF-8',
1010    'en_in':                                'en_IN.ISO8859-1',
1011    'en_ng':                                'en_NG.UTF-8',
1012    'en_nz':                                'en_NZ.ISO8859-1',
1013    'en_ph':                                'en_PH.ISO8859-1',
1014    'en_sc.utf8':                           'en_SC.UTF-8',
1015    'en_sg':                                'en_SG.ISO8859-1',
1016    'en_uk':                                'en_GB.ISO8859-1',
1017    'en_us':                                'en_US.ISO8859-1',
1018    'en_us@euro@euro':                      'en_US.ISO8859-15',
1019    'en_za':                                'en_ZA.ISO8859-1',
1020    'en_zm':                                'en_ZM.UTF-8',
1021    'en_zw':                                'en_ZW.ISO8859-1',
1022    'en_zw.utf8':                           'en_ZS.UTF-8',
1023    'eng_gb':                               'en_GB.ISO8859-1',
1024    'english':                              'en_EN.ISO8859-1',
1025    'english.iso88591':                     'en_US.ISO8859-1',
1026    'english_uk':                           'en_GB.ISO8859-1',
1027    'english_united-states':                'en_US.ISO8859-1',
1028    'english_united-states.437':            'C',
1029    'english_us':                           'en_US.ISO8859-1',
1030    'eo':                                   'eo_XX.ISO8859-3',
1031    'eo.utf8':                              'eo.UTF-8',
1032    'eo_eo':                                'eo_EO.ISO8859-3',
1033    'eo_us.utf8':                           'eo_US.UTF-8',
1034    'eo_xx':                                'eo_XX.ISO8859-3',
1035    'es':                                   'es_ES.ISO8859-1',
1036    'es_ar':                                'es_AR.ISO8859-1',
1037    'es_bo':                                'es_BO.ISO8859-1',
1038    'es_cl':                                'es_CL.ISO8859-1',
1039    'es_co':                                'es_CO.ISO8859-1',
1040    'es_cr':                                'es_CR.ISO8859-1',
1041    'es_cu':                                'es_CU.UTF-8',
1042    'es_do':                                'es_DO.ISO8859-1',
1043    'es_ec':                                'es_EC.ISO8859-1',
1044    'es_es':                                'es_ES.ISO8859-1',
1045    'es_gt':                                'es_GT.ISO8859-1',
1046    'es_hn':                                'es_HN.ISO8859-1',
1047    'es_mx':                                'es_MX.ISO8859-1',
1048    'es_ni':                                'es_NI.ISO8859-1',
1049    'es_pa':                                'es_PA.ISO8859-1',
1050    'es_pe':                                'es_PE.ISO8859-1',
1051    'es_pr':                                'es_PR.ISO8859-1',
1052    'es_py':                                'es_PY.ISO8859-1',
1053    'es_sv':                                'es_SV.ISO8859-1',
1054    'es_us':                                'es_US.ISO8859-1',
1055    'es_uy':                                'es_UY.ISO8859-1',
1056    'es_ve':                                'es_VE.ISO8859-1',
1057    'estonian':                             'et_EE.ISO8859-1',
1058    'et':                                   'et_EE.ISO8859-15',
1059    'et_ee':                                'et_EE.ISO8859-15',
1060    'eu':                                   'eu_ES.ISO8859-1',
1061    'eu_es':                                'eu_ES.ISO8859-1',
1062    'eu_fr':                                'eu_FR.ISO8859-1',
1063    'fa':                                   'fa_IR.UTF-8',
1064    'fa_ir':                                'fa_IR.UTF-8',
1065    'fa_ir.isiri3342':                      'fa_IR.ISIRI-3342',
1066    'ff_sn':                                'ff_SN.UTF-8',
1067    'fi':                                   'fi_FI.ISO8859-15',
1068    'fi_fi':                                'fi_FI.ISO8859-15',
1069    'fil_ph':                               'fil_PH.UTF-8',
1070    'finnish':                              'fi_FI.ISO8859-1',
1071    'fo':                                   'fo_FO.ISO8859-1',
1072    'fo_fo':                                'fo_FO.ISO8859-1',
1073    'fr':                                   'fr_FR.ISO8859-1',
1074    'fr_be':                                'fr_BE.ISO8859-1',
1075    'fr_ca':                                'fr_CA.ISO8859-1',
1076    'fr_ch':                                'fr_CH.ISO8859-1',
1077    'fr_fr':                                'fr_FR.ISO8859-1',
1078    'fr_lu':                                'fr_LU.ISO8859-1',
1079    'fran\xe7ais':                          'fr_FR.ISO8859-1',
1080    'fre_fr':                               'fr_FR.ISO8859-1',
1081    'french':                               'fr_FR.ISO8859-1',
1082    'french.iso88591':                      'fr_CH.ISO8859-1',
1083    'french_france':                        'fr_FR.ISO8859-1',
1084    'fur_it':                               'fur_IT.UTF-8',
1085    'fy_de':                                'fy_DE.UTF-8',
1086    'fy_nl':                                'fy_NL.UTF-8',
1087    'ga':                                   'ga_IE.ISO8859-1',
1088    'ga_ie':                                'ga_IE.ISO8859-1',
1089    'galego':                               'gl_ES.ISO8859-1',
1090    'galician':                             'gl_ES.ISO8859-1',
1091    'gd':                                   'gd_GB.ISO8859-1',
1092    'gd_gb':                                'gd_GB.ISO8859-1',
1093    'ger_de':                               'de_DE.ISO8859-1',
1094    'german':                               'de_DE.ISO8859-1',
1095    'german.iso88591':                      'de_CH.ISO8859-1',
1096    'german_germany':                       'de_DE.ISO8859-1',
1097    'gez_er':                               'gez_ER.UTF-8',
1098    'gez_et':                               'gez_ET.UTF-8',
1099    'gl':                                   'gl_ES.ISO8859-1',
1100    'gl_es':                                'gl_ES.ISO8859-1',
1101    'greek':                                'el_GR.ISO8859-7',
1102    'gu_in':                                'gu_IN.UTF-8',
1103    'gv':                                   'gv_GB.ISO8859-1',
1104    'gv_gb':                                'gv_GB.ISO8859-1',
1105    'ha_ng':                                'ha_NG.UTF-8',
1106    'hak_tw':                               'hak_TW.UTF-8',
1107    'he':                                   'he_IL.ISO8859-8',
1108    'he_il':                                'he_IL.ISO8859-8',
1109    'hebrew':                               'he_IL.ISO8859-8',
1110    'hi':                                   'hi_IN.ISCII-DEV',
1111    'hi_in':                                'hi_IN.ISCII-DEV',
1112    'hi_in.isciidev':                       'hi_IN.ISCII-DEV',
1113    'hif_fj':                               'hif_FJ.UTF-8',
1114    'hne':                                  'hne_IN.UTF-8',
1115    'hne_in':                               'hne_IN.UTF-8',
1116    'hr':                                   'hr_HR.ISO8859-2',
1117    'hr_hr':                                'hr_HR.ISO8859-2',
1118    'hrvatski':                             'hr_HR.ISO8859-2',
1119    'hsb_de':                               'hsb_DE.ISO8859-2',
1120    'ht_ht':                                'ht_HT.UTF-8',
1121    'hu':                                   'hu_HU.ISO8859-2',
1122    'hu_hu':                                'hu_HU.ISO8859-2',
1123    'hungarian':                            'hu_HU.ISO8859-2',
1124    'hy_am':                                'hy_AM.UTF-8',
1125    'hy_am.armscii8':                       'hy_AM.ARMSCII_8',
1126    'ia':                                   'ia.UTF-8',
1127    'ia_fr':                                'ia_FR.UTF-8',
1128    'icelandic':                            'is_IS.ISO8859-1',
1129    'id':                                   'id_ID.ISO8859-1',
1130    'id_id':                                'id_ID.ISO8859-1',
1131    'ig_ng':                                'ig_NG.UTF-8',
1132    'ik_ca':                                'ik_CA.UTF-8',
1133    'in':                                   'id_ID.ISO8859-1',
1134    'in_id':                                'id_ID.ISO8859-1',
1135    'is':                                   'is_IS.ISO8859-1',
1136    'is_is':                                'is_IS.ISO8859-1',
1137    'iso-8859-1':                           'en_US.ISO8859-1',
1138    'iso-8859-15':                          'en_US.ISO8859-15',
1139    'iso8859-1':                            'en_US.ISO8859-1',
1140    'iso8859-15':                           'en_US.ISO8859-15',
1141    'iso_8859_1':                           'en_US.ISO8859-1',
1142    'iso_8859_15':                          'en_US.ISO8859-15',
1143    'it':                                   'it_IT.ISO8859-1',
1144    'it_ch':                                'it_CH.ISO8859-1',
1145    'it_it':                                'it_IT.ISO8859-1',
1146    'italian':                              'it_IT.ISO8859-1',
1147    'iu':                                   'iu_CA.NUNACOM-8',
1148    'iu_ca':                                'iu_CA.NUNACOM-8',
1149    'iu_ca.nunacom8':                       'iu_CA.NUNACOM-8',
1150    'iw':                                   'he_IL.ISO8859-8',
1151    'iw_il':                                'he_IL.ISO8859-8',
1152    'iw_il.utf8':                           'iw_IL.UTF-8',
1153    'ja':                                   'ja_JP.eucJP',
1154    'ja_jp':                                'ja_JP.eucJP',
1155    'ja_jp.euc':                            'ja_JP.eucJP',
1156    'ja_jp.mscode':                         'ja_JP.SJIS',
1157    'ja_jp.pck':                            'ja_JP.SJIS',
1158    'japan':                                'ja_JP.eucJP',
1159    'japanese':                             'ja_JP.eucJP',
1160    'japanese-euc':                         'ja_JP.eucJP',
1161    'japanese.euc':                         'ja_JP.eucJP',
1162    'jp_jp':                                'ja_JP.eucJP',
1163    'ka':                                   'ka_GE.GEORGIAN-ACADEMY',
1164    'ka_ge':                                'ka_GE.GEORGIAN-ACADEMY',
1165    'ka_ge.georgianacademy':                'ka_GE.GEORGIAN-ACADEMY',
1166    'ka_ge.georgianps':                     'ka_GE.GEORGIAN-PS',
1167    'ka_ge.georgianrs':                     'ka_GE.GEORGIAN-ACADEMY',
1168    'kab_dz':                               'kab_DZ.UTF-8',
1169    'kk_kz':                                'kk_KZ.ptcp154',
1170    'kl':                                   'kl_GL.ISO8859-1',
1171    'kl_gl':                                'kl_GL.ISO8859-1',
1172    'km_kh':                                'km_KH.UTF-8',
1173    'kn':                                   'kn_IN.UTF-8',
1174    'kn_in':                                'kn_IN.UTF-8',
1175    'ko':                                   'ko_KR.eucKR',
1176    'ko_kr':                                'ko_KR.eucKR',
1177    'ko_kr.euc':                            'ko_KR.eucKR',
1178    'kok_in':                               'kok_IN.UTF-8',
1179    'korean':                               'ko_KR.eucKR',
1180    'korean.euc':                           'ko_KR.eucKR',
1181    'ks':                                   'ks_IN.UTF-8',
1182    'ks_in':                                'ks_IN.UTF-8',
1183    'ks_in@devanagari.utf8':                'ks_IN.UTF-8@devanagari',
1184    'ku_tr':                                'ku_TR.ISO8859-9',
1185    'kw':                                   'kw_GB.ISO8859-1',
1186    'kw_gb':                                'kw_GB.ISO8859-1',
1187    'ky':                                   'ky_KG.UTF-8',
1188    'ky_kg':                                'ky_KG.UTF-8',
1189    'lb_lu':                                'lb_LU.UTF-8',
1190    'lg_ug':                                'lg_UG.ISO8859-10',
1191    'li_be':                                'li_BE.UTF-8',
1192    'li_nl':                                'li_NL.UTF-8',
1193    'lij_it':                               'lij_IT.UTF-8',
1194    'lithuanian':                           'lt_LT.ISO8859-13',
1195    'ln_cd':                                'ln_CD.UTF-8',
1196    'lo':                                   'lo_LA.MULELAO-1',
1197    'lo_la':                                'lo_LA.MULELAO-1',
1198    'lo_la.cp1133':                         'lo_LA.IBM-CP1133',
1199    'lo_la.ibmcp1133':                      'lo_LA.IBM-CP1133',
1200    'lo_la.mulelao1':                       'lo_LA.MULELAO-1',
1201    'lt':                                   'lt_LT.ISO8859-13',
1202    'lt_lt':                                'lt_LT.ISO8859-13',
1203    'lv':                                   'lv_LV.ISO8859-13',
1204    'lv_lv':                                'lv_LV.ISO8859-13',
1205    'lzh_tw':                               'lzh_TW.UTF-8',
1206    'mag_in':                               'mag_IN.UTF-8',
1207    'mai':                                  'mai_IN.UTF-8',
1208    'mai_in':                               'mai_IN.UTF-8',
1209    'mai_np':                               'mai_NP.UTF-8',
1210    'mfe_mu':                               'mfe_MU.UTF-8',
1211    'mg_mg':                                'mg_MG.ISO8859-15',
1212    'mhr_ru':                               'mhr_RU.UTF-8',
1213    'mi':                                   'mi_NZ.ISO8859-1',
1214    'mi_nz':                                'mi_NZ.ISO8859-1',
1215    'miq_ni':                               'miq_NI.UTF-8',
1216    'mjw_in':                               'mjw_IN.UTF-8',
1217    'mk':                                   'mk_MK.ISO8859-5',
1218    'mk_mk':                                'mk_MK.ISO8859-5',
1219    'ml':                                   'ml_IN.UTF-8',
1220    'ml_in':                                'ml_IN.UTF-8',
1221    'mn_mn':                                'mn_MN.UTF-8',
1222    'mni_in':                               'mni_IN.UTF-8',
1223    'mr':                                   'mr_IN.UTF-8',
1224    'mr_in':                                'mr_IN.UTF-8',
1225    'ms':                                   'ms_MY.ISO8859-1',
1226    'ms_my':                                'ms_MY.ISO8859-1',
1227    'mt':                                   'mt_MT.ISO8859-3',
1228    'mt_mt':                                'mt_MT.ISO8859-3',
1229    'my_mm':                                'my_MM.UTF-8',
1230    'nan_tw':                               'nan_TW.UTF-8',
1231    'nb':                                   'nb_NO.ISO8859-1',
1232    'nb_no':                                'nb_NO.ISO8859-1',
1233    'nds_de':                               'nds_DE.UTF-8',
1234    'nds_nl':                               'nds_NL.UTF-8',
1235    'ne_np':                                'ne_NP.UTF-8',
1236    'nhn_mx':                               'nhn_MX.UTF-8',
1237    'niu_nu':                               'niu_NU.UTF-8',
1238    'niu_nz':                               'niu_NZ.UTF-8',
1239    'nl':                                   'nl_NL.ISO8859-1',
1240    'nl_aw':                                'nl_AW.UTF-8',
1241    'nl_be':                                'nl_BE.ISO8859-1',
1242    'nl_nl':                                'nl_NL.ISO8859-1',
1243    'nn':                                   'nn_NO.ISO8859-1',
1244    'nn_no':                                'nn_NO.ISO8859-1',
1245    'no':                                   'no_NO.ISO8859-1',
1246    'no@nynorsk':                           'ny_NO.ISO8859-1',
1247    'no_no':                                'no_NO.ISO8859-1',
1248    'no_no.iso88591@bokmal':                'no_NO.ISO8859-1',
1249    'no_no.iso88591@nynorsk':               'no_NO.ISO8859-1',
1250    'norwegian':                            'no_NO.ISO8859-1',
1251    'nr':                                   'nr_ZA.ISO8859-1',
1252    'nr_za':                                'nr_ZA.ISO8859-1',
1253    'nso':                                  'nso_ZA.ISO8859-15',
1254    'nso_za':                               'nso_ZA.ISO8859-15',
1255    'ny':                                   'ny_NO.ISO8859-1',
1256    'ny_no':                                'ny_NO.ISO8859-1',
1257    'nynorsk':                              'nn_NO.ISO8859-1',
1258    'oc':                                   'oc_FR.ISO8859-1',
1259    'oc_fr':                                'oc_FR.ISO8859-1',
1260    'om_et':                                'om_ET.UTF-8',
1261    'om_ke':                                'om_KE.ISO8859-1',
1262    'or':                                   'or_IN.UTF-8',
1263    'or_in':                                'or_IN.UTF-8',
1264    'os_ru':                                'os_RU.UTF-8',
1265    'pa':                                   'pa_IN.UTF-8',
1266    'pa_in':                                'pa_IN.UTF-8',
1267    'pa_pk':                                'pa_PK.UTF-8',
1268    'pap_an':                               'pap_AN.UTF-8',
1269    'pap_aw':                               'pap_AW.UTF-8',
1270    'pap_cw':                               'pap_CW.UTF-8',
1271    'pd':                                   'pd_US.ISO8859-1',
1272    'pd_de':                                'pd_DE.ISO8859-1',
1273    'pd_us':                                'pd_US.ISO8859-1',
1274    'ph':                                   'ph_PH.ISO8859-1',
1275    'ph_ph':                                'ph_PH.ISO8859-1',
1276    'pl':                                   'pl_PL.ISO8859-2',
1277    'pl_pl':                                'pl_PL.ISO8859-2',
1278    'polish':                               'pl_PL.ISO8859-2',
1279    'portuguese':                           'pt_PT.ISO8859-1',
1280    'portuguese_brazil':                    'pt_BR.ISO8859-1',
1281    'posix':                                'C',
1282    'posix-utf2':                           'C',
1283    'pp':                                   'pp_AN.ISO8859-1',
1284    'pp_an':                                'pp_AN.ISO8859-1',
1285    'ps_af':                                'ps_AF.UTF-8',
1286    'pt':                                   'pt_PT.ISO8859-1',
1287    'pt_br':                                'pt_BR.ISO8859-1',
1288    'pt_pt':                                'pt_PT.ISO8859-1',
1289    'quz_pe':                               'quz_PE.UTF-8',
1290    'raj_in':                               'raj_IN.UTF-8',
1291    'ro':                                   'ro_RO.ISO8859-2',
1292    'ro_ro':                                'ro_RO.ISO8859-2',
1293    'romanian':                             'ro_RO.ISO8859-2',
1294    'ru':                                   'ru_RU.UTF-8',
1295    'ru_ru':                                'ru_RU.UTF-8',
1296    'ru_ua':                                'ru_UA.KOI8-U',
1297    'rumanian':                             'ro_RO.ISO8859-2',
1298    'russian':                              'ru_RU.KOI8-R',
1299    'rw':                                   'rw_RW.ISO8859-1',
1300    'rw_rw':                                'rw_RW.ISO8859-1',
1301    'sa_in':                                'sa_IN.UTF-8',
1302    'sat_in':                               'sat_IN.UTF-8',
1303    'sc_it':                                'sc_IT.UTF-8',
1304    'sd':                                   'sd_IN.UTF-8',
1305    'sd_in':                                'sd_IN.UTF-8',
1306    'sd_in@devanagari.utf8':                'sd_IN.UTF-8@devanagari',
1307    'sd_pk':                                'sd_PK.UTF-8',
1308    'se_no':                                'se_NO.UTF-8',
1309    'serbocroatian':                        'sr_RS.UTF-8@latin',
1310    'sgs_lt':                               'sgs_LT.UTF-8',
1311    'sh':                                   'sr_RS.UTF-8@latin',
1312    'sh_ba.iso88592@bosnia':                'sr_CS.ISO8859-2',
1313    'sh_hr':                                'sh_HR.ISO8859-2',
1314    'sh_hr.iso88592':                       'hr_HR.ISO8859-2',
1315    'sh_sp':                                'sr_CS.ISO8859-2',
1316    'sh_yu':                                'sr_RS.UTF-8@latin',
1317    'shn_mm':                               'shn_MM.UTF-8',
1318    'shs_ca':                               'shs_CA.UTF-8',
1319    'si':                                   'si_LK.UTF-8',
1320    'si_lk':                                'si_LK.UTF-8',
1321    'sid_et':                               'sid_ET.UTF-8',
1322    'sinhala':                              'si_LK.UTF-8',
1323    'sk':                                   'sk_SK.ISO8859-2',
1324    'sk_sk':                                'sk_SK.ISO8859-2',
1325    'sl':                                   'sl_SI.ISO8859-2',
1326    'sl_cs':                                'sl_CS.ISO8859-2',
1327    'sl_si':                                'sl_SI.ISO8859-2',
1328    'slovak':                               'sk_SK.ISO8859-2',
1329    'slovene':                              'sl_SI.ISO8859-2',
1330    'slovenian':                            'sl_SI.ISO8859-2',
1331    'sm_ws':                                'sm_WS.UTF-8',
1332    'so_dj':                                'so_DJ.ISO8859-1',
1333    'so_et':                                'so_ET.UTF-8',
1334    'so_ke':                                'so_KE.ISO8859-1',
1335    'so_so':                                'so_SO.ISO8859-1',
1336    'sp':                                   'sr_CS.ISO8859-5',
1337    'sp_yu':                                'sr_CS.ISO8859-5',
1338    'spanish':                              'es_ES.ISO8859-1',
1339    'spanish_spain':                        'es_ES.ISO8859-1',
1340    'sq':                                   'sq_AL.ISO8859-2',
1341    'sq_al':                                'sq_AL.ISO8859-2',
1342    'sq_mk':                                'sq_MK.UTF-8',
1343    'sr':                                   'sr_RS.UTF-8',
1344    'sr@cyrillic':                          'sr_RS.UTF-8',
1345    'sr@latn':                              'sr_CS.UTF-8@latin',
1346    'sr_cs':                                'sr_CS.UTF-8',
1347    'sr_cs.iso88592@latn':                  'sr_CS.ISO8859-2',
1348    'sr_cs@latn':                           'sr_CS.UTF-8@latin',
1349    'sr_me':                                'sr_ME.UTF-8',
1350    'sr_rs':                                'sr_RS.UTF-8',
1351    'sr_rs@latn':                           'sr_RS.UTF-8@latin',
1352    'sr_sp':                                'sr_CS.ISO8859-2',
1353    'sr_yu':                                'sr_RS.UTF-8@latin',
1354    'sr_yu.cp1251@cyrillic':                'sr_CS.CP1251',
1355    'sr_yu.iso88592':                       'sr_CS.ISO8859-2',
1356    'sr_yu.iso88595':                       'sr_CS.ISO8859-5',
1357    'sr_yu.iso88595@cyrillic':              'sr_CS.ISO8859-5',
1358    'sr_yu.microsoftcp1251@cyrillic':       'sr_CS.CP1251',
1359    'sr_yu.utf8':                           'sr_RS.UTF-8',
1360    'sr_yu.utf8@cyrillic':                  'sr_RS.UTF-8',
1361    'sr_yu@cyrillic':                       'sr_RS.UTF-8',
1362    'ss':                                   'ss_ZA.ISO8859-1',
1363    'ss_za':                                'ss_ZA.ISO8859-1',
1364    'st':                                   'st_ZA.ISO8859-1',
1365    'st_za':                                'st_ZA.ISO8859-1',
1366    'sv':                                   'sv_SE.ISO8859-1',
1367    'sv_fi':                                'sv_FI.ISO8859-1',
1368    'sv_se':                                'sv_SE.ISO8859-1',
1369    'sw_ke':                                'sw_KE.UTF-8',
1370    'sw_tz':                                'sw_TZ.UTF-8',
1371    'swedish':                              'sv_SE.ISO8859-1',
1372    'szl_pl':                               'szl_PL.UTF-8',
1373    'ta':                                   'ta_IN.TSCII-0',
1374    'ta_in':                                'ta_IN.TSCII-0',
1375    'ta_in.tscii':                          'ta_IN.TSCII-0',
1376    'ta_in.tscii0':                         'ta_IN.TSCII-0',
1377    'ta_lk':                                'ta_LK.UTF-8',
1378    'tcy_in.utf8':                          'tcy_IN.UTF-8',
1379    'te':                                   'te_IN.UTF-8',
1380    'te_in':                                'te_IN.UTF-8',
1381    'tg':                                   'tg_TJ.KOI8-C',
1382    'tg_tj':                                'tg_TJ.KOI8-C',
1383    'th':                                   'th_TH.ISO8859-11',
1384    'th_th':                                'th_TH.ISO8859-11',
1385    'th_th.tactis':                         'th_TH.TIS620',
1386    'th_th.tis620':                         'th_TH.TIS620',
1387    'thai':                                 'th_TH.ISO8859-11',
1388    'the_np':                               'the_NP.UTF-8',
1389    'ti_er':                                'ti_ER.UTF-8',
1390    'ti_et':                                'ti_ET.UTF-8',
1391    'tig_er':                               'tig_ER.UTF-8',
1392    'tk_tm':                                'tk_TM.UTF-8',
1393    'tl':                                   'tl_PH.ISO8859-1',
1394    'tl_ph':                                'tl_PH.ISO8859-1',
1395    'tn':                                   'tn_ZA.ISO8859-15',
1396    'tn_za':                                'tn_ZA.ISO8859-15',
1397    'to_to':                                'to_TO.UTF-8',
1398    'tpi_pg':                               'tpi_PG.UTF-8',
1399    'tr':                                   'tr_TR.ISO8859-9',
1400    'tr_cy':                                'tr_CY.ISO8859-9',
1401    'tr_tr':                                'tr_TR.ISO8859-9',
1402    'ts':                                   'ts_ZA.ISO8859-1',
1403    'ts_za':                                'ts_ZA.ISO8859-1',
1404    'tt':                                   'tt_RU.TATAR-CYR',
1405    'tt_ru':                                'tt_RU.TATAR-CYR',
1406    'tt_ru.tatarcyr':                       'tt_RU.TATAR-CYR',
1407    'tt_ru@iqtelif':                        'tt_RU.UTF-8@iqtelif',
1408    'turkish':                              'tr_TR.ISO8859-9',
1409    'ug_cn':                                'ug_CN.UTF-8',
1410    'uk':                                   'uk_UA.KOI8-U',
1411    'uk_ua':                                'uk_UA.KOI8-U',
1412    'univ':                                 'en_US.utf',
1413    'universal':                            'en_US.utf',
1414    'universal.utf8@ucs4':                  'en_US.UTF-8',
1415    'unm_us':                               'unm_US.UTF-8',
1416    'ur':                                   'ur_PK.CP1256',
1417    'ur_in':                                'ur_IN.UTF-8',
1418    'ur_pk':                                'ur_PK.CP1256',
1419    'uz':                                   'uz_UZ.UTF-8',
1420    'uz_uz':                                'uz_UZ.UTF-8',
1421    'uz_uz@cyrillic':                       'uz_UZ.UTF-8',
1422    've':                                   've_ZA.UTF-8',
1423    've_za':                                've_ZA.UTF-8',
1424    'vi':                                   'vi_VN.TCVN',
1425    'vi_vn':                                'vi_VN.TCVN',
1426    'vi_vn.tcvn':                           'vi_VN.TCVN',
1427    'vi_vn.tcvn5712':                       'vi_VN.TCVN',
1428    'vi_vn.viscii':                         'vi_VN.VISCII',
1429    'vi_vn.viscii111':                      'vi_VN.VISCII',
1430    'wa':                                   'wa_BE.ISO8859-1',
1431    'wa_be':                                'wa_BE.ISO8859-1',
1432    'wae_ch':                               'wae_CH.UTF-8',
1433    'wal_et':                               'wal_ET.UTF-8',
1434    'wo_sn':                                'wo_SN.UTF-8',
1435    'xh':                                   'xh_ZA.ISO8859-1',
1436    'xh_za':                                'xh_ZA.ISO8859-1',
1437    'yi':                                   'yi_US.CP1255',
1438    'yi_us':                                'yi_US.CP1255',
1439    'yo_ng':                                'yo_NG.UTF-8',
1440    'yue_hk':                               'yue_HK.UTF-8',
1441    'yuw_pg':                               'yuw_PG.UTF-8',
1442    'zh':                                   'zh_CN.eucCN',
1443    'zh_cn':                                'zh_CN.gb2312',
1444    'zh_cn.big5':                           'zh_TW.big5',
1445    'zh_cn.euc':                            'zh_CN.eucCN',
1446    'zh_hk':                                'zh_HK.big5hkscs',
1447    'zh_hk.big5hk':                         'zh_HK.big5hkscs',
1448    'zh_sg':                                'zh_SG.GB2312',
1449    'zh_sg.gbk':                            'zh_SG.GBK',
1450    'zh_tw':                                'zh_TW.big5',
1451    'zh_tw.euc':                            'zh_TW.eucTW',
1452    'zh_tw.euctw':                          'zh_TW.eucTW',
1453    'zu':                                   'zu_ZA.ISO8859-1',
1454    'zu_za':                                'zu_ZA.ISO8859-1',
1455}
1456
1457#
1458# This maps Windows language identifiers to locale strings.
1459#
1460# This list has been updated from
1461# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
1462# to include every locale up to Windows Vista.
1463#
1464# NOTE: this mapping is incomplete.  If your language is missing, please
1465# submit a bug report to the Python bug tracker at http://bugs.python.org/
1466# Make sure you include the missing language identifier and the suggested
1467# locale code.
1468#
1469
# Keys are Windows language identifiers (LCIDs); values are the matching
# 'language_TERRITORY' locale names.  Script variants of one language
# (e.g. Latin vs. Cyrillic) intentionally share a single locale name, so
# several identifiers may map to the same value.
windows_locale = {
    0x0436: "af_ZA", # Afrikaans
    0x041c: "sq_AL", # Albanian
    0x0484: "gsw_FR",# Alsatian - France
    0x045e: "am_ET", # Amharic - Ethiopia
    0x0401: "ar_SA", # Arabic - Saudi Arabia
    0x0801: "ar_IQ", # Arabic - Iraq
    0x0c01: "ar_EG", # Arabic - Egypt
    0x1001: "ar_LY", # Arabic - Libya
    0x1401: "ar_DZ", # Arabic - Algeria
    0x1801: "ar_MA", # Arabic - Morocco
    0x1c01: "ar_TN", # Arabic - Tunisia
    0x2001: "ar_OM", # Arabic - Oman
    0x2401: "ar_YE", # Arabic - Yemen
    0x2801: "ar_SY", # Arabic - Syria
    0x2c01: "ar_JO", # Arabic - Jordan
    0x3001: "ar_LB", # Arabic - Lebanon
    0x3401: "ar_KW", # Arabic - Kuwait
    0x3801: "ar_AE", # Arabic - United Arab Emirates
    0x3c01: "ar_BH", # Arabic - Bahrain
    0x4001: "ar_QA", # Arabic - Qatar
    0x042b: "hy_AM", # Armenian
    0x044d: "as_IN", # Assamese - India
    0x042c: "az_AZ", # Azeri - Latin
    0x082c: "az_AZ", # Azeri - Cyrillic
    0x046d: "ba_RU", # Bashkir
    0x042d: "eu_ES", # Basque - Spain
    0x0423: "be_BY", # Belarusian
    0x0445: "bn_IN", # Bengali
    0x201a: "bs_BA", # Bosnian - Cyrillic
    0x141a: "bs_BA", # Bosnian - Latin
    0x047e: "br_FR", # Breton - France
    0x0402: "bg_BG", # Bulgarian
#    0x0455: "my_MM", # Burmese - Not supported
    0x0403: "ca_ES", # Catalan
    0x0004: "zh_CHS",# Chinese - Simplified
    0x0404: "zh_TW", # Chinese - Taiwan
    0x0804: "zh_CN", # Chinese - PRC
    0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
    0x1004: "zh_SG", # Chinese - Singapore
    0x1404: "zh_MO", # Chinese - Macao S.A.R.
    0x7c04: "zh_CHT",# Chinese - Traditional
    0x0483: "co_FR", # Corsican - France
    0x041a: "hr_HR", # Croatian
    0x101a: "hr_BA", # Croatian - Bosnia
    0x0405: "cs_CZ", # Czech
    0x0406: "da_DK", # Danish
    0x048c: "gbz_AF",# Dari - Afghanistan
    0x0465: "div_MV",# Divehi - Maldives
    0x0413: "nl_NL", # Dutch - The Netherlands
    0x0813: "nl_BE", # Dutch - Belgium
    0x0409: "en_US", # English - United States
    0x0809: "en_GB", # English - United Kingdom
    0x0c09: "en_AU", # English - Australia
    0x1009: "en_CA", # English - Canada
    0x1409: "en_NZ", # English - New Zealand
    0x1809: "en_IE", # English - Ireland
    0x1c09: "en_ZA", # English - South Africa
    0x2009: "en_JA", # English - Jamaica
    0x2409: "en_CB", # English - Caribbean
    0x2809: "en_BZ", # English - Belize
    0x2c09: "en_TT", # English - Trinidad
    0x3009: "en_ZW", # English - Zimbabwe
    0x3409: "en_PH", # English - Philippines
    0x4009: "en_IN", # English - India
    0x4409: "en_MY", # English - Malaysia
    0x4809: "en_SG", # English - Singapore
    0x0425: "et_EE", # Estonian
    0x0438: "fo_FO", # Faroese
    0x0464: "fil_PH",# Filipino
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French - France
    0x080c: "fr_BE", # French - Belgium
    0x0c0c: "fr_CA", # French - Canada
    0x100c: "fr_CH", # French - Switzerland
    0x140c: "fr_LU", # French - Luxembourg
    0x180c: "fr_MC", # French - Monaco
    0x0462: "fy_NL", # Frisian - Netherlands
    0x0456: "gl_ES", # Galician
    0x0437: "ka_GE", # Georgian
    0x0407: "de_DE", # German - Germany
    0x0807: "de_CH", # German - Switzerland
    0x0c07: "de_AT", # German - Austria
    0x1007: "de_LU", # German - Luxembourg
    0x1407: "de_LI", # German - Liechtenstein
    0x0408: "el_GR", # Greek
    0x046f: "kl_GL", # Greenlandic - Greenland
    0x0447: "gu_IN", # Gujarati
    0x0468: "ha_NG", # Hausa - Latin
    0x040d: "he_IL", # Hebrew
    0x0439: "hi_IN", # Hindi
    0x040e: "hu_HU", # Hungarian
    0x040f: "is_IS", # Icelandic
    0x0421: "id_ID", # Indonesian
    0x045d: "iu_CA", # Inuktitut - Syllabics
    0x085d: "iu_CA", # Inuktitut - Latin
    0x083c: "ga_IE", # Irish - Ireland
    0x0410: "it_IT", # Italian - Italy
    0x0810: "it_CH", # Italian - Switzerland
    0x0411: "ja_JP", # Japanese
    0x044b: "kn_IN", # Kannada - India
    0x043f: "kk_KZ", # Kazakh
    0x0453: "kh_KH", # Khmer - Cambodia
    0x0486: "qut_GT",# K'iche - Guatemala
    0x0487: "rw_RW", # Kinyarwanda - Rwanda
    0x0457: "kok_IN",# Konkani
    0x0412: "ko_KR", # Korean
    0x0440: "ky_KG", # Kyrgyz
    0x0454: "lo_LA", # Lao - Lao PDR
    0x0426: "lv_LV", # Latvian
    0x0427: "lt_LT", # Lithuanian
    0x082e: "dsb_DE",# Lower Sorbian - Germany
    0x046e: "lb_LU", # Luxembourgish
    0x042f: "mk_MK", # FYROM Macedonian
    0x043e: "ms_MY", # Malay - Malaysia
    0x083e: "ms_BN", # Malay - Brunei Darussalam
    0x044c: "ml_IN", # Malayalam - India
    0x043a: "mt_MT", # Maltese
    0x0481: "mi_NZ", # Maori
    0x047a: "arn_CL",# Mapudungun
    0x044e: "mr_IN", # Marathi
    0x047c: "moh_CA",# Mohawk - Canada
    0x0450: "mn_MN", # Mongolian - Cyrillic
    0x0850: "mn_CN", # Mongolian - PRC
    0x0461: "ne_NP", # Nepali
    0x0414: "nb_NO", # Norwegian - Bokmal
    0x0814: "nn_NO", # Norwegian - Nynorsk
    0x0482: "oc_FR", # Occitan - France
    0x0448: "or_IN", # Oriya - India
    0x0463: "ps_AF", # Pashto - Afghanistan
    0x0429: "fa_IR", # Persian
    0x0415: "pl_PL", # Polish
    0x0416: "pt_BR", # Portuguese - Brazil
    0x0816: "pt_PT", # Portuguese - Portugal
    0x0446: "pa_IN", # Punjabi
    0x046b: "quz_BO",# Quechua (Bolivia)
    0x086b: "quz_EC",# Quechua (Ecuador)
    0x0c6b: "quz_PE",# Quechua (Peru)
    0x0418: "ro_RO", # Romanian - Romania
    0x0417: "rm_CH", # Romansh
    0x0419: "ru_RU", # Russian
    0x243b: "smn_FI",# Sami Finland
    0x103b: "smj_NO",# Sami Norway
    0x143b: "smj_SE",# Sami Sweden
    0x043b: "se_NO", # Sami Northern Norway
    0x083b: "se_SE", # Sami Northern Sweden
    0x0c3b: "se_FI", # Sami Northern Finland
    0x203b: "sms_FI",# Sami Skolt
    0x183b: "sma_NO",# Sami Southern Norway
    0x1c3b: "sma_SE",# Sami Southern Sweden
    0x044f: "sa_IN", # Sanskrit
    0x0c1a: "sr_SP", # Serbian - Cyrillic
    0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
    0x081a: "sr_SP", # Serbian - Latin
    0x181a: "sr_BA", # Serbian - Bosnia Latin
    0x045b: "si_LK", # Sinhala - Sri Lanka
    0x046c: "ns_ZA", # Northern Sotho
    0x0432: "tn_ZA", # Setswana - Southern Africa
    0x041b: "sk_SK", # Slovak
    0x0424: "sl_SI", # Slovenian
    0x040a: "es_ES", # Spanish - Spain
    0x080a: "es_MX", # Spanish - Mexico
    0x0c0a: "es_ES", # Spanish - Spain (Modern)
    0x100a: "es_GT", # Spanish - Guatemala
    0x140a: "es_CR", # Spanish - Costa Rica
    0x180a: "es_PA", # Spanish - Panama
    0x1c0a: "es_DO", # Spanish - Dominican Republic
    0x200a: "es_VE", # Spanish - Venezuela
    0x240a: "es_CO", # Spanish - Colombia
    0x280a: "es_PE", # Spanish - Peru
    0x2c0a: "es_AR", # Spanish - Argentina
    0x300a: "es_EC", # Spanish - Ecuador
    0x340a: "es_CL", # Spanish - Chile
    0x380a: "es_UR", # Spanish - Uruguay
    0x3c0a: "es_PY", # Spanish - Paraguay
    0x400a: "es_BO", # Spanish - Bolivia
    0x440a: "es_SV", # Spanish - El Salvador
    0x480a: "es_HN", # Spanish - Honduras
    0x4c0a: "es_NI", # Spanish - Nicaragua
    0x500a: "es_PR", # Spanish - Puerto Rico
    0x540a: "es_US", # Spanish - United States
#    0x0430: "", # Sutu - Not supported
    0x0441: "sw_KE", # Swahili
    0x041d: "sv_SE", # Swedish - Sweden
    0x081d: "sv_FI", # Swedish - Finland
    0x045a: "syr_SY",# Syriac
    0x0428: "tg_TJ", # Tajik - Cyrillic
    0x085f: "tmz_DZ",# Tamazight - Latin
    0x0449: "ta_IN", # Tamil
    0x0444: "tt_RU", # Tatar
    0x044a: "te_IN", # Telugu
    0x041e: "th_TH", # Thai
    0x0851: "bo_BT", # Tibetan - Bhutan
    0x0451: "bo_CN", # Tibetan - PRC
    0x041f: "tr_TR", # Turkish
    0x0442: "tk_TM", # Turkmen - Cyrillic
    0x0480: "ug_CN", # Uighur - Arabic
    0x0422: "uk_UA", # Ukrainian
    0x042e: "wen_DE",# Upper Sorbian - Germany
    0x0420: "ur_PK", # Urdu
    0x0820: "ur_IN", # Urdu - India
    0x0443: "uz_UZ", # Uzbek - Latin
    0x0843: "uz_UZ", # Uzbek - Cyrillic
    0x042a: "vi_VN", # Vietnamese
    0x0452: "cy_GB", # Welsh
    0x0488: "wo_SN", # Wolof - Senegal
    0x0434: "xh_ZA", # Xhosa - South Africa
    0x0485: "sah_RU",# Yakut - Cyrillic
    0x0478: "ii_CN", # Yi - PRC
    0x046a: "yo_NG", # Yoruba - Nigeria
    0x0435: "zu_ZA", # Zulu
}
1682
def _print_locale():
    """ Test function.

        Prints the defaults reported by getdefaultlocale() and then the
        language/encoding of every LC_* category (except LC_ALL) three
        times: as found on entry, after resetlocale(), and after
        setlocale(LC_ALL, "").

        Note that this changes the locale of the running process as a
        side effect of calling resetlocale() and setlocale().
    """
    # Collect every LC_* constant this module defines; the set differs
    # between platforms (e.g. LC_MESSAGES), so it is discovered at run time.
    categories = {name: value for name, value in globals().items()
                  if name[:3] == 'LC_'}
    # LC_ALL is a composite of the others and is not queried on its own.
    del categories['LC_ALL']

    def _show_categories():
        # Print the current language/encoding for each collected category.
        for name, category in categories.items():
            print(name, '...')
            lang, enc = getlocale(category)
            print('   Language: ', lang or '(undefined)')
            print('   Encoding: ', enc or '(undefined)')
            print()

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('Language: ', lang or '(undefined)')
    print('Encoding: ', enc or '(undefined)')
    print()

    print('Locale settings on startup:')
    print('-'*72)
    _show_categories()

    print()
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _show_categories()

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only a rejected locale is reported here; anything else
        # (including KeyboardInterrupt) is allowed to propagate.
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print()
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _show_categories()
1738
1739###
1740
# LC_MESSAGES is non-standard, so it is exported only when this module
# actually ended up defining it.
if 'LC_MESSAGES' in globals():
    __all__.append("LC_MESSAGES")
1747
if __name__ == '__main__':
    # Script entry point: dump the locale settings, then run the number
    # formatting demo.  Each heading is followed by one blank line.
    print('Locale aliasing:', end='\n\n')
    _print_locale()
    print()
    print('Number formatting:', end='\n\n')
    _test()
1756