• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Locale support module.
2
3The module provides low-level access to the C lib's locale APIs and adds high
4level number formatting APIs as well as a locale aliasing engine to complement
5these.
6
7The aliasing engine includes support for many commonly used locale names and
8maps them to values suitable for passing to the C lib's setlocale() function. It
9also includes default encodings for all supported locale names.
10
11"""
12
import sys
import encodings
import encodings.aliases
import re
import collections
import collections.abc
from builtins import str as _builtin_str
import functools
20
21# Try importing the _locale module.
22#
23# If this fails, fall back on a basic 'C' locale emulation.
24
25# Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
26# trying the import.  So __all__ is also fiddled at the end of the file.
27__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
28           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
29           "str", "atof", "atoi", "format", "format_string", "currency",
30           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
31           "LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
32
33def _strcoll(a,b):
34    """ strcoll(string,string) -> int.
35        Compares two strings according to the locale.
36    """
37    return (a > b) - (a < b)
38
39def _strxfrm(s):
40    """ strxfrm(string) -> string.
41        Returns a string that behaves for cmp locale-aware.
42    """
43    return s
44
try:

    from _locale import *

except ImportError:

    # The C extension is unavailable: emulate a fixed 'C' locale.

    Error = ValueError
    CHAR_MAX = 127

    # Category constants, listed in numeric order.
    LC_CTYPE = 0
    LC_NUMERIC = 1
    LC_TIME = 2
    LC_COLLATE = 3
    LC_MONETARY = 4
    LC_MESSAGES = 5
    LC_ALL = 6

    def localeconv():
        """ localeconv() -> dict.
            Returns the numeric and monetary parameters of the emulated
            'C' locale.
        """
        # 127 is the value stored for every numeric field of the
        # emulated 'C' locale; a fresh dict (and list) is built per call.
        unset = 127
        return {'grouping': [unset],
                'currency_symbol': '',
                'n_sign_posn': unset,
                'p_cs_precedes': unset,
                'n_cs_precedes': unset,
                'mon_grouping': [],
                'n_sep_by_space': unset,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': unset,
                'int_curr_symbol': '',
                'p_sign_posn': unset,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': unset,
                'mon_decimal_point': '',
                'int_frac_digits': unset}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Queries or "activates" the emulated locale; only None, ''
            and 'C' are accepted and the answer is always 'C'.
        """
        if value in (None, '', 'C'):
            return 'C'
        raise Error('_locale emulation only supports "C" locale')
94
95# These may or may not exist in _locale, so be sure to set them.
# Probe for each name; fall back to the pure-Python version when the
# star-import (or the emulation) did not provide it.
try:
    strxfrm
except NameError:
    strxfrm = _strxfrm
try:
    strcoll
except NameError:
    strcoll = _strcoll
100
101
102_localeconv = localeconv
103
104# With this dict, you can override some items of localeconv's return value.
105# This is useful for testing purposes.
106_override_localeconv = {}
107
@functools.wraps(_localeconv)
def localeconv():
    # functools.wraps copies the name and docstring of the low-level
    # localeconv onto this wrapper, so no docstring is given here.
    conv = _localeconv()
    # Entries in _override_localeconv (a testing hook) win over the
    # values reported by the low-level function.
    if _override_localeconv:
        conv.update(_override_localeconv)
    return conv
114
115
116### Number formatting APIs
117
118# Author: Martin von Loewis
119# improved by Georg Brandl
120
121# Iterate over grouping intervals
def _grouping_intervals(grouping):
    """Yield the successive group sizes described by *grouping*.

    A CHAR_MAX entry ends the sequence.  A 0 entry repeats the previous
    size forever; ValueError is raised if 0 comes first.
    """
    previous = None
    for size in grouping:
        if size == CHAR_MAX:
            # CHAR_MAX terminates the list: no further grouping
            return
        if size != 0:
            yield size
            previous = size
            continue
        # 0: re-use the previous group size ad infinitum
        if previous is None:
            raise ValueError("invalid grouping")
        while True:
            yield previous
136
137#perform the grouping from right to left
def _group(s, monetary=False):
    """Insert the locale's thousands separator into the digit string *s*.

    Grouping proceeds from right to left.  Returns a tuple
    (grouped string, number of characters added by the separators).
    With *monetary* true the 'mon_*' locale settings are used.
    """
    conv = localeconv()
    separator = conv['mon_thousands_sep' if monetary else 'thousands_sep']
    grouping = conv['mon_grouping' if monetary else 'grouping']
    if not grouping:
        return (s, 0)

    # Set trailing padding aside so it does not take part in grouping.
    trailing = ''
    if s[-1] == ' ':
        body = s.rstrip()
        trailing = s[len(body):]
        s = body

    leading = ''
    chunks = []
    for width in _grouping_intervals(grouping):
        if not s or s[-1] not in "0123456789":
            # only non-digit characters remain (sign, spaces)
            leading, s = s, ''
            break
        chunks.append(s[-width:])
        s = s[:-width]
    if s:
        # the grouping list ran out: the rest forms one last group
        chunks.append(s)
    chunks.reverse()

    separators_used = len(chunks) - 1
    grouped = leading + separator.join(chunks) + trailing
    return (grouped, len(separator) * separators_used)
167
168# Strip a given amount of excess padding from the given string
169def _strip_padding(s, amount):
170    lpos = 0
171    while amount and s[lpos] == ' ':
172        lpos += 1
173        amount -= 1
174    rpos = len(s) - 1
175    while amount and s[rpos] == ' ':
176        rpos -= 1
177        amount -= 1
178    return s[lpos:rpos+1]
179
180_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
181                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
182
def format(percent, value, grouping=False, monetary=False, *additional):
    """Returns the locale-aware substitution of a %? specifier
    (percent).

    additional is for format strings which contain one or more
    '*' modifiers.

    Raises ValueError unless percent consists of exactly one format
    specifier."""
    spec = _percent_re.match(percent)
    # the specifier must start at position 0 and span the whole string
    is_single_spec = spec is not None and len(spec.group()) == len(percent)
    if not is_single_spec:
        raise ValueError(("format() must be given exactly one %%char "
                         "format specifier, %s not valid") % repr(percent))
    return _format(percent, value, grouping, monetary, *additional)
195
def _format(percent, value, grouping=False, monetary=False, *additional):
    """Format *value* with the single specifier *percent*, then localize.

    Float conversions get the locale's decimal point and, when
    *grouping* is true, thousands separators; integer conversions only
    get the separators.  Other conversions are returned unchanged.
    """
    # '*' modifiers consume the extra arguments
    args = (value,) + additional if additional else value
    formatted = percent % args

    conversion = percent[-1]
    added = 0
    if conversion in 'eEfFgG':
        parts = formatted.split('.')
        if grouping:
            parts[0], added = _group(parts[0], monetary=monetary)
        point_key = 'mon_decimal_point' if monetary else 'decimal_point'
        formatted = localeconv()[point_key].join(parts)
        if added:
            # separators widened the field: give back that much padding
            formatted = _strip_padding(formatted, added)
    elif conversion in 'diu':
        if grouping:
            formatted, added = _group(formatted, monetary=monetary)
        if added:
            formatted = _strip_padding(formatted, added)
    return formatted
219
def format_string(f, val, grouping=False):
    """Formats a string in the same way that the % formatting would use,
    but takes the current locale into account.
    Grouping is applied if the third parameter is true.

    val may be a single value, a tuple of values, or a mapping (for
    '%(key)s' style specifiers)."""
    percents = list(_percent_re.finditer(f))
    # every specifier is replaced by '%s'; the pre-formatted pieces are
    # substituted back in at the end
    new_f = _percent_re.sub('%s', f)

    # collections.abc.Mapping: the bare collections.Mapping alias was
    # removed in Python 3.10.
    if isinstance(val, collections.abc.Mapping):
        new_val = []
        for perc in percents:
            if perc.group()[-1]=='%':
                new_val.append('%')
            else:
                # perc.group() already matched _percent_re, so the
                # validating format() wrapper is not needed here
                new_val.append(_format(perc.group(), val, grouping))
    else:
        if not isinstance(val, tuple):
            val = (val,)
        new_val = []
        i = 0
        for perc in percents:
            if perc.group()[-1]=='%':
                new_val.append('%')
            else:
                # each '*' modifier consumes one extra positional value
                starcount = perc.group('modifiers').count('*')
                new_val.append(_format(perc.group(),
                                      val[i],
                                      grouping,
                                      False,
                                      *val[i+1:i+1+starcount]))
                i += (1 + starcount)

    return new_f % tuple(new_val)
253
def currency(val, symbol=True, grouping=False, international=False):
    """Formats val according to the currency settings
    in the current locale.

    symbol selects whether the currency symbol is included, grouping
    whether thousands separators are inserted, and international
    whether the international symbol and precision are used.  Raises
    ValueError when the locale reports 127 fractional digits (the 'C'
    locale)."""
    conv = localeconv()

    # check for illegal values
    digits = conv['int_frac_digits' if international else 'frac_digits']
    if digits == 127:
        raise ValueError("Currency formatting is not possible using "
                         "the 'C' locale.")

    amount = format('%%.%if' % digits, abs(val), grouping, monetary=True)
    # '<' and '>' are markers if the sign must be inserted between symbol and value
    s = '<' + amount + '>'
    is_negative = val < 0

    if symbol:
        smb = conv['int_curr_symbol' if international else 'currency_symbol']
        precedes = conv['n_cs_precedes' if is_negative else 'p_cs_precedes']
        separated = conv['n_sep_by_space' if is_negative else 'p_sep_by_space']
        gap = ' ' if separated else ''

        if precedes:
            s = smb + gap + s
        else:
            s = s + gap + smb

    sign_pos = conv['n_sign_posn' if is_negative else 'p_sign_posn']
    sign = conv['negative_sign' if is_negative else 'positive_sign']

    if sign_pos == 0:
        s = '(' + s + ')'
    elif sign_pos == 2:
        s = s + sign
    elif sign_pos == 3:
        s = s.replace('<', sign)
    elif sign_pos == 4:
        s = s.replace('>', sign)
    else:
        # position 1, and the default if nothing specified;
        # this should be the most fitting sign position
        s = sign + s

    # drop whichever markers were not replaced by the sign
    return s.replace('<', '').replace('>', '')
298
def str(val):
    """Return val formatted with '%.12g', using the locale's conventions."""
    localized = format("%.12g", val)
    return localized
302
def delocalize(string):
    "Parses a string as a normalized number according to the locale settings."

    conv = localeconv()

    # First get rid of the grouping, then turn the decimal point into a
    # dot.  An empty locale setting leaves the string alone.
    for key, replacement in (('thousands_sep', ''), ('decimal_point', '.')):
        symbol = conv[key]
        if symbol:
            string = string.replace(symbol, replacement)
    return string
318
def atof(string, func=float):
    "Parses a string as a float according to the locale settings."
    # func receives the delocalized text and does the actual conversion
    normalized = delocalize(string)
    return func(normalized)
322
def atoi(string):
    "Converts a string to an integer according to the locale settings."
    normalized = delocalize(string)
    return int(normalized)
326
def _test():
    """Print a grouped integer and a float, each with its parsed value."""
    setlocale(LC_ALL, "")
    # integer formatting with grouping switched on
    grouped = format("%d", 123456789, 1)
    print(grouped, "is", atoi(grouped))
    # standard float formatting
    plain = str(3.14)
    print(plain, "is", atof(plain))
335
336### Locale name aliasing engine
337
338# Author: Marc-Andre Lemburg, mal@lemburg.com
339# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
340
341# store away the low-level version of setlocale (it's
342# overridden below)
343_setlocale = setlocale
344
def _replace_encoding(code, encoding):
    """Return *code* with its encoding part replaced by *encoding*.

    The encoding is run through the Python codec aliases and then
    through locale_encoding_alias to obtain a C lib compatible name.
    """
    # everything before the first '.' (or the whole code) is the language
    langname = code.partition('.')[0]

    # Convert the encoding to a C lib compatible encoding string
    codec = encodings.normalize_encoding(encoding)
    codec = encodings.aliases.aliases.get(codec.lower(), codec)

    key = codec.lower()
    if key not in locale_encoding_alias:
        # retry with '_' and '-' stripped
        key = key.replace('_', '').replace('-', '')
    # fall back to the codec name itself when no alias is known
    c_encoding = locale_encoding_alias.get(key, codec)
    return langname + '.' + c_encoding
367
368def _append_modifier(code, modifier):
369    if modifier == 'euro':
370        if '.' not in code:
371            return code + '.ISO8859-15'
372        _, _, encoding = code.partition('.')
373        if encoding in ('ISO8859-15', 'UTF-8'):
374            return code
375        if encoding == 'ISO8859-1':
376            return _replace_encoding(code, 'ISO8859-15')
377    return code + '@' + modifier
378
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

    """
    # Split the lower-cased name into language, encoding and modifier.
    # ':' is sometimes used as encoding delimiter, so treat it like '.'.
    code = localename.lower().replace(':', '.')
    code, _, modifier = code.partition('@')
    langname, _, remainder = code.partition('.')
    # only the first dotted component after the language is the encoding
    encoding = remainder.partition('.')[0]

    # First lookup: fullname (possibly with encoding and modifier)
    lang_enc = langname
    if encoding:
        lang_enc += '.' + encoding.replace('-', '').replace('_', '')
    full_name = (lang_enc + '@' + modifier) if modifier else lang_enc
    alias = locale_alias.get(full_name)
    if alias is not None:
        return alias

    if modifier:
        # Second try: fullname without modifier (possibly with encoding)
        alias = locale_alias.get(lang_enc)
        if alias is not None:
            _, at, alias_modifier = alias.partition('@')
            if not at:
                return _append_modifier(alias, modifier)
            if alias_modifier.lower() == modifier:
                return alias

    if not encoding:
        # the remaining lookups only make sense with an encoding
        return localename

    # Third try: langname (without encoding, possibly with modifier)
    short_name = (langname + '@' + modifier) if modifier else langname
    alias = locale_alias.get(short_name)
    if alias is not None:
        base, at, alias_modifier = alias.partition('@')
        if not at:
            return _replace_encoding(alias, encoding)
        return _replace_encoding(base, encoding) + '@' + alias_modifier

    if modifier:
        # Fourth try: langname (without encoding and modifier)
        alias = locale_alias.get(langname)
        if alias is not None:
            base, at, alias_modifier = alias.partition('@')
            if not at:
                recoded = _replace_encoding(alias, encoding)
                return _append_modifier(recoded, modifier)
            if alias_modifier.lower() == modifier:
                return _replace_encoding(base, encoding) + '@' + alias_modifier

    return localename
461
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    # Deal with locale modifiers: split off anything after the first '@'
    code, _, modifier = normalize(localename).partition('@')
    language, dot, remainder = code.partition('.')
    if dot:
        # language plus the first dotted component as encoding
        return language, remainder.partition('.')[0]

    if modifier == 'euro':
        # Assume Latin-9 for @euro locales. This is bogus,
        # since some systems may use other encodings for these
        # locales. Also, we ignore other modifiers.
        return code, 'iso-8859-15'
    if code == 'C':
        return None, None
    raise ValueError('unknown locale: %s' % localename)
491
492def _build_localename(localetuple):
493
494    """ Builds a locale code from the given tuple (language code,
495        encoding).
496
497        No aliasing or normalizing takes place.
498
499    """
500    try:
501        language, encoding = localetuple
502
503        if language is None:
504            language = 'C'
505        if encoding is None:
506            return language
507        else:
508            return language + '.' + encoding
509    except (TypeError, ValueError):
510        raise TypeError('Locale must be None, a string, or an iterable of two strings -- language code, encoding.')
511
def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined will be
        used. envvars defaults to the search path used in GNU gettext;
        it must always contain the variable name 'LANG'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code.startswith("0x"):
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour: the first non-empty variable wins
    import os
    localename = 'C'
    for variable in envvars:
        candidate = os.environ.get(variable)
        if not candidate:
            continue
        if variable == 'LANGUAGE':
            # LANGUAGE holds a ':'-separated list; use its first entry
            candidate = candidate.partition(':')[0]
        localename = candidate
        break
    return _parse_localename(localename)
563
564
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* value except LC_ALL. It
        defaults to LC_CTYPE.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    current = _setlocale(category)
    # a ';' in the LC_ALL answer means a composite setting, which
    # cannot be expressed as a single (language, encoding) pair
    is_composite = ';' in current
    if is_composite and category == LC_ALL:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)
582
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, an iterable of two strings (language code and encoding),
        or None.

        Iterables are converted to strings using the locale aliasing
        engine.  Locale strings are passed directly to the C lib.

        category may be given as one of the LC_* values.

    """
    needs_conversion = bool(locale) and not isinstance(locale, _builtin_str)
    if needs_conversion:
        # (language, encoding) pair -> normalized locale string
        localename = _build_localename(locale)
        locale = normalize(localename)
    return _setlocale(category, locale)
599
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is determined by calling
        getdefaultlocale(). category defaults to LC_ALL.

    """
    default = getdefaultlocale()
    _setlocale(category, _build_localename(default))
609
if sys.platform.startswith("win"):
    # On Win32, this will return the ANSI code page
    def getpreferredencoding(do_setlocale = True):
        """Return the charset that the user is likely using."""
        import _bootlocale
        return _bootlocale.getpreferredencoding(False)
else:
    # On Unix, if CODESET is available, use that.
    try:
        CODESET
    except NameError:
        # Fall back to parsing environment variables :-(
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            by looking at environment variables."""
            res = getdefaultlocale()[1]
            if res is None:
                # LANG not set, default conservatively to ASCII
                res = 'ascii'
            return res
    else:
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            according to the system configuration.

            With do_setlocale true, LC_CTYPE is temporarily switched to
            the user's default locale for the lookup and restored
            afterwards, even if the lookup raises."""
            import _bootlocale
            if not do_setlocale:
                return _bootlocale.getpreferredencoding(False)
            oldloc = setlocale(LC_CTYPE)
            try:
                try:
                    setlocale(LC_CTYPE, "")
                except Error:
                    # best effort: keep the current LC_CTYPE setting
                    pass
                return _bootlocale.getpreferredencoding(False)
            finally:
                # always put the caller's LC_CTYPE back
                setlocale(LC_CTYPE, oldloc)
645
646
647### Database
648#
649# The following data was extracted from the locale.alias file which
650# comes with X11 and then hand edited removing the explicit encoding
651# definitions and adding some more aliases. The file is usually
652# available as /usr/lib/X11/locale/locale.alias.
653#
654
655#
# The locale_encoding_alias table maps lowercase encoding alias names
# to C locale encoding names (case-sensitive). Note that normalize()
# first looks up the encoding in the encodings.aliases dictionary and
# then applies this mapping to find the correct C lib name for the
# encoding.
661#
662locale_encoding_alias = {
663
664    # Mappings for non-standard encoding names used in locale names
665    '437':                          'C',
666    'c':                            'C',
667    'en':                           'ISO8859-1',
668    'jis':                          'JIS7',
669    'jis7':                         'JIS7',
670    'ajec':                         'eucJP',
671    'koi8c':                        'KOI8-C',
672    'microsoftcp1251':              'CP1251',
673    'microsoftcp1255':              'CP1255',
674    'microsoftcp1256':              'CP1256',
675    '88591':                        'ISO8859-1',
676    '88592':                        'ISO8859-2',
677    '88595':                        'ISO8859-5',
678    '885915':                       'ISO8859-15',
679
680    # Mappings from Python codec names to C lib encoding names
681    'ascii':                        'ISO8859-1',
682    'latin_1':                      'ISO8859-1',
683    'iso8859_1':                    'ISO8859-1',
684    'iso8859_10':                   'ISO8859-10',
685    'iso8859_11':                   'ISO8859-11',
686    'iso8859_13':                   'ISO8859-13',
687    'iso8859_14':                   'ISO8859-14',
688    'iso8859_15':                   'ISO8859-15',
689    'iso8859_16':                   'ISO8859-16',
690    'iso8859_2':                    'ISO8859-2',
691    'iso8859_3':                    'ISO8859-3',
692    'iso8859_4':                    'ISO8859-4',
693    'iso8859_5':                    'ISO8859-5',
694    'iso8859_6':                    'ISO8859-6',
695    'iso8859_7':                    'ISO8859-7',
696    'iso8859_8':                    'ISO8859-8',
697    'iso8859_9':                    'ISO8859-9',
698    'iso2022_jp':                   'JIS7',
699    'shift_jis':                    'SJIS',
700    'tactis':                       'TACTIS',
701    'euc_jp':                       'eucJP',
702    'euc_kr':                       'eucKR',
703    'utf_8':                        'UTF-8',
704    'koi8_r':                       'KOI8-R',
705    'koi8_t':                       'KOI8-T',
706    'koi8_u':                       'KOI8-U',
707    'kz1048':                       'RK1048',
708    'cp1251':                       'CP1251',
709    'cp1255':                       'CP1255',
710    'cp1256':                       'CP1256',
711
712    # XXX This list is still incomplete. If you know more
713    # mappings, please file a bug report. Thanks.
714}
715
716for k, v in sorted(locale_encoding_alias.items()):
717    k = k.replace('_', '')
718    locale_encoding_alias.setdefault(k, v)
719
720#
721# The locale_alias table maps lowercase alias names to C locale names
722# (case-sensitive). Encodings are always separated from the locale
723# name using a dot ('.'); they should only be given in case the
724# language name is needed to interpret the given encoding alias
725# correctly (CJK codes often have this need).
726#
# Note that the normalize() function, which uses this table,
# removes '_' and '-' characters from the encoding part of the
# locale name before doing the lookup. This saves a lot of
# space in the table.
731#
732# MAL 2004-12-10:
733# Updated alias mapping to most recent locale.alias file
734# from X.org distribution using makelocalealias.py.
735#
736# These are the differences compared to the old mapping (Python 2.4
737# and older):
738#
739#    updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
740#    updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
741#    updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
742#    updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
743#    updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
744#    updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
745#    updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
746#    updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
747#    updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
748#    updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
749#    updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
750#    updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
751#    updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
752#    updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
753#    updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
754#    updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
755#    updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
756#    updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
757#    updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
758#    updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
759#    updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
760#    updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
761#
762# MAL 2008-05-30:
763# Updated alias mapping to most recent locale.alias file
764# from X.org distribution using makelocalealias.py.
765#
766# These are the differences compared to the old mapping (Python 2.5
767# and older):
768#
769#    updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
770#    updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
771#    updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
772#    updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
773#    updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
774#    updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
775#    updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
776#    updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
777#    updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
778#    updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
779#    updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
780#    updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
781#    updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
782#    updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
783#    updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
784#    updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
785#    updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
786#    updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
787#    updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
788#
789# AP 2010-04-12:
790# Updated alias mapping to most recent locale.alias file
791# from X.org distribution using makelocalealias.py.
792#
793# These are the differences compared to the old mapping (Python 2.6.5
794# and older):
795#
796#    updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
797#    updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
798#    updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
799#    updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
800#    updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
801#    updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
802#    updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
803#    updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
804#    updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
805#    updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
806#    updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
807#    updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
808#    updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
809#
810# SS 2013-12-20:
811# Updated alias mapping to most recent locale.alias file
812# from X.org distribution using makelocalealias.py.
813#
814# These are the differences compared to the old mapping (Python 3.3.3
815# and older):
816#
817#    updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
818#    updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
819#    updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
820#    updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
821#    updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
822#    updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
823#    updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
824#    updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
825#    updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
826#    updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
827#    updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
828#
829# SS 2014-10-01:
830# Updated alias mapping with glibc 2.19 supported locales.
831
832locale_alias = {
833    'a3':                                   'az_AZ.KOI8-C',
834    'a3_az':                                'az_AZ.KOI8-C',
835    'a3_az.koic':                           'az_AZ.KOI8-C',
836    'aa_dj':                                'aa_DJ.ISO8859-1',
837    'aa_er':                                'aa_ER.UTF-8',
838    'aa_et':                                'aa_ET.UTF-8',
839    'af':                                   'af_ZA.ISO8859-1',
840    'af_za':                                'af_ZA.ISO8859-1',
841    'am':                                   'am_ET.UTF-8',
842    'am_et':                                'am_ET.UTF-8',
843    'american':                             'en_US.ISO8859-1',
844    'an_es':                                'an_ES.ISO8859-15',
845    'ar':                                   'ar_AA.ISO8859-6',
846    'ar_aa':                                'ar_AA.ISO8859-6',
847    'ar_ae':                                'ar_AE.ISO8859-6',
848    'ar_bh':                                'ar_BH.ISO8859-6',
849    'ar_dz':                                'ar_DZ.ISO8859-6',
850    'ar_eg':                                'ar_EG.ISO8859-6',
851    'ar_in':                                'ar_IN.UTF-8',
852    'ar_iq':                                'ar_IQ.ISO8859-6',
853    'ar_jo':                                'ar_JO.ISO8859-6',
854    'ar_kw':                                'ar_KW.ISO8859-6',
855    'ar_lb':                                'ar_LB.ISO8859-6',
856    'ar_ly':                                'ar_LY.ISO8859-6',
857    'ar_ma':                                'ar_MA.ISO8859-6',
858    'ar_om':                                'ar_OM.ISO8859-6',
859    'ar_qa':                                'ar_QA.ISO8859-6',
860    'ar_sa':                                'ar_SA.ISO8859-6',
861    'ar_sd':                                'ar_SD.ISO8859-6',
862    'ar_sy':                                'ar_SY.ISO8859-6',
863    'ar_tn':                                'ar_TN.ISO8859-6',
864    'ar_ye':                                'ar_YE.ISO8859-6',
865    'arabic':                               'ar_AA.ISO8859-6',
866    'as':                                   'as_IN.UTF-8',
867    'as_in':                                'as_IN.UTF-8',
868    'ast_es':                               'ast_ES.ISO8859-15',
869    'ayc_pe':                               'ayc_PE.UTF-8',
870    'az':                                   'az_AZ.ISO8859-9E',
871    'az_az':                                'az_AZ.ISO8859-9E',
872    'az_az.iso88599e':                      'az_AZ.ISO8859-9E',
873    'be':                                   'be_BY.CP1251',
874    'be@latin':                             'be_BY.UTF-8@latin',
875    'be_bg.utf8':                           'bg_BG.UTF-8',
876    'be_by':                                'be_BY.CP1251',
877    'be_by@latin':                          'be_BY.UTF-8@latin',
878    'bem_zm':                               'bem_ZM.UTF-8',
879    'ber_dz':                               'ber_DZ.UTF-8',
880    'ber_ma':                               'ber_MA.UTF-8',
881    'bg':                                   'bg_BG.CP1251',
882    'bg_bg':                                'bg_BG.CP1251',
883    'bho_in':                               'bho_IN.UTF-8',
884    'bn_bd':                                'bn_BD.UTF-8',
885    'bn_in':                                'bn_IN.UTF-8',
886    'bo_cn':                                'bo_CN.UTF-8',
887    'bo_in':                                'bo_IN.UTF-8',
888    'bokmal':                               'nb_NO.ISO8859-1',
889    'bokm\xe5l':                            'nb_NO.ISO8859-1',
890    'br':                                   'br_FR.ISO8859-1',
891    'br_fr':                                'br_FR.ISO8859-1',
892    'brx_in':                               'brx_IN.UTF-8',
893    'bs':                                   'bs_BA.ISO8859-2',
894    'bs_ba':                                'bs_BA.ISO8859-2',
895    'bulgarian':                            'bg_BG.CP1251',
896    'byn_er':                               'byn_ER.UTF-8',
897    'c':                                    'C',
898    'c-french':                             'fr_CA.ISO8859-1',
899    'c.ascii':                              'C',
900    'c.en':                                 'C',
901    'c.iso88591':                           'en_US.ISO8859-1',
902    'c.utf8':                               'en_US.UTF-8',
903    'c_c':                                  'C',
904    'c_c.c':                                'C',
905    'ca':                                   'ca_ES.ISO8859-1',
906    'ca_ad':                                'ca_AD.ISO8859-1',
907    'ca_es':                                'ca_ES.ISO8859-1',
908    'ca_es@valencia':                       'ca_ES.ISO8859-15@valencia',
909    'ca_fr':                                'ca_FR.ISO8859-1',
910    'ca_it':                                'ca_IT.ISO8859-1',
911    'catalan':                              'ca_ES.ISO8859-1',
912    'cextend':                              'en_US.ISO8859-1',
913    'chinese-s':                            'zh_CN.eucCN',
914    'chinese-t':                            'zh_TW.eucTW',
915    'crh_ua':                               'crh_UA.UTF-8',
916    'croatian':                             'hr_HR.ISO8859-2',
917    'cs':                                   'cs_CZ.ISO8859-2',
918    'cs_cs':                                'cs_CZ.ISO8859-2',
919    'cs_cz':                                'cs_CZ.ISO8859-2',
920    'csb_pl':                               'csb_PL.UTF-8',
921    'cv_ru':                                'cv_RU.UTF-8',
922    'cy':                                   'cy_GB.ISO8859-1',
923    'cy_gb':                                'cy_GB.ISO8859-1',
924    'cz':                                   'cs_CZ.ISO8859-2',
925    'cz_cz':                                'cs_CZ.ISO8859-2',
926    'czech':                                'cs_CZ.ISO8859-2',
927    'da':                                   'da_DK.ISO8859-1',
928    'da_dk':                                'da_DK.ISO8859-1',
929    'danish':                               'da_DK.ISO8859-1',
930    'dansk':                                'da_DK.ISO8859-1',
931    'de':                                   'de_DE.ISO8859-1',
932    'de_at':                                'de_AT.ISO8859-1',
933    'de_be':                                'de_BE.ISO8859-1',
934    'de_ch':                                'de_CH.ISO8859-1',
935    'de_de':                                'de_DE.ISO8859-1',
936    'de_li.utf8':                           'de_LI.UTF-8',
937    'de_lu':                                'de_LU.ISO8859-1',
938    'deutsch':                              'de_DE.ISO8859-1',
939    'doi_in':                               'doi_IN.UTF-8',
940    'dutch':                                'nl_NL.ISO8859-1',
941    'dutch.iso88591':                       'nl_BE.ISO8859-1',
942    'dv_mv':                                'dv_MV.UTF-8',
943    'dz_bt':                                'dz_BT.UTF-8',
944    'ee':                                   'ee_EE.ISO8859-4',
945    'ee_ee':                                'ee_EE.ISO8859-4',
946    'eesti':                                'et_EE.ISO8859-1',
947    'el':                                   'el_GR.ISO8859-7',
948    'el_cy':                                'el_CY.ISO8859-7',
949    'el_gr':                                'el_GR.ISO8859-7',
950    'el_gr@euro':                           'el_GR.ISO8859-15',
951    'en':                                   'en_US.ISO8859-1',
952    'en_ag':                                'en_AG.UTF-8',
953    'en_au':                                'en_AU.ISO8859-1',
954    'en_be':                                'en_BE.ISO8859-1',
955    'en_bw':                                'en_BW.ISO8859-1',
956    'en_ca':                                'en_CA.ISO8859-1',
957    'en_dk':                                'en_DK.ISO8859-1',
958    'en_dl.utf8':                           'en_DL.UTF-8',
959    'en_gb':                                'en_GB.ISO8859-1',
960    'en_hk':                                'en_HK.ISO8859-1',
961    'en_ie':                                'en_IE.ISO8859-1',
962    'en_in':                                'en_IN.ISO8859-1',
963    'en_ng':                                'en_NG.UTF-8',
964    'en_nz':                                'en_NZ.ISO8859-1',
965    'en_ph':                                'en_PH.ISO8859-1',
966    'en_sg':                                'en_SG.ISO8859-1',
967    'en_uk':                                'en_GB.ISO8859-1',
968    'en_us':                                'en_US.ISO8859-1',
969    'en_us@euro@euro':                      'en_US.ISO8859-15',
970    'en_za':                                'en_ZA.ISO8859-1',
971    'en_zm':                                'en_ZM.UTF-8',
972    'en_zw':                                'en_ZW.ISO8859-1',
973    'en_zw.utf8':                           'en_ZS.UTF-8',
974    'eng_gb':                               'en_GB.ISO8859-1',
975    'english':                              'en_EN.ISO8859-1',
976    'english_uk':                           'en_GB.ISO8859-1',
977    'english_united-states':                'en_US.ISO8859-1',
978    'english_united-states.437':            'C',
979    'english_us':                           'en_US.ISO8859-1',
980    'eo':                                   'eo_XX.ISO8859-3',
981    'eo.utf8':                              'eo.UTF-8',
982    'eo_eo':                                'eo_EO.ISO8859-3',
983    'eo_us.utf8':                           'eo_US.UTF-8',
984    'eo_xx':                                'eo_XX.ISO8859-3',
985    'es':                                   'es_ES.ISO8859-1',
986    'es_ar':                                'es_AR.ISO8859-1',
987    'es_bo':                                'es_BO.ISO8859-1',
988    'es_cl':                                'es_CL.ISO8859-1',
989    'es_co':                                'es_CO.ISO8859-1',
990    'es_cr':                                'es_CR.ISO8859-1',
991    'es_cu':                                'es_CU.UTF-8',
992    'es_do':                                'es_DO.ISO8859-1',
993    'es_ec':                                'es_EC.ISO8859-1',
994    'es_es':                                'es_ES.ISO8859-1',
995    'es_gt':                                'es_GT.ISO8859-1',
996    'es_hn':                                'es_HN.ISO8859-1',
997    'es_mx':                                'es_MX.ISO8859-1',
998    'es_ni':                                'es_NI.ISO8859-1',
999    'es_pa':                                'es_PA.ISO8859-1',
1000    'es_pe':                                'es_PE.ISO8859-1',
1001    'es_pr':                                'es_PR.ISO8859-1',
1002    'es_py':                                'es_PY.ISO8859-1',
1003    'es_sv':                                'es_SV.ISO8859-1',
1004    'es_us':                                'es_US.ISO8859-1',
1005    'es_uy':                                'es_UY.ISO8859-1',
1006    'es_ve':                                'es_VE.ISO8859-1',
1007    'estonian':                             'et_EE.ISO8859-1',
1008    'et':                                   'et_EE.ISO8859-15',
1009    'et_ee':                                'et_EE.ISO8859-15',
1010    'eu':                                   'eu_ES.ISO8859-1',
1011    'eu_es':                                'eu_ES.ISO8859-1',
1012    'eu_fr':                                'eu_FR.ISO8859-1',
1013    'fa':                                   'fa_IR.UTF-8',
1014    'fa_ir':                                'fa_IR.UTF-8',
1015    'fa_ir.isiri3342':                      'fa_IR.ISIRI-3342',
1016    'ff_sn':                                'ff_SN.UTF-8',
1017    'fi':                                   'fi_FI.ISO8859-15',
1018    'fi_fi':                                'fi_FI.ISO8859-15',
1019    'fil_ph':                               'fil_PH.UTF-8',
1020    'finnish':                              'fi_FI.ISO8859-1',
1021    'fo':                                   'fo_FO.ISO8859-1',
1022    'fo_fo':                                'fo_FO.ISO8859-1',
1023    'fr':                                   'fr_FR.ISO8859-1',
1024    'fr_be':                                'fr_BE.ISO8859-1',
1025    'fr_ca':                                'fr_CA.ISO8859-1',
1026    'fr_ch':                                'fr_CH.ISO8859-1',
1027    'fr_fr':                                'fr_FR.ISO8859-1',
1028    'fr_lu':                                'fr_LU.ISO8859-1',
1029    'fran\xe7ais':                          'fr_FR.ISO8859-1',
1030    'fre_fr':                               'fr_FR.ISO8859-1',
1031    'french':                               'fr_FR.ISO8859-1',
1032    'french.iso88591':                      'fr_CH.ISO8859-1',
1033    'french_france':                        'fr_FR.ISO8859-1',
1034    'fur_it':                               'fur_IT.UTF-8',
1035    'fy_de':                                'fy_DE.UTF-8',
1036    'fy_nl':                                'fy_NL.UTF-8',
1037    'ga':                                   'ga_IE.ISO8859-1',
1038    'ga_ie':                                'ga_IE.ISO8859-1',
1039    'galego':                               'gl_ES.ISO8859-1',
1040    'galician':                             'gl_ES.ISO8859-1',
1041    'gd':                                   'gd_GB.ISO8859-1',
1042    'gd_gb':                                'gd_GB.ISO8859-1',
1043    'ger_de':                               'de_DE.ISO8859-1',
1044    'german':                               'de_DE.ISO8859-1',
1045    'german.iso88591':                      'de_CH.ISO8859-1',
1046    'german_germany':                       'de_DE.ISO8859-1',
1047    'gez_er':                               'gez_ER.UTF-8',
1048    'gez_et':                               'gez_ET.UTF-8',
1049    'gl':                                   'gl_ES.ISO8859-1',
1050    'gl_es':                                'gl_ES.ISO8859-1',
1051    'greek':                                'el_GR.ISO8859-7',
1052    'gu_in':                                'gu_IN.UTF-8',
1053    'gv':                                   'gv_GB.ISO8859-1',
1054    'gv_gb':                                'gv_GB.ISO8859-1',
1055    'ha_ng':                                'ha_NG.UTF-8',
1056    'he':                                   'he_IL.ISO8859-8',
1057    'he_il':                                'he_IL.ISO8859-8',
1058    'hebrew':                               'he_IL.ISO8859-8',
1059    'hi':                                   'hi_IN.ISCII-DEV',
1060    'hi_in':                                'hi_IN.ISCII-DEV',
1061    'hi_in.isciidev':                       'hi_IN.ISCII-DEV',
1062    'hne':                                  'hne_IN.UTF-8',
1063    'hne_in':                               'hne_IN.UTF-8',
1064    'hr':                                   'hr_HR.ISO8859-2',
1065    'hr_hr':                                'hr_HR.ISO8859-2',
1066    'hrvatski':                             'hr_HR.ISO8859-2',
1067    'hsb_de':                               'hsb_DE.ISO8859-2',
1068    'ht_ht':                                'ht_HT.UTF-8',
1069    'hu':                                   'hu_HU.ISO8859-2',
1070    'hu_hu':                                'hu_HU.ISO8859-2',
1071    'hungarian':                            'hu_HU.ISO8859-2',
1072    'hy_am':                                'hy_AM.UTF-8',
1073    'hy_am.armscii8':                       'hy_AM.ARMSCII_8',
1074    'ia':                                   'ia.UTF-8',
1075    'ia_fr':                                'ia_FR.UTF-8',
1076    'icelandic':                            'is_IS.ISO8859-1',
1077    'id':                                   'id_ID.ISO8859-1',
1078    'id_id':                                'id_ID.ISO8859-1',
1079    'ig_ng':                                'ig_NG.UTF-8',
1080    'ik_ca':                                'ik_CA.UTF-8',
1081    'in':                                   'id_ID.ISO8859-1',
1082    'in_id':                                'id_ID.ISO8859-1',
1083    'is':                                   'is_IS.ISO8859-1',
1084    'is_is':                                'is_IS.ISO8859-1',
1085    'iso-8859-1':                           'en_US.ISO8859-1',
1086    'iso-8859-15':                          'en_US.ISO8859-15',
1087    'iso8859-1':                            'en_US.ISO8859-1',
1088    'iso8859-15':                           'en_US.ISO8859-15',
1089    'iso_8859_1':                           'en_US.ISO8859-1',
1090    'iso_8859_15':                          'en_US.ISO8859-15',
1091    'it':                                   'it_IT.ISO8859-1',
1092    'it_ch':                                'it_CH.ISO8859-1',
1093    'it_it':                                'it_IT.ISO8859-1',
1094    'italian':                              'it_IT.ISO8859-1',
1095    'iu':                                   'iu_CA.NUNACOM-8',
1096    'iu_ca':                                'iu_CA.NUNACOM-8',
1097    'iu_ca.nunacom8':                       'iu_CA.NUNACOM-8',
1098    'iw':                                   'he_IL.ISO8859-8',
1099    'iw_il':                                'he_IL.ISO8859-8',
1100    'iw_il.utf8':                           'iw_IL.UTF-8',
1101    'ja':                                   'ja_JP.eucJP',
1102    'ja_jp':                                'ja_JP.eucJP',
1103    'ja_jp.euc':                            'ja_JP.eucJP',
1104    'ja_jp.mscode':                         'ja_JP.SJIS',
1105    'ja_jp.pck':                            'ja_JP.SJIS',
1106    'japan':                                'ja_JP.eucJP',
1107    'japanese':                             'ja_JP.eucJP',
1108    'japanese-euc':                         'ja_JP.eucJP',
1109    'japanese.euc':                         'ja_JP.eucJP',
1110    'jp_jp':                                'ja_JP.eucJP',
1111    'ka':                                   'ka_GE.GEORGIAN-ACADEMY',
1112    'ka_ge':                                'ka_GE.GEORGIAN-ACADEMY',
1113    'ka_ge.georgianacademy':                'ka_GE.GEORGIAN-ACADEMY',
1114    'ka_ge.georgianps':                     'ka_GE.GEORGIAN-PS',
1115    'ka_ge.georgianrs':                     'ka_GE.GEORGIAN-ACADEMY',
1116    'kk_kz':                                'kk_KZ.RK1048',
1117    'kl':                                   'kl_GL.ISO8859-1',
1118    'kl_gl':                                'kl_GL.ISO8859-1',
1119    'km_kh':                                'km_KH.UTF-8',
1120    'kn':                                   'kn_IN.UTF-8',
1121    'kn_in':                                'kn_IN.UTF-8',
1122    'ko':                                   'ko_KR.eucKR',
1123    'ko_kr':                                'ko_KR.eucKR',
1124    'ko_kr.euc':                            'ko_KR.eucKR',
1125    'kok_in':                               'kok_IN.UTF-8',
1126    'korean':                               'ko_KR.eucKR',
1127    'korean.euc':                           'ko_KR.eucKR',
1128    'ks':                                   'ks_IN.UTF-8',
1129    'ks_in':                                'ks_IN.UTF-8',
1130    'ks_in@devanagari.utf8':                'ks_IN.UTF-8@devanagari',
1131    'ku_tr':                                'ku_TR.ISO8859-9',
1132    'kw':                                   'kw_GB.ISO8859-1',
1133    'kw_gb':                                'kw_GB.ISO8859-1',
1134    'ky':                                   'ky_KG.UTF-8',
1135    'ky_kg':                                'ky_KG.UTF-8',
1136    'lb_lu':                                'lb_LU.UTF-8',
1137    'lg_ug':                                'lg_UG.ISO8859-10',
1138    'li_be':                                'li_BE.UTF-8',
1139    'li_nl':                                'li_NL.UTF-8',
1140    'lij_it':                               'lij_IT.UTF-8',
1141    'lithuanian':                           'lt_LT.ISO8859-13',
1142    'lo':                                   'lo_LA.MULELAO-1',
1143    'lo_la':                                'lo_LA.MULELAO-1',
1144    'lo_la.cp1133':                         'lo_LA.IBM-CP1133',
1145    'lo_la.ibmcp1133':                      'lo_LA.IBM-CP1133',
1146    'lo_la.mulelao1':                       'lo_LA.MULELAO-1',
1147    'lt':                                   'lt_LT.ISO8859-13',
1148    'lt_lt':                                'lt_LT.ISO8859-13',
1149    'lv':                                   'lv_LV.ISO8859-13',
1150    'lv_lv':                                'lv_LV.ISO8859-13',
1151    'mag_in':                               'mag_IN.UTF-8',
1152    'mai':                                  'mai_IN.UTF-8',
1153    'mai_in':                               'mai_IN.UTF-8',
1154    'mg_mg':                                'mg_MG.ISO8859-15',
1155    'mhr_ru':                               'mhr_RU.UTF-8',
1156    'mi':                                   'mi_NZ.ISO8859-1',
1157    'mi_nz':                                'mi_NZ.ISO8859-1',
1158    'mk':                                   'mk_MK.ISO8859-5',
1159    'mk_mk':                                'mk_MK.ISO8859-5',
1160    'ml':                                   'ml_IN.UTF-8',
1161    'ml_in':                                'ml_IN.UTF-8',
1162    'mn_mn':                                'mn_MN.UTF-8',
1163    'mni_in':                               'mni_IN.UTF-8',
1164    'mr':                                   'mr_IN.UTF-8',
1165    'mr_in':                                'mr_IN.UTF-8',
1166    'ms':                                   'ms_MY.ISO8859-1',
1167    'ms_my':                                'ms_MY.ISO8859-1',
1168    'mt':                                   'mt_MT.ISO8859-3',
1169    'mt_mt':                                'mt_MT.ISO8859-3',
1170    'my_mm':                                'my_MM.UTF-8',
1171    'nan_tw@latin':                         'nan_TW.UTF-8@latin',
1172    'nb':                                   'nb_NO.ISO8859-1',
1173    'nb_no':                                'nb_NO.ISO8859-1',
1174    'nds_de':                               'nds_DE.UTF-8',
1175    'nds_nl':                               'nds_NL.UTF-8',
1176    'ne_np':                                'ne_NP.UTF-8',
1177    'nhn_mx':                               'nhn_MX.UTF-8',
1178    'niu_nu':                               'niu_NU.UTF-8',
1179    'niu_nz':                               'niu_NZ.UTF-8',
1180    'nl':                                   'nl_NL.ISO8859-1',
1181    'nl_aw':                                'nl_AW.UTF-8',
1182    'nl_be':                                'nl_BE.ISO8859-1',
1183    'nl_nl':                                'nl_NL.ISO8859-1',
1184    'nn':                                   'nn_NO.ISO8859-1',
1185    'nn_no':                                'nn_NO.ISO8859-1',
1186    'no':                                   'no_NO.ISO8859-1',
1187    'no@nynorsk':                           'ny_NO.ISO8859-1',
1188    'no_no':                                'no_NO.ISO8859-1',
1189    'no_no.iso88591@bokmal':                'no_NO.ISO8859-1',
1190    'no_no.iso88591@nynorsk':               'no_NO.ISO8859-1',
1191    'norwegian':                            'no_NO.ISO8859-1',
1192    'nr':                                   'nr_ZA.ISO8859-1',
1193    'nr_za':                                'nr_ZA.ISO8859-1',
1194    'nso':                                  'nso_ZA.ISO8859-15',
1195    'nso_za':                               'nso_ZA.ISO8859-15',
1196    'ny':                                   'ny_NO.ISO8859-1',
1197    'ny_no':                                'ny_NO.ISO8859-1',
1198    'nynorsk':                              'nn_NO.ISO8859-1',
1199    'oc':                                   'oc_FR.ISO8859-1',
1200    'oc_fr':                                'oc_FR.ISO8859-1',
1201    'om_et':                                'om_ET.UTF-8',
1202    'om_ke':                                'om_KE.ISO8859-1',
1203    'or':                                   'or_IN.UTF-8',
1204    'or_in':                                'or_IN.UTF-8',
1205    'os_ru':                                'os_RU.UTF-8',
1206    'pa':                                   'pa_IN.UTF-8',
1207    'pa_in':                                'pa_IN.UTF-8',
1208    'pa_pk':                                'pa_PK.UTF-8',
1209    'pap_an':                               'pap_AN.UTF-8',
1210    'pd':                                   'pd_US.ISO8859-1',
1211    'pd_de':                                'pd_DE.ISO8859-1',
1212    'pd_us':                                'pd_US.ISO8859-1',
1213    'ph':                                   'ph_PH.ISO8859-1',
1214    'ph_ph':                                'ph_PH.ISO8859-1',
1215    'pl':                                   'pl_PL.ISO8859-2',
1216    'pl_pl':                                'pl_PL.ISO8859-2',
1217    'polish':                               'pl_PL.ISO8859-2',
1218    'portuguese':                           'pt_PT.ISO8859-1',
1219    'portuguese_brazil':                    'pt_BR.ISO8859-1',
1220    'posix':                                'C',
1221    'posix-utf2':                           'C',
1222    'pp':                                   'pp_AN.ISO8859-1',
1223    'pp_an':                                'pp_AN.ISO8859-1',
1224    'ps_af':                                'ps_AF.UTF-8',
1225    'pt':                                   'pt_PT.ISO8859-1',
1226    'pt_br':                                'pt_BR.ISO8859-1',
1227    'pt_pt':                                'pt_PT.ISO8859-1',
1228    'ro':                                   'ro_RO.ISO8859-2',
1229    'ro_ro':                                'ro_RO.ISO8859-2',
1230    'romanian':                             'ro_RO.ISO8859-2',
1231    'ru':                                   'ru_RU.UTF-8',
1232    'ru_ru':                                'ru_RU.UTF-8',
1233    'ru_ua':                                'ru_UA.KOI8-U',
1234    'rumanian':                             'ro_RO.ISO8859-2',
1235    'russian':                              'ru_RU.ISO8859-5',
1236    'rw':                                   'rw_RW.ISO8859-1',
1237    'rw_rw':                                'rw_RW.ISO8859-1',
1238    'sa_in':                                'sa_IN.UTF-8',
1239    'sat_in':                               'sat_IN.UTF-8',
1240    'sc_it':                                'sc_IT.UTF-8',
1241    'sd':                                   'sd_IN.UTF-8',
1242    'sd_in':                                'sd_IN.UTF-8',
1243    'sd_in@devanagari.utf8':                'sd_IN.UTF-8@devanagari',
1244    'sd_pk':                                'sd_PK.UTF-8',
1245    'se_no':                                'se_NO.UTF-8',
1246    'serbocroatian':                        'sr_RS.UTF-8@latin',
1247    'sh':                                   'sr_RS.UTF-8@latin',
1248    'sh_ba.iso88592@bosnia':                'sr_CS.ISO8859-2',
1249    'sh_hr':                                'sh_HR.ISO8859-2',
1250    'sh_hr.iso88592':                       'hr_HR.ISO8859-2',
1251    'sh_sp':                                'sr_CS.ISO8859-2',
1252    'sh_yu':                                'sr_RS.UTF-8@latin',
1253    'shs_ca':                               'shs_CA.UTF-8',
1254    'si':                                   'si_LK.UTF-8',
1255    'si_lk':                                'si_LK.UTF-8',
1256    'sid_et':                               'sid_ET.UTF-8',
1257    'sinhala':                              'si_LK.UTF-8',
1258    'sk':                                   'sk_SK.ISO8859-2',
1259    'sk_sk':                                'sk_SK.ISO8859-2',
1260    'sl':                                   'sl_SI.ISO8859-2',
1261    'sl_cs':                                'sl_CS.ISO8859-2',
1262    'sl_si':                                'sl_SI.ISO8859-2',
1263    'slovak':                               'sk_SK.ISO8859-2',
1264    'slovene':                              'sl_SI.ISO8859-2',
1265    'slovenian':                            'sl_SI.ISO8859-2',
1266    'so_dj':                                'so_DJ.ISO8859-1',
1267    'so_et':                                'so_ET.UTF-8',
1268    'so_ke':                                'so_KE.ISO8859-1',
1269    'so_so':                                'so_SO.ISO8859-1',
1270    'sp':                                   'sr_CS.ISO8859-5',
1271    'sp_yu':                                'sr_CS.ISO8859-5',
1272    'spanish':                              'es_ES.ISO8859-1',
1273    'spanish_spain':                        'es_ES.ISO8859-1',
1274    'sq':                                   'sq_AL.ISO8859-2',
1275    'sq_al':                                'sq_AL.ISO8859-2',
1276    'sq_mk':                                'sq_MK.UTF-8',
1277    'sr':                                   'sr_RS.UTF-8',
1278    'sr@cyrillic':                          'sr_RS.UTF-8',
1279    'sr@latn':                              'sr_CS.UTF-8@latin',
1280    'sr_cs':                                'sr_CS.UTF-8',
1281    'sr_cs.iso88592@latn':                  'sr_CS.ISO8859-2',
1282    'sr_cs@latn':                           'sr_CS.UTF-8@latin',
1283    'sr_me':                                'sr_ME.UTF-8',
1284    'sr_rs':                                'sr_RS.UTF-8',
1285    'sr_rs@latn':                           'sr_RS.UTF-8@latin',
1286    'sr_sp':                                'sr_CS.ISO8859-2',
1287    'sr_yu':                                'sr_RS.UTF-8@latin',
1288    'sr_yu.cp1251@cyrillic':                'sr_CS.CP1251',
1289    'sr_yu.iso88592':                       'sr_CS.ISO8859-2',
1290    'sr_yu.iso88595':                       'sr_CS.ISO8859-5',
1291    'sr_yu.iso88595@cyrillic':              'sr_CS.ISO8859-5',
1292    'sr_yu.microsoftcp1251@cyrillic':       'sr_CS.CP1251',
1293    'sr_yu.utf8':                           'sr_RS.UTF-8',
1294    'sr_yu.utf8@cyrillic':                  'sr_RS.UTF-8',
1295    'sr_yu@cyrillic':                       'sr_RS.UTF-8',
1296    'ss':                                   'ss_ZA.ISO8859-1',
1297    'ss_za':                                'ss_ZA.ISO8859-1',
1298    'st':                                   'st_ZA.ISO8859-1',
1299    'st_za':                                'st_ZA.ISO8859-1',
1300    'sv':                                   'sv_SE.ISO8859-1',
1301    'sv_fi':                                'sv_FI.ISO8859-1',
1302    'sv_se':                                'sv_SE.ISO8859-1',
1303    'sw_ke':                                'sw_KE.UTF-8',
1304    'sw_tz':                                'sw_TZ.UTF-8',
1305    'swedish':                              'sv_SE.ISO8859-1',
1306    'szl_pl':                               'szl_PL.UTF-8',
1307    'ta':                                   'ta_IN.TSCII-0',
1308    'ta_in':                                'ta_IN.TSCII-0',
1309    'ta_in.tscii':                          'ta_IN.TSCII-0',
1310    'ta_in.tscii0':                         'ta_IN.TSCII-0',
1311    'ta_lk':                                'ta_LK.UTF-8',
1312    'te':                                   'te_IN.UTF-8',
1313    'te_in':                                'te_IN.UTF-8',
1314    'tg':                                   'tg_TJ.KOI8-C',
1315    'tg_tj':                                'tg_TJ.KOI8-C',
1316    'th':                                   'th_TH.ISO8859-11',
1317    'th_th':                                'th_TH.ISO8859-11',
1318    'th_th.tactis':                         'th_TH.TIS620',
1319    'th_th.tis620':                         'th_TH.TIS620',
1320    'thai':                                 'th_TH.ISO8859-11',
1321    'ti_er':                                'ti_ER.UTF-8',
1322    'ti_et':                                'ti_ET.UTF-8',
1323    'tig_er':                               'tig_ER.UTF-8',
1324    'tk_tm':                                'tk_TM.UTF-8',
1325    'tl':                                   'tl_PH.ISO8859-1',
1326    'tl_ph':                                'tl_PH.ISO8859-1',
1327    'tn':                                   'tn_ZA.ISO8859-15',
1328    'tn_za':                                'tn_ZA.ISO8859-15',
1329    'tr':                                   'tr_TR.ISO8859-9',
1330    'tr_cy':                                'tr_CY.ISO8859-9',
1331    'tr_tr':                                'tr_TR.ISO8859-9',
1332    'ts':                                   'ts_ZA.ISO8859-1',
1333    'ts_za':                                'ts_ZA.ISO8859-1',
1334    'tt':                                   'tt_RU.TATAR-CYR',
1335    'tt_ru':                                'tt_RU.TATAR-CYR',
1336    'tt_ru.tatarcyr':                       'tt_RU.TATAR-CYR',
1337    'tt_ru@iqtelif':                        'tt_RU.UTF-8@iqtelif',
1338    'turkish':                              'tr_TR.ISO8859-9',
1339    'ug_cn':                                'ug_CN.UTF-8',
1340    'uk':                                   'uk_UA.KOI8-U',
1341    'uk_ua':                                'uk_UA.KOI8-U',
1342    'univ':                                 'en_US.utf',
1343    'universal':                            'en_US.utf',
1344    'universal.utf8@ucs4':                  'en_US.UTF-8',
1345    'unm_us':                               'unm_US.UTF-8',
1346    'ur':                                   'ur_PK.CP1256',
1347    'ur_in':                                'ur_IN.UTF-8',
1348    'ur_pk':                                'ur_PK.CP1256',
1349    'uz':                                   'uz_UZ.UTF-8',
1350    'uz_uz':                                'uz_UZ.UTF-8',
1351    'uz_uz@cyrillic':                       'uz_UZ.UTF-8',
1352    've':                                   've_ZA.UTF-8',
1353    've_za':                                've_ZA.UTF-8',
1354    'vi':                                   'vi_VN.TCVN',
1355    'vi_vn':                                'vi_VN.TCVN',
1356    'vi_vn.tcvn':                           'vi_VN.TCVN',
1357    'vi_vn.tcvn5712':                       'vi_VN.TCVN',
1358    'vi_vn.viscii':                         'vi_VN.VISCII',
1359    'vi_vn.viscii111':                      'vi_VN.VISCII',
1360    'wa':                                   'wa_BE.ISO8859-1',
1361    'wa_be':                                'wa_BE.ISO8859-1',
1362    'wae_ch':                               'wae_CH.UTF-8',
1363    'wal_et':                               'wal_ET.UTF-8',
1364    'wo_sn':                                'wo_SN.UTF-8',
1365    'xh':                                   'xh_ZA.ISO8859-1',
1366    'xh_za':                                'xh_ZA.ISO8859-1',
1367    'yi':                                   'yi_US.CP1255',
1368    'yi_us':                                'yi_US.CP1255',
1369    'yo_ng':                                'yo_NG.UTF-8',
1370    'yue_hk':                               'yue_HK.UTF-8',
1371    'zh':                                   'zh_CN.eucCN',
1372    'zh_cn':                                'zh_CN.gb2312',
1373    'zh_cn.big5':                           'zh_TW.big5',
1374    'zh_cn.euc':                            'zh_CN.eucCN',
1375    'zh_hk':                                'zh_HK.big5hkscs',
1376    'zh_hk.big5hk':                         'zh_HK.big5hkscs',
1377    'zh_sg':                                'zh_SG.GB2312',
1378    'zh_sg.gbk':                            'zh_SG.GBK',
1379    'zh_tw':                                'zh_TW.big5',
1380    'zh_tw.euc':                            'zh_TW.eucTW',
1381    'zh_tw.euctw':                          'zh_TW.eucTW',
1382    'zu':                                   'zu_ZA.ISO8859-1',
1383    'zu_za':                                'zu_ZA.ISO8859-1',
1384}
1385
1386#
1387# This maps Windows language identifiers to locale strings.
1388#
1389# This list has been updated from
1390# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
1391# to include every locale up to Windows Vista.
1392#
1393# NOTE: this mapping is incomplete.  If your language is missing, please
1394# submit a bug report to the Python bug tracker at http://bugs.python.org/
1395# Make sure you include the missing language identifier and the suggested
1396# locale code.
1397#
1398
# Keys are Windows language identifiers (LCIDs); values are the
# corresponding "language_TERRITORY" locale codes.
windows_locale = {
    0x0436: "af_ZA", # Afrikaans
    0x041c: "sq_AL", # Albanian
    0x0484: "gsw_FR",# Alsatian - France
    0x045e: "am_ET", # Amharic - Ethiopia
    0x0401: "ar_SA", # Arabic - Saudi Arabia
    0x0801: "ar_IQ", # Arabic - Iraq
    0x0c01: "ar_EG", # Arabic - Egypt
    0x1001: "ar_LY", # Arabic - Libya
    0x1401: "ar_DZ", # Arabic - Algeria
    0x1801: "ar_MA", # Arabic - Morocco
    0x1c01: "ar_TN", # Arabic - Tunisia
    0x2001: "ar_OM", # Arabic - Oman
    0x2401: "ar_YE", # Arabic - Yemen
    0x2801: "ar_SY", # Arabic - Syria
    0x2c01: "ar_JO", # Arabic - Jordan
    0x3001: "ar_LB", # Arabic - Lebanon
    0x3401: "ar_KW", # Arabic - Kuwait
    0x3801: "ar_AE", # Arabic - United Arab Emirates
    0x3c01: "ar_BH", # Arabic - Bahrain
    0x4001: "ar_QA", # Arabic - Qatar
    0x042b: "hy_AM", # Armenian
    0x044d: "as_IN", # Assamese - India
    0x042c: "az_AZ", # Azeri - Latin
    0x082c: "az_AZ", # Azeri - Cyrillic
    0x046d: "ba_RU", # Bashkir
    0x042d: "eu_ES", # Basque - Spain
    0x0423: "be_BY", # Belarusian
    0x0445: "bn_IN", # Bengali
    0x201a: "bs_BA", # Bosnian - Cyrillic
    0x141a: "bs_BA", # Bosnian - Latin
    0x047e: "br_FR", # Breton - France
    0x0402: "bg_BG", # Bulgarian
#    0x0455: "my_MM", # Burmese - Not supported
    0x0403: "ca_ES", # Catalan
    0x0004: "zh_CHS",# Chinese - Simplified
    0x0404: "zh_TW", # Chinese - Taiwan
    0x0804: "zh_CN", # Chinese - PRC
    0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
    0x1004: "zh_SG", # Chinese - Singapore
    0x1404: "zh_MO", # Chinese - Macao S.A.R.
    0x7c04: "zh_CHT",# Chinese - Traditional
    0x0483: "co_FR", # Corsican - France
    0x041a: "hr_HR", # Croatian
    0x101a: "hr_BA", # Croatian - Bosnia
    0x0405: "cs_CZ", # Czech
    0x0406: "da_DK", # Danish
    0x048c: "gbz_AF",# Dari - Afghanistan
    0x0465: "div_MV",# Divehi - Maldives
    0x0413: "nl_NL", # Dutch - The Netherlands
    0x0813: "nl_BE", # Dutch - Belgium
    0x0409: "en_US", # English - United States
    0x0809: "en_GB", # English - United Kingdom
    0x0c09: "en_AU", # English - Australia
    0x1009: "en_CA", # English - Canada
    0x1409: "en_NZ", # English - New Zealand
    0x1809: "en_IE", # English - Ireland
    0x1c09: "en_ZA", # English - South Africa
    # NOTE(review): the ISO 3166 code for Jamaica is JM, not JA -- confirm
    # against the Windows LCID reference before changing the value.
    0x2009: "en_JA", # English - Jamaica
    0x2409: "en_CB", # English - Caribbean
    0x2809: "en_BZ", # English - Belize
    0x2c09: "en_TT", # English - Trinidad
    0x3009: "en_ZW", # English - Zimbabwe
    0x3409: "en_PH", # English - Philippines
    0x4009: "en_IN", # English - India
    0x4409: "en_MY", # English - Malaysia
    0x4809: "en_SG", # English - Singapore
    0x0425: "et_EE", # Estonian
    0x0438: "fo_FO", # Faroese
    0x0464: "fil_PH",# Filipino
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French - France
    0x080c: "fr_BE", # French - Belgium
    0x0c0c: "fr_CA", # French - Canada
    0x100c: "fr_CH", # French - Switzerland
    0x140c: "fr_LU", # French - Luxembourg
    0x180c: "fr_MC", # French - Monaco
    0x0462: "fy_NL", # Frisian - Netherlands
    0x0456: "gl_ES", # Galician
    0x0437: "ka_GE", # Georgian
    0x0407: "de_DE", # German - Germany
    0x0807: "de_CH", # German - Switzerland
    0x0c07: "de_AT", # German - Austria
    0x1007: "de_LU", # German - Luxembourg
    0x1407: "de_LI", # German - Liechtenstein
    0x0408: "el_GR", # Greek
    0x046f: "kl_GL", # Greenlandic - Greenland
    0x0447: "gu_IN", # Gujarati
    0x0468: "ha_NG", # Hausa - Latin
    0x040d: "he_IL", # Hebrew
    0x0439: "hi_IN", # Hindi
    0x040e: "hu_HU", # Hungarian
    0x040f: "is_IS", # Icelandic
    0x0421: "id_ID", # Indonesian
    0x045d: "iu_CA", # Inuktitut - Syllabics
    0x085d: "iu_CA", # Inuktitut - Latin
    0x083c: "ga_IE", # Irish - Ireland
    0x0410: "it_IT", # Italian - Italy
    0x0810: "it_CH", # Italian - Switzerland
    0x0411: "ja_JP", # Japanese
    0x044b: "kn_IN", # Kannada - India
    0x043f: "kk_KZ", # Kazakh
    # NOTE(review): the ISO 639-1 code for Khmer is "km" -- confirm whether
    # "kh_KH" is intentional before changing the value.
    0x0453: "kh_KH", # Khmer - Cambodia
    0x0486: "qut_GT",# K'iche - Guatemala
    0x0487: "rw_RW", # Kinyarwanda - Rwanda
    0x0457: "kok_IN",# Konkani
    0x0412: "ko_KR", # Korean
    0x0440: "ky_KG", # Kyrgyz
    0x0454: "lo_LA", # Lao - Lao PDR
    0x0426: "lv_LV", # Latvian
    0x0427: "lt_LT", # Lithuanian
    0x082e: "dsb_DE",# Lower Sorbian - Germany
    0x046e: "lb_LU", # Luxembourgish
    0x042f: "mk_MK", # FYROM Macedonian
    0x043e: "ms_MY", # Malay - Malaysia
    0x083e: "ms_BN", # Malay - Brunei Darussalam
    0x044c: "ml_IN", # Malayalam - India
    0x043a: "mt_MT", # Maltese
    0x0481: "mi_NZ", # Maori
    0x047a: "arn_CL",# Mapudungun
    0x044e: "mr_IN", # Marathi
    0x047c: "moh_CA",# Mohawk - Canada
    0x0450: "mn_MN", # Mongolian - Cyrillic
    0x0850: "mn_CN", # Mongolian - PRC
    0x0461: "ne_NP", # Nepali
    0x0414: "nb_NO", # Norwegian - Bokmal
    0x0814: "nn_NO", # Norwegian - Nynorsk
    0x0482: "oc_FR", # Occitan - France
    0x0448: "or_IN", # Oriya - India
    0x0463: "ps_AF", # Pashto - Afghanistan
    0x0429: "fa_IR", # Persian
    0x0415: "pl_PL", # Polish
    0x0416: "pt_BR", # Portuguese - Brazil
    0x0816: "pt_PT", # Portuguese - Portugal
    0x0446: "pa_IN", # Punjabi
    0x046b: "quz_BO",# Quechua (Bolivia)
    0x086b: "quz_EC",# Quechua (Ecuador)
    0x0c6b: "quz_PE",# Quechua (Peru)
    0x0418: "ro_RO", # Romanian - Romania
    0x0417: "rm_CH", # Romansh
    0x0419: "ru_RU", # Russian
    0x243b: "smn_FI",# Sami Finland
    0x103b: "smj_NO",# Sami Norway
    0x143b: "smj_SE",# Sami Sweden
    0x043b: "se_NO", # Sami Northern Norway
    0x083b: "se_SE", # Sami Northern Sweden
    0x0c3b: "se_FI", # Sami Northern Finland
    0x203b: "sms_FI",# Sami Skolt
    0x183b: "sma_NO",# Sami Southern Norway
    0x1c3b: "sma_SE",# Sami Southern Sweden
    0x044f: "sa_IN", # Sanskrit
    0x0c1a: "sr_SP", # Serbian - Cyrillic
    0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
    0x081a: "sr_SP", # Serbian - Latin
    0x181a: "sr_BA", # Serbian - Bosnia Latin
    0x045b: "si_LK", # Sinhala - Sri Lanka
    0x046c: "ns_ZA", # Northern Sotho
    0x0432: "tn_ZA", # Setswana - Southern Africa
    0x041b: "sk_SK", # Slovak
    0x0424: "sl_SI", # Slovenian
    0x040a: "es_ES", # Spanish - Spain
    0x080a: "es_MX", # Spanish - Mexico
    0x0c0a: "es_ES", # Spanish - Spain (Modern)
    0x100a: "es_GT", # Spanish - Guatemala
    0x140a: "es_CR", # Spanish - Costa Rica
    0x180a: "es_PA", # Spanish - Panama
    0x1c0a: "es_DO", # Spanish - Dominican Republic
    0x200a: "es_VE", # Spanish - Venezuela
    0x240a: "es_CO", # Spanish - Colombia
    0x280a: "es_PE", # Spanish - Peru
    0x2c0a: "es_AR", # Spanish - Argentina
    0x300a: "es_EC", # Spanish - Ecuador
    0x340a: "es_CL", # Spanish - Chile
    0x380a: "es_UR", # Spanish - Uruguay
    0x3c0a: "es_PY", # Spanish - Paraguay
    0x400a: "es_BO", # Spanish - Bolivia
    0x440a: "es_SV", # Spanish - El Salvador
    0x480a: "es_HN", # Spanish - Honduras
    0x4c0a: "es_NI", # Spanish - Nicaragua
    0x500a: "es_PR", # Spanish - Puerto Rico
    0x540a: "es_US", # Spanish - United States
#    0x0430: "", # Sutu - Not supported
    0x0441: "sw_KE", # Swahili
    0x041d: "sv_SE", # Swedish - Sweden
    0x081d: "sv_FI", # Swedish - Finland
    0x045a: "syr_SY",# Syriac
    0x0428: "tg_TJ", # Tajik - Cyrillic
    0x085f: "tmz_DZ",# Tamazight - Latin
    0x0449: "ta_IN", # Tamil
    0x0444: "tt_RU", # Tatar
    0x044a: "te_IN", # Telugu
    0x041e: "th_TH", # Thai
    0x0851: "bo_BT", # Tibetan - Bhutan
    0x0451: "bo_CN", # Tibetan - PRC
    0x041f: "tr_TR", # Turkish
    0x0442: "tk_TM", # Turkmen - Cyrillic
    0x0480: "ug_CN", # Uighur - Arabic
    0x0422: "uk_UA", # Ukrainian
    0x042e: "wen_DE",# Upper Sorbian - Germany
    0x0420: "ur_PK", # Urdu
    0x0820: "ur_IN", # Urdu - India
    0x0443: "uz_UZ", # Uzbek - Latin
    0x0843: "uz_UZ", # Uzbek - Cyrillic
    0x042a: "vi_VN", # Vietnamese
    0x0452: "cy_GB", # Welsh
    0x0488: "wo_SN", # Wolof - Senegal
    0x0434: "xh_ZA", # Xhosa - South Africa
    0x0485: "sah_RU",# Yakut - Cyrillic
    0x0478: "ii_CN", # Yi - PRC
    0x046a: "yo_NG", # Yoruba - Nigeria
    0x0435: "zu_ZA", # Zulu
}
1611
def _print_locale():

    """ Test function.

        Prints the defaults reported by getdefaultlocale(), then the
        per-category language/encoding reported by getlocale() at
        startup, after resetlocale(), and after setlocale(LC_ALL, "").
    """
    # Collect every module-level LC_* constant except LC_ALL, which is
    # left out of the per-category report.
    categories = {name: value for name, value in globals().items()
                  if name[:3] == 'LC_' and name != 'LC_ALL'}

    def _print_categories():
        # Print the current language/encoding of each collected category.
        for name, category in categories.items():
            print(name, '...')
            lang, enc = getlocale(category)
            print('   Language: ', lang or '(undefined)')
            print('   Encoding: ', enc or '(undefined)')
            print()

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('Language: ', lang or '(undefined)')
    print('Encoding: ', enc or '(undefined)')
    print()

    print('Locale settings on startup:')
    print('-'*72)
    _print_categories()

    print()
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _print_categories()

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only the locale error is expected here; anything else (including
        # KeyboardInterrupt/SystemExit) must propagate.
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print()
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _print_categories()
1667
1668###
1669
# LC_MESSAGES is non-standard, so export it only when this module's
# namespace actually defines it.
if "LC_MESSAGES" in globals():
    __all__.append("LC_MESSAGES")
1676
if __name__ == '__main__':
    # Script mode: show the locale report, then run the number formatting
    # self-test.  Each heading is followed by one blank line.
    print('Locale aliasing:', end='\n\n')
    _print_locale()
    print()
    print('Number formatting:', end='\n\n')
    _test()
1685