• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  */
17 
18 package java.util;
19 
20 import java.io.IOException;
21 import java.io.ObjectInputStream;
22 import java.io.ObjectOutputStream;
23 import java.io.ObjectStreamField;
24 import java.io.Serializable;
25 import java.nio.charset.StandardCharsets;
26 import libcore.icu.ICU;
27 
28 /**
29  * {@code Locale} represents a language/country/variant combination. Locales are used to
30  * alter the presentation of information such as numbers or dates to suit the conventions
31  * in the region they describe.
32  *
33  * <p>The language codes are two-letter lowercase ISO language codes (such as "en") as defined by
34  * <a href="http://en.wikipedia.org/wiki/ISO_639-1">ISO 639-1</a>.
35  * The country codes are two-letter uppercase ISO country codes (such as "US") as defined by
36  * <a href="http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2">ISO 3166-1</a>.
37  * The variant codes are unspecified.
38  *
39  * <p>Note that Java uses several deprecated two-letter codes. The Hebrew ("he") language
40  * code is rewritten as "iw", Indonesian ("id") as "in", and Yiddish ("yi") as "ji". This
41  * rewriting happens even if you construct your own {@code Locale} object, not just for
42  * instances returned by the various lookup methods.
43  *
44  * <a name="available_locales"></a><h3>Available locales</h3>
45  * <p>This class' constructors do no error checking. You can create a {@code Locale} for languages
46  * and countries that don't exist, and you can create instances for combinations that don't
47  * exist (such as "de_US" for "German as spoken in the US").
48  *
49  * <p>Note that locale data is not necessarily available for any of the locales pre-defined as
50  * constants in this class except for en_US, which is the only locale Java guarantees is always
51  * available.
52  *
53  * <p>It is also a mistake to assume that all devices have the same locales available.
54  * A device sold in the US will almost certainly support en_US and es_US, but not necessarily
55  * any locales with the same language but different countries (such as en_GB or es_ES),
56  * nor any locales for other languages (such as de_DE). The opposite may well be true for a device
57  * sold in Europe.
58  *
59  * <p>You can use {@link Locale#getDefault} to get an appropriate locale for the <i>user</i> of the
60  * device you're running on, or {@link Locale#getAvailableLocales} to get a list of all the locales
61  * available on the device you're running on.
62  *
63  * <a name="locale_data"></a><h3>Locale data</h3>
64  * <p>Note that locale data comes solely from ICU. User-supplied locale service providers (using
65  * the {@code java.text.spi} or {@code java.util.spi} mechanisms) are not supported.
66  *
67  * <p>Here are the versions of ICU (and the corresponding CLDR and Unicode versions) used in
68  * various Android releases:
69  * <table BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
70  * <tr><td>Android 1.5 (Cupcake)/Android 1.6 (Donut)/Android 2.0 (Eclair)</td>
71  *     <td>ICU 3.8</td>
72  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-5">CLDR 1.5</a></td>
73  *     <td><a href="http://www.unicode.org/versions/Unicode5.0.0/">Unicode 5.0</a></td></tr>
74  * <tr><td>Android 2.2 (Froyo)</td>
75  *     <td>ICU 4.2</td>
76  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-7">CLDR 1.7</a></td>
77  *     <td><a href="http://www.unicode.org/versions/Unicode5.1.0/">Unicode 5.1</a></td></tr>
78  * <tr><td>Android 2.3 (Gingerbread)/Android 3.0 (Honeycomb)</td>
79  *     <td>ICU 4.4</td>
80  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-8">CLDR 1.8</a></td>
81  *     <td><a href="http://www.unicode.org/versions/Unicode5.2.0/">Unicode 5.2</a></td></tr>
82  * <tr><td>Android 4.0 (Ice Cream Sandwich)</td>
83  *     <td><a href="http://site.icu-project.org/download/46">ICU 4.6</a></td>
84  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-9">CLDR 1.9</a></td>
85  *     <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr>
86  * <tr><td>Android 4.1 (Jelly Bean)</td>
87  *     <td><a href="http://site.icu-project.org/download/48">ICU 4.8</a></td>
88  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-2-0">CLDR 2.0</a></td>
89  *     <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr>
90  * <tr><td>Android 4.3 (Jelly Bean MR2)</td>
91  *     <td><a href="http://site.icu-project.org/download/50">ICU 50</a></td>
92  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-22-1">CLDR 22.1</a></td>
93  *     <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr>
94  * <tr><td>Android 4.4 (KitKat)</td>
95  *     <td><a href="http://site.icu-project.org/download/51">ICU 51</a></td>
96  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-23">CLDR 23</a></td>
97  *     <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr>
98  * <tr><td>Android 5.0 (Lollipop)</td>
99  *     <td><a href="http://site.icu-project.org/download/53">ICU 53</a></td>
100  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-25">CLDR 25</a></td>
101  *     <td><a href="http://www.unicode.org/versions/Unicode6.3.0/">Unicode 6.3</a></td></tr>
102  * <tr><td>Android 6.0 (Marshmallow)</td>
103  *     <td><a href="http://site.icu-project.org/download/55">ICU 55.1</a></td>
104  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-27">CLDR 27.0.1</a></td>
105  *     <td><a href="http://www.unicode.org/versions/Unicode7.0.0/">Unicode 7.0</a></td></tr>
106  * </table>
107  *
108  * <a name="default_locale"></a><h3>Be wary of the default locale</h3>
109  * <p>Note that there are many convenience methods that automatically use the default locale, but
110  * using them may lead to subtle bugs.
111  *
112  * <p>The default locale is appropriate for tasks that involve presenting data to the user. In
113  * this case, you want to use the user's date/time formats, number
114  * formats, rules for conversion to lowercase, and so on. In this case, it's safe to use the
115  * convenience methods.
116  *
117  * <p>The default locale is <i>not</i> appropriate for machine-readable output. The best choice
118  * there is usually {@code Locale.US}&nbsp;&ndash; this locale is guaranteed to be available on all
119  * devices, and the fact that it has no surprising special cases and is frequently used (especially
120  * for computer-computer communication) means that it tends to be the most efficient choice too.
121  *
122  * <p>A common mistake is to implicitly use the default locale when producing output meant to be
123  * machine-readable. This tends to work on the developer's test devices (especially because so many
124  * developers use en_US), but fails when run on a device whose user is in a more complex locale.
125  *
126  * <p>For example, if you're formatting integers some locales will use non-ASCII decimal
127  * digits. As another example, if you're formatting floating-point numbers some locales will use
128  * {@code ','} as the decimal point and {@code '.'} for digit grouping. That's correct for
129  * human-readable output, but likely to cause problems if presented to another
130  * computer ({@link Double#parseDouble} can't parse such a number, for example).
131  * You should also be wary of the {@link String#toLowerCase} and
132  * {@link String#toUpperCase} overloads that don't take a {@code Locale}: in Turkey, for example,
133  * the characters {@code 'i'} and {@code 'I'} won't be converted to {@code 'I'} and {@code 'i'}.
134  * This is the correct behavior for Turkish text (such as user input), but inappropriate for, say,
135  * HTTP headers.
136  */
137 public final class Locale implements Cloneable, Serializable {
138 
139     private static final long serialVersionUID = 9149081749638150636L;
140 
141     /**
142      * Locale constant for en_CA.
143      */
144     public static final Locale CANADA = new Locale(true, "en", "CA");
145 
146     /**
147      * Locale constant for fr_CA.
148      */
149     public static final Locale CANADA_FRENCH = new Locale(true, "fr", "CA");
150 
151     /**
152      * Locale constant for zh_CN.
153      */
154     public static final Locale CHINA = new Locale(true, "zh", "CN");
155 
156     /**
157      * Locale constant for zh.
158      */
159     public static final Locale CHINESE = new Locale(true, "zh", "");
160 
161     /**
162      * Locale constant for en.
163      */
164     public static final Locale ENGLISH = new Locale(true, "en", "");
165 
166     /**
167      * Locale constant for fr_FR.
168      */
169     public static final Locale FRANCE = new Locale(true, "fr", "FR");
170 
171     /**
172      * Locale constant for fr.
173      */
174     public static final Locale FRENCH = new Locale(true, "fr", "");
175 
176     /**
177      * Locale constant for de.
178      */
179     public static final Locale GERMAN = new Locale(true, "de", "");
180 
181     /**
182      * Locale constant for de_DE.
183      */
184     public static final Locale GERMANY = new Locale(true, "de", "DE");
185 
186     /**
187      * Locale constant for it.
188      */
189     public static final Locale ITALIAN = new Locale(true, "it", "");
190 
191     /**
192      * Locale constant for it_IT.
193      */
194     public static final Locale ITALY = new Locale(true, "it", "IT");
195 
196     /**
197      * Locale constant for ja_JP.
198      */
199     public static final Locale JAPAN = new Locale(true, "ja", "JP");
200 
201     /**
202      * Locale constant for ja.
203      */
204     public static final Locale JAPANESE = new Locale(true, "ja", "");
205 
206     /**
207      * Locale constant for ko_KR.
208      */
209     public static final Locale KOREA = new Locale(true, "ko", "KR");
210 
211     /**
212      * Locale constant for ko.
213      */
214     public static final Locale KOREAN = new Locale(true, "ko", "");
215 
216     /**
217      * Locale constant for zh_CN.
218      */
219     public static final Locale PRC = new Locale(true, "zh", "CN");
220 
221     /**
222      * Locale constant for the root locale. The root locale has an empty language,
223      * country, and variant.
224      *
225      * @since 1.6
226      */
227     public static final Locale ROOT = new Locale(true, "", "");
228 
229     /**
230      * Locale constant for zh_CN.
231      */
232     public static final Locale SIMPLIFIED_CHINESE = new Locale(true, "zh", "CN");
233 
234     /**
235      * Locale constant for zh_TW.
236      */
237     public static final Locale TAIWAN = new Locale(true, "zh", "TW");
238 
239     /**
240      * Locale constant for zh_TW.
241      */
242     public static final Locale TRADITIONAL_CHINESE = new Locale(true, "zh", "TW");
243 
244     /**
245      * Locale constant for en_GB.
246      */
247     public static final Locale UK = new Locale(true, "en", "GB");
248 
249     /**
250      * Locale constant for en_US.
251      */
252     public static final Locale US = new Locale(true, "en", "US");
253 
254     /**
255      * BCP-47 extension identifier (or "singleton") for the private
256      * use extension.
257      *
258      * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}.
259      *
260      * @since 1.7
261      */
262     public static final char PRIVATE_USE_EXTENSION = 'x';
263 
264     /**
265      * BCP-47 extension identifier (or "singleton") for the unicode locale extension.
266      *
267      *
268      * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}.
269      *
270      * @since 1.7
271      */
272     public static final char UNICODE_LOCALE_EXTENSION = 'u';
273 
274     /**
275      * ISO 639-3 generic code for undetermined languages.
276      */
277     private static final String UNDETERMINED_LANGUAGE = "und";
278 
279 
280     /**
281      * Map of grandfathered language tags to their modern replacements.
282      */
283     private static final TreeMap<String, String> GRANDFATHERED_LOCALES;
284 
285     static {
286         GRANDFATHERED_LOCALES = new TreeMap<String, String>(String.CASE_INSENSITIVE_ORDER);
287 
288         // From http://tools.ietf.org/html/bcp47
289         //
290         // grandfathered = irregular           ; non-redundant tags registered
291         //               / regular             ; during the RFC 3066 era
292         //  irregular =
293         GRANDFATHERED_LOCALES.put("en-GB-oed", "en-GB-x-oed");
294         GRANDFATHERED_LOCALES.put("i-ami", "ami");
295         GRANDFATHERED_LOCALES.put("i-bnn", "bnn");
296         GRANDFATHERED_LOCALES.put("i-default", "en-x-i-default");
297         GRANDFATHERED_LOCALES.put("i-enochian", "und-x-i-enochian");
298         GRANDFATHERED_LOCALES.put("i-hak", "hak");
299         GRANDFATHERED_LOCALES.put("i-klingon", "tlh");
300         GRANDFATHERED_LOCALES.put("i-lux", "lb");
301         GRANDFATHERED_LOCALES.put("i-mingo", "see-x-i-mingo");
302         GRANDFATHERED_LOCALES.put("i-navajo", "nv");
303         GRANDFATHERED_LOCALES.put("i-pwn", "pwn");
304         GRANDFATHERED_LOCALES.put("i-tao", "tao");
305         GRANDFATHERED_LOCALES.put("i-tay", "tay");
306         GRANDFATHERED_LOCALES.put("i-tsu", "tsu");
307         GRANDFATHERED_LOCALES.put("sgn-BE-FR", "sfb");
308         GRANDFATHERED_LOCALES.put("sgn-BE-NL", "vgt");
309         GRANDFATHERED_LOCALES.put("sgn-CH-DE", "sgg");
310 
311         // regular =
312         GRANDFATHERED_LOCALES.put("art-lojban", "jbo");
313         GRANDFATHERED_LOCALES.put("cel-gaulish", "xtg-x-cel-gaulish");
314         GRANDFATHERED_LOCALES.put("no-bok", "nb");
315         GRANDFATHERED_LOCALES.put("no-nyn", "nn");
316         GRANDFATHERED_LOCALES.put("zh-guoyu", "cmn");
317         GRANDFATHERED_LOCALES.put("zh-hakka", "hak");
318         GRANDFATHERED_LOCALES.put("zh-min", "nan-x-zh-min");
319         GRANDFATHERED_LOCALES.put("zh-min-nan", "nan");
320         GRANDFATHERED_LOCALES.put("zh-xiang", "hsn");
321     }
322 
323     private static class NoImagePreloadHolder {
324         /**
325          * The default locale, returned by {@code Locale.getDefault()}.
326          * Initialize the default locale from the system properties.
327          */
328         private static Locale defaultLocale = Locale.getDefaultLocaleFromSystemProperties();
329     }
330 
331     /**
332      * Returns the default locale from system properties.
333      *
334      * @hide visible for testing.
335      */
getDefaultLocaleFromSystemProperties()336     public static Locale getDefaultLocaleFromSystemProperties() {
337         final String languageTag = System.getProperty("user.locale", "");
338 
339         final Locale defaultLocale;
340         if (!languageTag.isEmpty()) {
341             defaultLocale = Locale.forLanguageTag(languageTag);
342         } else {
343             String language = System.getProperty("user.language", "en");
344             String region = System.getProperty("user.region", "US");
345             String variant = System.getProperty("user.variant", "");
346             defaultLocale = new Locale(language, region, variant);
347         }
348 
349         return defaultLocale;
350     }
351 
352     /**
353      * A class that helps construct {@link Locale} instances.
354      *
355      * Unlike the public {@code Locale} constructors, the methods of this class
356      * perform much stricter checks on their input.
357      *
358      * Validity checks on the {@code language}, {@code country}, {@code variant}
359      * and {@code extension} values are carried out as per the
360      * <a href="https://tools.ietf.org/html/bcp47">BCP-47</a> specification.
361      *
362      * In addition, we treat the <a href="http://www.unicode.org/reports/tr35/">
363      * Unicode locale extension</a> specially and provide methods to manipulate
364      * the structured state (keywords and attributes) specified therein.
365      *
366      * @since 1.7
367      */
368     public static final class Builder {
369         private String language;
370         private String region;
371         private String variant;
372         private String script;
373 
374         private final Set<String> attributes;
375         private final Map<String, String> keywords;
376         private final Map<Character, String> extensions;
377 
Builder()378         public Builder() {
379             language = region = variant = script = "";
380 
381             // NOTE: We use sorted maps in the builder & the locale class itself
382             // because serialized forms of the unicode locale extension (and
383             // of the extension map itself) are specified to be in alphabetic
384             // order of keys.
385             attributes = new TreeSet<String>();
386             keywords = new TreeMap<String, String>();
387             extensions = new TreeMap<Character, String>();
388         }
389 
390         /**
391          * Sets the locale language. If {@code language} is {@code null} or empty, the
392          * previous value is cleared.
393          *
394          * As per BCP-47, the language must be between 2 and 3 ASCII characters
395          * in length and must only contain characters in the range {@code [a-zA-Z]}.
396          *
397          * This value is usually an <a href="http://www.loc.gov/standards/iso639-2/">
398          * ISO-639-2</a> alpha-2 or alpha-3 code, though no explicit checks are
399          * carried out that it's a valid code in that namespace.
400          *
401          * Values are normalized to lower case.
402          *
403          * Note that we don't support BCP-47 "extlang" languages because they were
404          * only ever used to substitute for a lack of 3 letter language codes.
405          *
406          * @throws IllformedLocaleException if the language was invalid.
407          */
setLanguage(String language)408         public Builder setLanguage(String language) {
409             this.language = normalizeAndValidateLanguage(language, true /* strict */);
410             return this;
411         }
412 
normalizeAndValidateLanguage(String language, boolean strict)413         private static String normalizeAndValidateLanguage(String language, boolean strict) {
414             if (language == null || language.isEmpty()) {
415                 return "";
416             }
417 
418             final String lowercaseLanguage = language.toLowerCase(Locale.ROOT);
419             if (!isValidBcp47Alpha(lowercaseLanguage, 2, 3)) {
420                 if (strict) {
421                     throw new IllformedLocaleException("Invalid language: " + language);
422                 } else {
423                     return UNDETERMINED_LANGUAGE;
424                 }
425             }
426 
427             return lowercaseLanguage;
428         }
429 
430         /**
431          * Set the state of this builder to the parsed contents of the BCP-47 language
432          * tag {@code languageTag}.
433          *
434          * This method is equivalent to a call to {@link #clear} if {@code languageTag}
435          * is {@code null} or empty.
436          *
437          * <b>NOTE:</b> In contrast to {@link Locale#forLanguageTag(String)}, which
438          * simply ignores malformed input, this method will throw an exception if
439          * its input is malformed.
440          *
441          * @throws IllformedLocaleException if {@code languageTag} is not a well formed
442          *         BCP-47 tag.
443          */
setLanguageTag(String languageTag)444         public Builder setLanguageTag(String languageTag) {
445             if (languageTag == null || languageTag.isEmpty()) {
446                 clear();
447                 return this;
448             }
449 
450             final Locale fromIcu = forLanguageTag(languageTag, true /* strict */);
451             // When we ask ICU for strict parsing, it might return a null locale
452             // if the language tag is malformed.
453             if (fromIcu == null) {
454                 throw new IllformedLocaleException("Invalid languageTag: " + languageTag);
455             }
456 
457             setLocale(fromIcu);
458             return this;
459         }
460 
461         /**
462          * Sets the locale region. If {@code region} is {@code null} or empty, the
463          * previous value is cleared.
464          *
465          * As per BCP-47, the region must either be a 2 character ISO-3166-1 code
466          * (each character in the range [a-zA-Z]) OR a 3 digit UN M.49 code.
467          *
468          * Values are normalized to upper case.
469          *
470          * @throws IllformedLocaleException if {@code region} is invalid.
471          */
setRegion(String region)472         public Builder setRegion(String region) {
473             this.region = normalizeAndValidateRegion(region, true /* strict */);
474             return this;
475         }
476 
normalizeAndValidateRegion(String region, boolean strict)477         private static String normalizeAndValidateRegion(String region, boolean strict) {
478             if (region == null || region.isEmpty()) {
479                 return "";
480             }
481 
482             final String uppercaseRegion = region.toUpperCase(Locale.ROOT);
483             if (!isValidBcp47Alpha(uppercaseRegion, 2, 2) &&
484                     !isUnM49AreaCode(uppercaseRegion)) {
485                 if (strict) {
486                     throw new IllformedLocaleException("Invalid region: " + region);
487                 } else {
488                     return "";
489                 }
490             }
491 
492             return uppercaseRegion;
493         }
494 
495         /**
496          * Sets the locale variant. If {@code variant} is {@code null} or empty,
497          * the previous value is cleared.
498          *
499          * The input string may consist of one or more variants separated by
500          * valid separators ('-' or '_').
501          *
502          * As per BCP-47, each variant must be between 5 and 8 alphanumeric characters
503          * in length (each character in the range {@code [a-zA-Z0-9]}) but
504          * can be exactly 4 characters in length if the first character is a digit.
505          *
506          * Note that this is a much stricter interpretation of {@code variant}
507          * than the public {@code Locale} constructors. The latter allowed free form
508          * variants.
509          *
510          * Variants are case sensitive and all separators are normalized to {@code '_'}.
511          *
512          * @throws IllformedLocaleException if {@code variant} is invalid.
513          */
setVariant(String variant)514         public Builder setVariant(String variant) {
515             this.variant = normalizeAndValidateVariant(variant);
516             return this;
517         }
518 
normalizeAndValidateVariant(String variant)519         private static String normalizeAndValidateVariant(String variant) {
520             if (variant == null || variant.isEmpty()) {
521                 return "";
522             }
523 
524             // Note that unlike extensions, we canonicalize to lower case alphabets
525             // and underscores instead of hyphens.
526             final String normalizedVariant = variant.replace('-', '_');
527             String[] subTags = normalizedVariant.split("_");
528 
529             for (String subTag : subTags) {
530                 if (!isValidVariantSubtag(subTag)) {
531                     throw new IllformedLocaleException("Invalid variant: " + variant);
532                 }
533             }
534 
535             return normalizedVariant;
536         }
537 
isValidVariantSubtag(String subTag)538         private static boolean isValidVariantSubtag(String subTag) {
539             // The BCP-47 spec states that :
540             // - Subtags can be between [5, 8] alphanumeric chars in length.
541             // - Subtags that start with a number are allowed to be 4 chars in length.
542             if (subTag.length() >= 5 && subTag.length() <= 8) {
543                 if (isAsciiAlphaNum(subTag)) {
544                     return true;
545                 }
546             } else if (subTag.length() == 4) {
547                 final char firstChar = subTag.charAt(0);
548                 if ((firstChar >= '0' && firstChar <= '9') && isAsciiAlphaNum(subTag)) {
549                     return true;
550                 }
551             }
552 
553             return false;
554         }
555 
556         /**
557          * Sets the locale script. If {@code script} is {@code null} or empty,
558          * the previous value is cleared.
559          *
560          * As per BCP-47, the script must be 4 characters in length, and
561          * each character in the range {@code [a-zA-Z]}.
562          *
563          * A script usually represents a valid ISO 15924 script code, though no
564          * other registry or validity checks are performed.
565          *
566          * Scripts are normalized to title cased values.
567          *
568          * @throws IllformedLocaleException if {@code script} is invalid.
569          */
setScript(String script)570         public Builder setScript(String script) {
571             this.script = normalizeAndValidateScript(script, true /* strict */);
572             return this;
573         }
574 
normalizeAndValidateScript(String script, boolean strict)575         private static String normalizeAndValidateScript(String script, boolean strict) {
576             if (script == null || script.isEmpty()) {
577                 return "";
578             }
579 
580             if (!isValidBcp47Alpha(script, 4, 4)) {
581                 if (strict) {
582                     throw new IllformedLocaleException("Invalid script: " + script);
583                 } else {
584                     return "";
585                 }
586             }
587 
588             return titleCaseAsciiWord(script);
589         }
590 
591         /**
592          * Sets the state of the builder to the {@link Locale} represented by
593          * {@code locale}.
594          *
595          * Note that the locale's language, region and variant are validated as per
596          * the rules specified in {@link #setLanguage}, {@link #setRegion} and
597          * {@link #setVariant}.
598          *
599          * All existing builder state is discarded.
600          *
601          * @throws IllformedLocaleException if {@code locale} is invalid.
602          * @throws NullPointerException if {@code locale} is null.
603          */
setLocale(Locale locale)604         public Builder setLocale(Locale locale) {
605             if (locale == null) {
606                 throw new NullPointerException("locale == null");
607             }
608 
609             // Make copies of the existing values so that we don't partially
610             // update the state if we encounter an error.
611             final String backupLanguage = language;
612             final String backupRegion = region;
613             final String backupVariant = variant;
614 
615             try {
616                 setLanguage(locale.getLanguage());
617                 setRegion(locale.getCountry());
618                 setVariant(locale.getVariant());
619             } catch (IllformedLocaleException ifle) {
620                 language = backupLanguage;
621                 region = backupRegion;
622                 variant = backupVariant;
623 
624                 throw ifle;
625             }
626 
627             // The following values can be set only via the builder class, so
628             // there's no need to normalize them or check their validity.
629 
630             this.script = locale.getScript();
631 
632             extensions.clear();
633             extensions.putAll(locale.extensions);
634 
635             keywords.clear();
636             keywords.putAll(locale.unicodeKeywords);
637 
638             attributes.clear();
639             attributes.addAll(locale.unicodeAttributes);
640 
641             return this;
642         }
643 
644         /**
645          * Adds the specified attribute to the list of attributes in the unicode
646          * locale extension.
647          *
648          * Attributes must be between 3 and 8 characters in length, and each character
649          * must be in the range {@code [a-zA-Z0-9]}.
650          *
651          * Attributes are normalized to lower case values. All added attributes and
652          * keywords are combined to form a complete unicode locale extension on
653          * {@link Locale} objects built by this builder, and accessible via
654          * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION}
655          * key.
656          *
657          * @throws IllformedLocaleException if {@code attribute} is invalid.
658          * @throws NullPointerException if {@code attribute} is null.
659          */
addUnicodeLocaleAttribute(String attribute)660         public Builder addUnicodeLocaleAttribute(String attribute) {
661             if (attribute == null) {
662                 throw new NullPointerException("attribute == null");
663             }
664 
665             final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT);
666             if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) {
667                 throw new IllformedLocaleException("Invalid locale attribute: " + attribute);
668             }
669 
670             attributes.add(lowercaseAttribute);
671 
672             return this;
673         }
674 
675         /**
676          * Removes an attribute from the list of attributes in the unicode locale
677          * extension.
678          *
679          * {@code attribute} must be valid as per the rules specified in
680          * {@link #addUnicodeLocaleAttribute}.
681          *
682          * This method has no effect if {@code attribute} hasn't already been
683          * added.
684          *
685          * @throws IllformedLocaleException if {@code attribute} is invalid.
686          * @throws NullPointerException if {@code attribute} is null.
687          */
removeUnicodeLocaleAttribute(String attribute)688         public Builder removeUnicodeLocaleAttribute(String attribute) {
689             if (attribute == null) {
690                 throw new NullPointerException("attribute == null");
691             }
692 
693             // Weirdly, remove is specified to check whether the attribute
694             // is valid, so we have to perform the full alphanumeric check here.
695             final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT);
696             if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) {
697                 throw new IllformedLocaleException("Invalid locale attribute: " + attribute);
698             }
699 
700             attributes.remove(attribute);
701             return this;
702         }
703 
704         /**
705          * Sets the extension identified by {@code key} to {@code value}.
706          *
707          * {@code key} must be in the range {@code [a-zA-Z0-9]}.
708          *
709          * If {@code value} is {@code null} or empty, the extension is removed.
710          *
711          * In the general case, {@code value} must be a series of subtags separated
712          * by ({@code "-"} or {@code "_"}). Each subtag must be between
713          * 2 and 8 characters in length, and each character in the subtag must be in
714          * the range {@code [a-zA-Z0-9]}.
715          *
716          * <p>
717          * There are two special cases :
718          * <ul>
719          *     <li>
720          *         The unicode locale extension
721          *         ({@code key == 'u'}, {@link Locale#UNICODE_LOCALE_EXTENSION}) : Setting
722          *         the unicode locale extension results in all existing keyword and attribute
723          *         state being replaced by the parsed result of {@code value}. For example,
724          *         {@code  builder.setExtension('u', "baaaz-baaar-fo-baar-ba-baaz")}
725          *         is equivalent to:
726          *         <pre>
727          *             builder.addUnicodeLocaleAttribute("baaaz");
728          *             builder.addUnicodeLocaleAttribute("baaar");
729          *             builder.setUnicodeLocaleKeyword("fo", "baar");
730          *             builder.setUnicodeLocaleKeyword("ba", "baaz");
731          *         </pre>
732          *     </li>
733          *     <li>
734          *         The private use extension
735          *         ({@code key == 'x'}, {@link Locale#PRIVATE_USE_EXTENSION}) : Each subtag in a
736          *         private use extension can be between 1 and 8 characters in length (in contrast
737          *         to a minimum length of 2 for all other extensions).
738          *     </li>
739          * </ul>
740          *
741          * @throws IllformedLocaleException if {@code value} is invalid.
742          */
setExtension(char key, String value)743         public Builder setExtension(char key, String value) {
744             if (value == null || value.isEmpty()) {
745                 extensions.remove(key);
746                 return this;
747             }
748 
749             final String normalizedValue = value.toLowerCase(Locale.ROOT).replace('_', '-');
750             final String[] subtags = normalizedValue.split("-");
751             final char normalizedKey = Character.toLowerCase(key);
752 
753             // Lengths for subtags in the private use extension should be [1, 8] chars.
754             // For all other extensions, they should be [2, 8] chars.
755             //
756             // http://www.rfc-editor.org/rfc/bcp/bcp47.txt
757             final int minimumLength = (normalizedKey == PRIVATE_USE_EXTENSION) ? 1 : 2;
758             for (String subtag : subtags) {
759                 if (!isValidBcp47Alphanum(subtag, minimumLength, 8)) {
760                     throw new IllformedLocaleException(
761                             "Invalid private use extension : " + value);
762                 }
763             }
764 
765             // We need to take special action in the case of unicode extensions,
766             // since we claim to understand their keywords and attributes.
767             if (normalizedKey == UNICODE_LOCALE_EXTENSION) {
768                 // First clear existing attributes and keywords.
769                 extensions.clear();
770                 attributes.clear();
771 
772                 parseUnicodeExtension(subtags, keywords, attributes);
773             } else {
774                 extensions.put(normalizedKey, normalizedValue);
775             }
776 
777             return this;
778         }
779 
780         /**
781          * Clears all extensions from this builder. Note that this also implicitly
782          * clears all state related to the unicode locale extension; all attributes
783          * and keywords set by {@link #addUnicodeLocaleAttribute} and
784          * {@link #setUnicodeLocaleKeyword} are cleared.
785          */
clearExtensions()786         public Builder clearExtensions() {
787             extensions.clear();
788             attributes.clear();
789             keywords.clear();
790             return this;
791         }
792 
793         /**
794          * Adds a key / type pair to the list of unicode locale extension keys.
795          *
796          * {@code key} must be 2 characters in length, and each character must be
797          * in the range {@code [a-zA-Z0-9]}.
798          *
799          * {@code type} can either be empty, or a series of one or more subtags
800          * separated by a separator ({@code "-"} or {@code "_"}). Each subtag must
801          * be between 3 and 8 characters in length and each character in the subtag
802          * must be in the range {@code [a-zA-Z0-9]}.
803          *
804          * Note that the type is normalized to lower case, and all separators
805          * are normalized to {@code "-"}. All added attributes and
806          * keywords are combined to form a complete unicode locale extension on
807          * {@link Locale} objects built by this builder, and accessible via
808          * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION}
809          * key.
810          *
811          * @throws IllformedLocaleException if {@code key} or {@code type} are
812          *         invalid.
813          */
setUnicodeLocaleKeyword(String key, String type)814         public Builder setUnicodeLocaleKeyword(String key, String type) {
815             if (key == null) {
816                 throw new NullPointerException("key == null");
817             }
818 
819             if (type == null && keywords != null) {
820                 keywords.remove(key);
821                 return this;
822             }
823 
824             final String lowerCaseKey = key.toLowerCase(Locale.ROOT);
825             // The key must be exactly two alphanumeric characters.
826             if (lowerCaseKey.length() != 2 || !isAsciiAlphaNum(lowerCaseKey)) {
827                 throw new IllformedLocaleException("Invalid unicode locale keyword: " + key);
828             }
829 
830             // The type can be one or more alphanumeric strings of length [3, 8] characters,
831             // separated by a separator char, which is one of "_" or "-". Though the spec
832             // doesn't require it, we normalize all "_" to "-" to make the rest of our
833             // processing easier.
834             final String lowerCaseType = type.toLowerCase(Locale.ROOT).replace("_", "-");
835             if (!isValidTypeList(lowerCaseType)) {
836                 throw new IllformedLocaleException("Invalid unicode locale type: " + type);
837             }
838 
839             // Everything checks out fine, add the <key, type> mapping to the list.
840             keywords.put(lowerCaseKey, lowerCaseType);
841 
842             return this;
843         }
844 
845         /**
846          * Clears all existing state from this builder.
847          */
clear()848         public Builder clear() {
849             clearExtensions();
850             language = region = variant = script = "";
851 
852             return this;
853         }
854 
855         /**
856          * Constructs a locale from the existing state of the builder. Note that this
857          * method is guaranteed to succeed since field validity checks are performed
858          * at the point of setting them.
859          */
build()860         public Locale build() {
861             // NOTE: We need to make a copy of attributes, keywords and extensions
862             // because the RI allows this builder to reused.
863             return new Locale(language, region, variant, script,
864                     attributes, keywords, extensions,
865                     true /* has validated fields */);
866         }
867     }
868 
869     /**
870      * Returns a locale for a given BCP-47 language tag. This method is more
871      * lenient than {@link Builder#setLanguageTag}. For a given language tag, parsing
872      * will proceed up to the first malformed subtag. All subsequent tags are discarded.
873      * Note that language tags use {@code -} rather than {@code _}, for example {@code en-US}.
874      *
875      * @throws NullPointerException if {@code languageTag} is {@code null}.
876      *
877      * @since 1.7
878      */
forLanguageTag(String languageTag)879     public static Locale forLanguageTag(String languageTag) {
880         if (languageTag == null) {
881             throw new NullPointerException("languageTag == null");
882         }
883 
884         return forLanguageTag(languageTag, false /* strict */);
885     }
886 
887     private transient String countryCode;
888     private transient String languageCode;
889     private transient String variantCode;
890     private transient String scriptCode;
891 
892     /* Sorted, Unmodifiable */
893     private transient Set<String> unicodeAttributes;
894     /* Sorted, Unmodifiable */
895     private transient Map<String, String> unicodeKeywords;
896     /* Sorted, Unmodifiable */
897     private transient Map<Character, String> extensions;
898 
899     /**
900      * Whether this instance was constructed from a builder. We can make
901      * stronger assumptions about the validity of Locale fields if this was
902      * constructed by a builder.
903      */
904     private transient final boolean hasValidatedFields;
905 
906     private transient String cachedToStringResult;
907     private transient String cachedLanguageTag;
908     private transient String cachedIcuLocaleId;
909 
910     /**
911      * There's a circular dependency between toLowerCase/toUpperCase and
912      * Locale.US. Work around this by avoiding these methods when constructing
913      * the built-in locales.
914      */
private Locale(boolean hasValidatedFields, String lowerCaseLanguageCode,
        String upperCaseCountryCode) {
    this.hasValidatedFields = hasValidatedFields;

    // The caller supplies codes that are already in canonical case, so no
    // toLowerCase/toUpperCase call (and hence no Locale.US) is needed here.
    this.languageCode = lowerCaseLanguageCode;
    this.countryCode = upperCaseCountryCode;
    this.scriptCode = "";
    this.variantCode = "";

    // Built-in locales carry no extension state.
    this.extensions = Collections.EMPTY_MAP;
    this.unicodeKeywords = Collections.EMPTY_MAP;
    this.unicodeAttributes = Collections.EMPTY_SET;
}
928 
929     /**
930      * Constructs a new {@code Locale} using the specified language.
931      */
public Locale(String language) {
    // Language only: empty country, variant and script, no extension state,
    // and fields are not treated as validated.
    this(language,
            "",
            "",
            "",
            Collections.EMPTY_SET,
            Collections.EMPTY_MAP,
            Collections.EMPTY_MAP,
            false /* has validated fields */);
}
936 
937     /**
938      * Constructs a new {@code Locale} using the specified language and country codes.
939      */
public Locale(String language, String country) {
    // Language and country only: empty variant and script, no extension
    // state, and fields are not treated as validated.
    this(language,
            country,
            "",
            "",
            Collections.EMPTY_SET,
            Collections.EMPTY_MAP,
            Collections.EMPTY_MAP,
            false /* has validated fields */);
}
944 
945     /**
946      * Required by libcore.icu.ICU.
947      *
948      * @hide
949      */
Locale(String language, String country, String variant, String scriptCode, Set<String> unicodeAttributes, Map<String, String> unicodeKeywords, Map<Character, String> extensions, boolean hasValidatedFields)950     public Locale(String language, String country, String variant, String scriptCode,
951             /* nonnull */ Set<String> unicodeAttributes,
952             /* nonnull */ Map<String, String> unicodeKeywords,
953             /* nonnull */ Map<Character, String> extensions,
954             boolean hasValidatedFields) {
955         if (language == null || country == null || variant == null) {
956             throw new NullPointerException("language=" + language +
957                     ",country=" + country +
958                     ",variant=" + variant);
959         }
960 
961         if (hasValidatedFields) {
962             this.languageCode = adjustLanguageCode(language);
963             this.countryCode = country;
964             this.variantCode = variant;
965         } else {
966             if (language.isEmpty() && country.isEmpty()) {
967                 languageCode = "";
968                 countryCode = "";
969                 variantCode = variant;
970             } else {
971                 languageCode = adjustLanguageCode(language);
972                 countryCode = country.toUpperCase(Locale.US);
973                 variantCode = variant;
974             }
975         }
976 
977         this.scriptCode = scriptCode;
978 
979         if (hasValidatedFields) {
980             Set<String> attribsCopy = new TreeSet<String>(unicodeAttributes);
981             Map<String, String> keywordsCopy = new TreeMap<String, String>(unicodeKeywords);
982             Map<Character, String> extensionsCopy = new TreeMap<Character, String>(extensions);
983 
984             // We need to transform the list of attributes & keywords set on the
985             // builder to a unicode locale extension. i.e, if we have any keywords
986             // or attributes set, Locale#getExtension('u') should return a well
987             // formed extension.
988             addUnicodeExtensionToExtensionsMap(attribsCopy, keywordsCopy, extensionsCopy);
989 
990             this.unicodeAttributes = Collections.unmodifiableSet(attribsCopy);
991             this.unicodeKeywords = Collections.unmodifiableMap(keywordsCopy);
992             this.extensions = Collections.unmodifiableMap(extensionsCopy);
993         } else {
994 
995             // The locales ja_JP_JP and th_TH_TH are ill formed since their variant is too
996             // short, however they have been used to represent a locale with the japanese imperial
997             // calendar and thai numbering respectively. We add an extension in their constructor
998             // to modernize them.
999             if ("ja".equals(language) && "JP".equals(country) && "JP".equals(variant)) {
1000                 Map<String, String> keywordsCopy = new TreeMap<>(unicodeKeywords);
1001                 keywordsCopy.put("ca", "japanese");
1002                 unicodeKeywords = keywordsCopy;
1003             } else if ("th".equals(language) && "TH".equals(country) && "TH".equals(variant)) {
1004                 Map<String, String> keywordsCopy = new TreeMap<>(unicodeKeywords);
1005                 keywordsCopy.put("nu", "thai");
1006                 unicodeKeywords = keywordsCopy;
1007             }
1008 
1009             if (!unicodeKeywords.isEmpty() || !unicodeAttributes.isEmpty()) {
1010                 Map<Character, String> extensionsCopy = new TreeMap<>(extensions);
1011                 addUnicodeExtensionToExtensionsMap(unicodeAttributes, unicodeKeywords, extensionsCopy);
1012                 extensions = extensionsCopy;
1013             }
1014 
1015             this.unicodeAttributes = unicodeAttributes;
1016             this.unicodeKeywords = unicodeKeywords;
1017             this.extensions = extensions;
1018         }
1019 
1020         this.hasValidatedFields = hasValidatedFields;
1021     }
1022 
1023     /**
1024      * Constructs a new {@code Locale} using the specified language, country,
1025      * and variant codes.
1026      */
public Locale(String language, String country, String variant) {
    // Language, country and variant: empty script, no extension state, and
    // fields are not treated as validated.
    this(language,
            country,
            variant,
            "",
            Collections.EMPTY_SET,
            Collections.EMPTY_MAP,
            Collections.EMPTY_MAP,
            false /* has validated fields */);
}
1032 
clone()1033     @Override public Object clone() {
1034         try {
1035             return super.clone();
1036         } catch (CloneNotSupportedException e) {
1037             throw new AssertionError(e);
1038         }
1039     }
1040 
1041     /**
1042      * Returns true if {@code object} is a locale with the same language,
1043      * country, variant, script and extensions.
1044      */
equals(Object object)1045     @Override public boolean equals(Object object) {
1046         if (object == this) {
1047             return true;
1048         }
1049         if (object instanceof Locale) {
1050             Locale o = (Locale) object;
1051             return languageCode.equals(o.languageCode)
1052                     && countryCode.equals(o.countryCode)
1053                     && variantCode.equals(o.variantCode)
1054                     && scriptCode.equals(o.scriptCode)
1055                     && extensions.equals(o.extensions);
1056 
1057         }
1058         return false;
1059     }
1060 
1061     /**
1062      * Returns the system's installed locales. This array always includes {@code
1063      * Locale.US}, and usually several others. Most locale-sensitive classes
1064      * offer their own {@code getAvailableLocales} method, which should be
1065      * preferred over this general purpose method.
1066      *
1067      * @see java.text.BreakIterator#getAvailableLocales()
1068      * @see java.text.Collator#getAvailableLocales()
1069      * @see java.text.DateFormat#getAvailableLocales()
1070      * @see java.text.DateFormatSymbols#getAvailableLocales()
1071      * @see java.text.DecimalFormatSymbols#getAvailableLocales()
1072      * @see java.text.NumberFormat#getAvailableLocales()
1073      * @see java.util.Calendar#getAvailableLocales()
1074      */
getAvailableLocales()1075     public static Locale[] getAvailableLocales() {
1076         return ICU.getAvailableLocales();
1077     }
1078 
1079     /**
1080      * Returns the country code for this locale, or {@code ""} if this locale
1081      * doesn't correspond to a specific country.
1082      */
getCountry()1083     public String getCountry() {
1084         return countryCode;
1085     }
1086 
1087     /**
1088      * Returns the user's preferred locale. This may have been overridden for
1089      * this process with {@link #setDefault}.
1090      *
1091      * <p>Since the user's locale changes dynamically, avoid caching this value.
1092      * Instead, use this method to look it up for each use.
1093      */
getDefault()1094     public static Locale getDefault() {
1095         return NoImagePreloadHolder.defaultLocale;
1096     }
1097 
1098     /**
1099      * Equivalent to {@code getDisplayCountry(Locale.getDefault())}.
1100      */
getDisplayCountry()1101     public final String getDisplayCountry() {
1102         return getDisplayCountry(getDefault());
1103     }
1104 
1105     /**
1106      * Returns the name of this locale's country, localized to {@code locale}.
1107      * Returns the empty string if this locale does not correspond to a specific
1108      * country.
1109      */
getDisplayCountry(Locale locale)1110     public String getDisplayCountry(Locale locale) {
1111         if (countryCode.isEmpty()) {
1112             return "";
1113         }
1114 
1115         final String normalizedRegion = Builder.normalizeAndValidateRegion(
1116                 countryCode, false /* strict */);
1117         if (normalizedRegion.isEmpty()) {
1118             return countryCode;
1119         }
1120 
1121         String result = ICU.getDisplayCountry(this, locale);
1122         if (result == null) { // TODO: do we need to do this, or does ICU do it for us?
1123             result = ICU.getDisplayCountry(this, Locale.getDefault());
1124         }
1125         return result;
1126     }
1127 
1128     /**
1129      * Equivalent to {@code getDisplayLanguage(Locale.getDefault())}.
1130      */
getDisplayLanguage()1131     public final String getDisplayLanguage() {
1132         return getDisplayLanguage(getDefault());
1133     }
1134 
1135     /**
1136      * Returns the name of this locale's language, localized to {@code locale}.
1137      * If the language name is unknown, the language code is returned.
1138      */
getDisplayLanguage(Locale locale)1139     public String getDisplayLanguage(Locale locale) {
1140         if (languageCode.isEmpty()) {
1141             return "";
1142         }
1143 
1144         // Hacks for backward compatibility.
1145         //
1146         // Our language tag will contain "und" if the languageCode is invalid
1147         // or missing. ICU will then return "langue indéterminée" or the equivalent
1148         // display language for the indeterminate language code.
1149         //
1150         // Sigh... ugh... and what not.
1151         final String normalizedLanguage = Builder.normalizeAndValidateLanguage(
1152                 languageCode, false /* strict */);
1153         if (UNDETERMINED_LANGUAGE.equals(normalizedLanguage)) {
1154             return languageCode;
1155         }
1156 
1157         // TODO: We need a new hack or a complete fix for http://b/8049507 --- We would
1158         // cover the frameworks' tracks when they were using "tl" instead of "fil".
1159         String result = ICU.getDisplayLanguage(this, locale);
1160         if (result == null) { // TODO: do we need to do this, or does ICU do it for us?
1161             result = ICU.getDisplayLanguage(this, Locale.getDefault());
1162         }
1163         return result;
1164     }
1165 
1166     /**
1167      * Equivalent to {@code getDisplayName(Locale.getDefault())}.
1168      */
getDisplayName()1169     public final String getDisplayName() {
1170         return getDisplayName(getDefault());
1171     }
1172 
1173     /**
1174      * Returns this locale's language name, country name, and variant, localized
1175      * to {@code locale}. The exact output form depends on whether this locale
1176      * corresponds to a specific language, script, country and variant.
1177      *
1178      * <p>For example:
1179      * <ul>
1180      * <li>{@code new Locale("en").getDisplayName(Locale.US)} -> {@code English}
1181      * <li>{@code new Locale("en", "US").getDisplayName(Locale.US)} -> {@code English (United States)}
1182      * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.US)} -> {@code English (United States,Computer)}
1183      * <li>{@code Locale.forLanguageTag("zh-Hant-CN").getDisplayName(Locale.US)} -> {@code Chinese (Traditional Han,China)}
1184      * <li>{@code new Locale("en").getDisplayName(Locale.FRANCE)} -> {@code anglais}
1185      * <li>{@code new Locale("en", "US").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis)}
1186      * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis,informatique)}.
1187      * </ul>
1188      */
getDisplayName(Locale locale)1189     public String getDisplayName(Locale locale) {
1190         int count = 0;
1191         StringBuilder buffer = new StringBuilder();
1192         if (!languageCode.isEmpty()) {
1193             String displayLanguage = getDisplayLanguage(locale);
1194             buffer.append(displayLanguage.isEmpty() ? languageCode : displayLanguage);
1195             ++count;
1196         }
1197         if (!scriptCode.isEmpty()) {
1198             if (count == 1) {
1199                 buffer.append(" (");
1200             }
1201             String displayScript = getDisplayScript(locale);
1202             buffer.append(displayScript.isEmpty() ? scriptCode : displayScript);
1203             ++count;
1204         }
1205         if (!countryCode.isEmpty()) {
1206             if (count == 1) {
1207                 buffer.append(" (");
1208             } else if (count == 2) {
1209                 buffer.append(",");
1210             }
1211             String displayCountry = getDisplayCountry(locale);
1212             buffer.append(displayCountry.isEmpty() ? countryCode : displayCountry);
1213             ++count;
1214         }
1215         if (!variantCode.isEmpty()) {
1216             if (count == 1) {
1217                 buffer.append(" (");
1218             } else if (count == 2 || count == 3) {
1219                 buffer.append(",");
1220             }
1221             String displayVariant = getDisplayVariant(locale);
1222             buffer.append(displayVariant.isEmpty() ? variantCode : displayVariant);
1223             ++count;
1224         }
1225         if (count > 1) {
1226             buffer.append(")");
1227         }
1228         return buffer.toString();
1229     }
1230 
1231     /**
1232      * Returns the full variant name in the default {@code Locale} for the variant code of
1233      * this {@code Locale}. If there is no matching variant name, the variant code is
1234      * returned.
1235      *
1236      * @since 1.7
1237      */
getDisplayVariant()1238     public final String getDisplayVariant() {
1239         return getDisplayVariant(getDefault());
1240     }
1241 
1242     /**
1243      * Returns the full variant name in the specified {@code Locale} for the variant code
1244      * of this {@code Locale}. If there is no matching variant name, the variant code is
1245      * returned.
1246      *
1247      * @since 1.7
1248      */
getDisplayVariant(Locale locale)1249     public String getDisplayVariant(Locale locale) {
1250         if (variantCode.isEmpty()) {
1251             return "";
1252         }
1253 
1254         try {
1255             Builder.normalizeAndValidateVariant(variantCode);
1256         } catch (IllformedLocaleException ilfe) {
1257             return variantCode;
1258         }
1259 
1260         String result = ICU.getDisplayVariant(this, locale);
1261         if (result == null) { // TODO: do we need to do this, or does ICU do it for us?
1262             result = ICU.getDisplayVariant(this, Locale.getDefault());
1263         }
1264 
1265         // The "old style" locale constructors allow us to pass in variants that aren't
1266         // valid BCP-47 variant subtags. When that happens, toLanguageTag will not emit
1267         // them. Note that we know variantCode.length() > 0 due to the isEmpty check at
1268         // the beginning of this function.
1269         if (result.isEmpty()) {
1270             return variantCode;
1271         }
1272         return result;
1273     }
1274 
1275     /**
1276      * Returns the three-letter ISO 3166 country code which corresponds to the country
1277      * code for this {@code Locale}.
1278      * @throws MissingResourceException if there's no 3-letter country code for this locale.
1279      */
getISO3Country()1280     public String getISO3Country() {
1281         // The results of getISO3Country do not depend on the languageCode,
1282         // so we pass an arbitrarily selected language code here. This guards
1283         // against errors caused by malformed or invalid language codes.
1284         String code = ICU.getISO3Country("en-" + countryCode);
1285         if (!countryCode.isEmpty() && code.isEmpty()) {
1286             throw new MissingResourceException("No 3-letter country code for locale: " + this, "FormatData_" + this, "ShortCountry");
1287         }
1288         return code;
1289     }
1290 
1291     /**
1292      * Returns the three-letter ISO 639-2/T language code which corresponds to the language
1293      * code for this {@code Locale}.
1294      * @throws MissingResourceException if there's no 3-letter language code for this locale.
1295      */
getISO3Language()1296     public String getISO3Language() {
1297         // For backward compatibility, we must return "" for an empty language
1298         // code and not "und" which is the accurate ISO-639-3 code for an
1299         // undetermined language.
1300         if (languageCode.isEmpty()) {
1301             return "";
1302         }
1303 
1304         // The results of getISO3Language do not depend on the country code
1305         // or any of the other locale fields, so we pass just the language here.
1306         String code = ICU.getISO3Language(languageCode);
1307         if (!languageCode.isEmpty() && code.isEmpty()) {
1308             throw new MissingResourceException("No 3-letter language code for locale: " + this, "FormatData_" + this, "ShortLanguage");
1309         }
1310         return code;
1311     }
1312 
1313     /**
1314      * Returns an array of strings containing all the two-letter ISO 3166 country codes that can be
1315      * used as the country code when constructing a {@code Locale}.
1316      */
getISOCountries()1317     public static String[] getISOCountries() {
1318         return ICU.getISOCountries();
1319     }
1320 
1321     /**
1322      * Returns an array of strings containing all the two-letter ISO 639-1 language codes that can be
1323      * used as the language code when constructing a {@code Locale}.
1324      */
getISOLanguages()1325     public static String[] getISOLanguages() {
1326         return ICU.getISOLanguages();
1327     }
1328 
1329     /**
1330      * Returns the language code for this {@code Locale} or the empty string if no language
1331      * was set.
1332      */
getLanguage()1333     public String getLanguage() {
1334         return languageCode;
1335     }
1336 
1337     /**
1338      * Returns the variant code for this {@code Locale} or an empty {@code String} if no variant
1339      * was set.
1340      */
getVariant()1341     public String getVariant() {
1342         return variantCode;
1343     }
1344 
1345     /**
1346      * Returns the script code for this {@code Locale} or an empty {@code String} if no script
1347      * was set.
1348      *
1349      * If set, the script code will be a title cased string of length 4, as per the ISO 15924
1350      * specification.
1351      *
1352      * @since 1.7
1353      */
getScript()1354     public String getScript() {
1355         return scriptCode;
1356     }
1357 
1358     /**
1359      * Equivalent to {@code getDisplayScript(Locale.getDefault())}.
1360      *
1361      * @since 1.7
1362      */
getDisplayScript()1363     public String getDisplayScript() {
1364         return getDisplayScript(getDefault());
1365     }
1366 
1367     /**
1368      * Returns the name of this locale's script code, localized to {@code locale}. If the
1369      * script code is unknown, the return value of this method is the same as that of
1370      * {@link #getScript()}.
1371      *
1372      * @since 1.7
1373      */
getDisplayScript(Locale locale)1374     public String getDisplayScript(Locale locale) {
1375         if (scriptCode.isEmpty()) {
1376             return "";
1377         }
1378 
1379         String result = ICU.getDisplayScript(this, locale);
1380         if (result == null) { // TODO: do we need to do this, or does ICU do it for us?
1381             result = ICU.getDisplayScript(this, Locale.getDefault());
1382         }
1383 
1384         return result;
1385 
1386     }
1387 
1388     /**
1389      * Returns a well formed BCP-47 language tag that identifies this locale.
1390      *
1391      * Note that this locale itself might consist of ill formed fields, since the
1392      * public {@code Locale} constructors do not perform validity checks to maintain
1393      * backwards compatibility. When this is the case, this method will either replace
1394      * ill formed fields with standard BCP-47 subtags (For eg. "und" (undetermined)
1395      * for invalid languages) or omit them altogether.
1396      *
1397      * Additionally, ill formed variants will result in the remainder of the tag
1398      * (both variants and extensions) being moved to the private use extension,
1399      * where they will appear after a subtag whose value is {@code "lvariant"}.
1400      *
1401      * It's also important to note that the BCP-47 tag is well formed in the sense
1402      * that it is unambiguously parseable into its specified components. We do not
1403      * require that any of the components are registered with the applicable registries.
1404      * For example, we do not require scripts to be a registered ISO 15924 scripts or
1405      * languages to appear in the ISO-639-2 code list.
1406      *
1407      * @since 1.7
1408      */
toLanguageTag()1409     public String toLanguageTag() {
1410         if (cachedLanguageTag == null) {
1411             cachedLanguageTag = makeLanguageTag();
1412         }
1413 
1414         return cachedLanguageTag;
1415     }
1416 
1417     /**
1418      * Constructs a valid BCP-47 language tag from locale fields. Additional validation
1419      * is required when this Locale was not constructed using a Builder and variants
1420      * set this way are treated specially.
1421      *
1422      * In both cases, we convert empty language tags to "und", omit invalid country tags
1423      * and perform a special case conversion of "no-NO-NY" to "nn-NO".
1424      */
makeLanguageTag()1425     private String makeLanguageTag() {
1426         // We only need to revalidate the language, country and variant because
1427         // the rest of the fields can only be set via the builder which validates
1428         // them anyway.
1429         String language = "";
1430         String region = "";
1431         String variant = "";
1432         String illFormedVariantSubtags = "";
1433 
1434         if (hasValidatedFields) {
1435             language = languageCode;
1436             region = countryCode;
1437             // Note that we are required to normalize hyphens to underscores
1438             // in the builder, but we must use hyphens in the BCP-47 language tag.
1439             variant = variantCode.replace('_', '-');
1440         } else {
1441             language = Builder.normalizeAndValidateLanguage(languageCode, false /* strict */);
1442             region = Builder.normalizeAndValidateRegion(countryCode, false /* strict */);
1443 
1444             try {
1445                 variant = Builder.normalizeAndValidateVariant(variantCode);
1446             } catch (IllformedLocaleException ilfe) {
1447                 // If our variant is ill formed, we must attempt to split it into
1448                 // its constituent subtags and preserve the well formed bits and
1449                 // move the rest to the private use extension (if they're well
1450                 // formed extension subtags).
1451                 String split[] = splitIllformedVariant(variantCode);
1452 
1453                 variant = split[0];
1454                 illFormedVariantSubtags = split[1];
1455             }
1456         }
1457 
1458         if (language.isEmpty()) {
1459             language = UNDETERMINED_LANGUAGE;
1460         }
1461 
1462         if ("no".equals(language) && "NO".equals(region) && "NY".equals(variant)) {
1463             language = "nn";
1464             region = "NO";
1465             variant = "";
1466         }
1467 
1468         final StringBuilder sb = new StringBuilder(16);
1469         sb.append(language);
1470 
1471         if (!scriptCode.isEmpty()) {
1472             sb.append('-');
1473             sb.append(scriptCode);
1474         }
1475 
1476         if (!region.isEmpty()) {
1477             sb.append('-');
1478             sb.append(region);
1479         }
1480 
1481         if (!variant.isEmpty()) {
1482             sb.append('-');
1483             sb.append(variant);
1484         }
1485 
1486         // Extensions (optional, omitted if empty). Note that we don't
1487         // emit the private use extension here, but add it in the end.
1488         for (Map.Entry<Character, String> extension : extensions.entrySet()) {
1489             if (!extension.getKey().equals('x')) {
1490                 sb.append('-').append(extension.getKey());
1491                 sb.append('-').append(extension.getValue());
1492             }
1493         }
1494 
1495         // The private use extension comes right at the very end.
1496         final String privateUse = extensions.get('x');
1497         if (privateUse != null) {
1498             sb.append("-x-");
1499             sb.append(privateUse);
1500         }
1501 
1502         // If we have any ill-formed variant subtags, we append them to the
1503         // private use extension (or add a private use extension if one doesn't
1504         // exist).
1505         if (!illFormedVariantSubtags.isEmpty()) {
1506             if (privateUse == null) {
1507                 sb.append("-x-lvariant-");
1508             } else {
1509                 sb.append('-');
1510             }
1511             sb.append(illFormedVariantSubtags);
1512         }
1513 
1514         return sb.toString();
1515     }
1516 
1517     /**
1518      * Splits ill formed variants into a set of valid variant subtags (which
1519      * can be used directly in language tag construction) and a set of invalid
1520      * variant subtags (which can be appended to the private use extension),
1521      * provided that each subtag is a valid private use extension subtag.
1522      *
1523      * This method returns a two element String array. The first element is a string
1524      * containing the concatenation of valid variant subtags which can be appended
1525      * to a BCP-47 tag directly and the second containing the concatenation of
1526      * invalid variant subtags which can be appended to the private use extension
1527      * directly.
1528      *
1529      * This method assumes that {@code variant} contains at least one ill formed
1530      * variant subtag.
1531      */
splitIllformedVariant(String variant)1532     private static String[] splitIllformedVariant(String variant) {
1533         final String normalizedVariant = variant.replace('_', '-');
1534         final String[] subTags = normalizedVariant.split("-");
1535 
1536         final String[] split = new String[] { "", "" };
1537 
1538         // First go through the list of variant subtags and check if they're
1539         // valid private use extension subtags. If they're not, we will omit
1540         // the first such subtag and all subtags after.
1541         //
1542         // NOTE: |firstInvalidSubtag| is the index of the first variant
1543         // subtag we decide to omit altogether, whereas |firstIllformedSubtag| is the
1544         // index of the first subtag we decide to append to the private use extension.
1545         //
1546         // In other words:
1547         // [0, firstIllformedSubtag) => expressed as variant subtags.
1548         // [firstIllformedSubtag, firstInvalidSubtag) => expressed as private use
1549         // extension subtags.
1550         // [firstInvalidSubtag, subTags.length) => omitted.
1551         int firstInvalidSubtag = subTags.length;
1552         for (int i = 0; i < subTags.length; ++i) {
1553             if (!isValidBcp47Alphanum(subTags[i], 1, 8)) {
1554                 firstInvalidSubtag = i;
1555                 break;
1556             }
1557         }
1558 
1559         if (firstInvalidSubtag == 0) {
1560             return split;
1561         }
1562 
1563         // We now consider each subtag that could potentially be appended to
1564         // the private use extension and check if it's valid.
1565         int firstIllformedSubtag = firstInvalidSubtag;
1566         for (int i = 0; i < firstInvalidSubtag; ++i) {
1567             final String subTag = subTags[i];
1568             // The BCP-47 spec states that :
1569             // - Subtags can be between [5, 8] alphanumeric chars in length.
1570             // - Subtags that start with a number are allowed to be 4 chars in length.
1571             if (subTag.length() >= 5 && subTag.length() <= 8) {
1572                 if (!isAsciiAlphaNum(subTag)) {
1573                     firstIllformedSubtag = i;
1574                 }
1575             } else if (subTag.length() == 4) {
1576                 final char firstChar = subTag.charAt(0);
1577                 if (!(firstChar >= '0' && firstChar <= '9') || !isAsciiAlphaNum(subTag)) {
1578                     firstIllformedSubtag = i;
1579                 }
1580             } else {
1581                 firstIllformedSubtag = i;
1582             }
1583         }
1584 
1585         split[0] = concatenateRange(subTags, 0, firstIllformedSubtag);
1586         split[1] = concatenateRange(subTags, firstIllformedSubtag, firstInvalidSubtag);
1587 
1588         return split;
1589     }
1590 
1591     /**
1592      * Builds a string by concatenating array elements within the range [start, end).
1593      * The supplied range is assumed to be valid and no checks are performed.
1594      */
concatenateRange(String[] array, int start, int end)1595     private static String concatenateRange(String[] array, int start, int end) {
1596         StringBuilder builder = new StringBuilder(32);
1597         for (int i = start; i < end; ++i) {
1598             if (i != start) {
1599                 builder.append('-');
1600             }
1601             builder.append(array[i]);
1602         }
1603 
1604         return builder.toString();
1605     }
1606 
1607     /**
1608      * Returns the set of BCP-47 extensions this locale contains.
1609      *
1610      * See <a href="https://tools.ietf.org/html/bcp47#section-2.1">
1611      *     the IETF BCP-47 specification</a> (Section 2.2.6) for details.
1612      *
1613      * @since 1.7
1614      */
getExtensionKeys()1615     public Set<Character> getExtensionKeys() {
1616         return extensions.keySet();
1617     }
1618 
1619     /**
1620      * Returns the BCP-47 extension whose key is {@code extensionKey}, or {@code null}
1621      * if this locale does not contain the extension.
1622      *
1623      * Individual Keywords and attributes for the unicode
1624      * locale extension can be fetched using {@link #getUnicodeLocaleAttributes()},
1625      * {@link #getUnicodeLocaleKeys()}  and {@link #getUnicodeLocaleType}.
1626      *
1627      * @since 1.7
1628      */
getExtension(char extensionKey)1629     public String getExtension(char extensionKey) {
1630         return extensions.get(extensionKey);
1631     }
1632 
1633     /**
1634      * Returns the {@code type} for the specified unicode locale extension {@code key}.
1635      *
1636      * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword}
1637      * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a>
1638      *
1639      * @since 1.7
1640      */
getUnicodeLocaleType(String keyWord)1641     public String getUnicodeLocaleType(String keyWord) {
1642         return unicodeKeywords.get(keyWord);
1643     }
1644 
1645     /**
1646      * Returns the set of unicode locale extension attributes this locale contains.
1647      *
1648      * For more information about attributes, see {@link Builder#addUnicodeLocaleAttribute}
1649      * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a>
1650      *
1651      * @since 1.7
1652      */
getUnicodeLocaleAttributes()1653     public Set<String> getUnicodeLocaleAttributes() {
1654         return unicodeAttributes;
1655     }
1656 
1657     /**
1658      * Returns the set of unicode locale extension keywords this locale contains.
1659      *
1660      * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword}
1661      * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a>
1662      *
1663      * @since 1.7
1664      */
getUnicodeLocaleKeys()1665     public Set<String> getUnicodeLocaleKeys() {
1666         return unicodeKeywords.keySet();
1667     }
1668 
1669     @Override
hashCode()1670     public synchronized int hashCode() {
1671         return countryCode.hashCode()
1672                 + languageCode.hashCode() + variantCode.hashCode()
1673                 + scriptCode.hashCode() + extensions.hashCode();
1674     }
1675 
1676     /**
1677      * Overrides the default locale. This does not affect system configuration,
1678      * and attempts to override the system-provided default locale may
1679      * themselves be overridden by actual changes to the system configuration.
1680      * Code that calls this method is usually incorrect, and should be fixed by
1681      * passing the appropriate locale to each locale-sensitive method that's
1682      * called.
1683      */
setDefault(Locale locale)1684     public synchronized static void setDefault(Locale locale) {
1685         if (locale == null) {
1686             throw new NullPointerException("locale == null");
1687         }
1688         String languageTag = locale.toLanguageTag();
1689         NoImagePreloadHolder.defaultLocale = locale;
1690         ICU.setDefaultLocale(languageTag);
1691     }
1692 
1693     /**
1694      * Returns the string representation of this {@code Locale}. It consists of the
1695      * language code, country code and variant separated by underscores.
1696      * If the language is missing the string begins
1697      * with an underscore. If the country is missing there are 2 underscores
1698      * between the language and the variant. The variant cannot stand alone
1699      * without a language and/or country code: in this case this method would
1700      * return the empty string.
1701      *
1702      * <p>Examples: "en", "en_US", "_US", "en__POSIX", "en_US_POSIX"
1703      */
1704     @Override
toString()1705     public final String toString() {
1706         String result = cachedToStringResult;
1707         if (result == null) {
1708             result = cachedToStringResult = toNewString(languageCode, countryCode, variantCode,
1709                                                         scriptCode, extensions);
1710         }
1711         return result;
1712     }
1713 
toNewString(String languageCode, String countryCode, String variantCode, String scriptCode, Map<Character, String> extensions)1714     private static String toNewString(String languageCode, String countryCode,
1715             String variantCode, String scriptCode, Map<Character, String> extensions) {
1716         // The string form of a locale that only has a variant is the empty string.
1717         if (languageCode.length() == 0 && countryCode.length() == 0) {
1718             return "";
1719         }
1720 
1721         // Otherwise, the output format is "ll_cc_variant", where language and country are always
1722         // two letters, but the variant is an arbitrary length. A size of 11 characters has room
1723         // for "en_US_POSIX", the largest "common" value. (In practice, the string form is almost
1724         // always 5 characters: "ll_cc".)
1725         StringBuilder result = new StringBuilder(11);
1726         result.append(languageCode);
1727 
1728         final boolean hasScriptOrExtensions = !scriptCode.isEmpty() || !extensions.isEmpty();
1729 
1730         if (!countryCode.isEmpty() || !variantCode.isEmpty() || hasScriptOrExtensions) {
1731             result.append('_');
1732         }
1733         result.append(countryCode);
1734         if (!variantCode.isEmpty() || hasScriptOrExtensions) {
1735             result.append('_');
1736         }
1737         result.append(variantCode);
1738 
1739         if (hasScriptOrExtensions) {
1740             if (!variantCode.isEmpty()) {
1741                 result.append('_');
1742             }
1743 
1744             // Note that this is notably different from the BCP-47 spec (for
1745             // backwards compatibility). We are forced to append a "#" before the script tag.
1746             // and also put the script code right at the end.
1747             result.append("#");
1748             if (!scriptCode.isEmpty() ) {
1749                 result.append(scriptCode);
1750             }
1751 
1752             // Note the use of "-" instead of "_" before the extensions.
1753             if (!extensions.isEmpty()) {
1754                 if (!scriptCode.isEmpty()) {
1755                     result.append('-');
1756                 }
1757                 result.append(serializeExtensions(extensions));
1758             }
1759         }
1760 
1761         return result.toString();
1762     }
1763 
1764     private static final ObjectStreamField[] serialPersistentFields = {
1765         new ObjectStreamField("country", String.class),
1766         new ObjectStreamField("hashcode", int.class),
1767         new ObjectStreamField("language", String.class),
1768         new ObjectStreamField("variant", String.class),
1769         new ObjectStreamField("script", String.class),
1770         new ObjectStreamField("extensions", String.class),
1771     };
1772 
writeObject(ObjectOutputStream stream)1773     private void writeObject(ObjectOutputStream stream) throws IOException {
1774         ObjectOutputStream.PutField fields = stream.putFields();
1775         fields.put("country", countryCode);
1776         fields.put("hashcode", -1);
1777         fields.put("language", languageCode);
1778         fields.put("variant", variantCode);
1779         fields.put("script", scriptCode);
1780 
1781         if (!extensions.isEmpty()) {
1782             fields.put("extensions", serializeExtensions(extensions));
1783         }
1784 
1785         stream.writeFields();
1786     }
1787 
readObject(ObjectInputStream stream)1788     private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException {
1789         ObjectInputStream.GetField fields = stream.readFields();
1790         countryCode = (String) fields.get("country", "");
1791         languageCode = (String) fields.get("language", "");
1792         variantCode = (String) fields.get("variant", "");
1793         scriptCode = (String) fields.get("script", "");
1794 
1795         this.unicodeKeywords = Collections.EMPTY_MAP;
1796         this.unicodeAttributes = Collections.EMPTY_SET;
1797         this.extensions = Collections.EMPTY_MAP;
1798 
1799         String extensions = (String) fields.get("extensions", null);
1800         if (extensions != null) {
1801             readExtensions(extensions);
1802         }
1803     }
1804 
readExtensions(String extensions)1805     private void readExtensions(String extensions) {
1806         Map<Character, String> extensionsMap = new TreeMap<Character, String>();
1807         parseSerializedExtensions(extensions, extensionsMap);
1808         this.extensions = Collections.unmodifiableMap(extensionsMap);
1809 
1810         if (extensionsMap.containsKey(UNICODE_LOCALE_EXTENSION)) {
1811             String unicodeExtension = extensionsMap.get(UNICODE_LOCALE_EXTENSION);
1812             String[] subTags = unicodeExtension.split("-");
1813 
1814             Map<String, String> unicodeKeywords = new TreeMap<String, String>();
1815             Set<String> unicodeAttributes = new TreeSet<String>();
1816             parseUnicodeExtension(subTags, unicodeKeywords, unicodeAttributes);
1817 
1818             this.unicodeKeywords = Collections.unmodifiableMap(unicodeKeywords);
1819             this.unicodeAttributes = Collections.unmodifiableSet(unicodeAttributes);
1820         }
1821     }
1822 
1823     /**
1824      * The serialized form for extensions is straightforward. It's simply
1825      * of the form key1-value1-key2-value2 where each value might in turn contain
1826      * multiple subtags separated by hyphens. Each key is guaranteed to be a single
1827      * character in length.
1828      *
1829      * This method assumes that {@code extensionsMap} is non-empty.
1830      *
1831      * Visible for testing.
1832      *
1833      * @hide
1834      */
serializeExtensions(Map<Character, String> extensionsMap)1835     public static String serializeExtensions(Map<Character, String> extensionsMap) {
1836         Iterator<Map.Entry<Character, String>> entryIterator = extensionsMap.entrySet().iterator();
1837         StringBuilder sb = new StringBuilder(64);
1838 
1839         while (true) {
1840             final Map.Entry<Character, String> entry = entryIterator.next();
1841             sb.append(entry.getKey());
1842             sb.append('-');
1843             sb.append(entry.getValue());
1844 
1845             if (entryIterator.hasNext()) {
1846                 sb.append('-');
1847             } else {
1848                 break;
1849             }
1850         }
1851 
1852         return sb.toString();
1853     }
1854 
1855     /**
1856      * Visible for testing.
1857      *
1858      * @hide
1859      */
parseSerializedExtensions(String extString, Map<Character, String> outputMap)1860     public static void parseSerializedExtensions(String extString, Map<Character, String> outputMap) {
1861         // This probably isn't the most efficient approach, but it's the
1862         // most straightforward to code.
1863         //
1864         // Start by splitting the string on "-". We will then keep track of
1865         // where each of the extension keys (single characters) appear in the
1866         // original string and then use those indices to construct substrings
1867         // representing the values.
1868         final String[] subTags = extString.split("-");
1869         final int[] typeStartIndices = new int[subTags.length / 2];
1870 
1871         int length = 0;
1872         int count = 0;
1873         for (String subTag : subTags) {
1874             if (subTag.length() > 0) {
1875                 // Account for the length of the "-" at the end of each subtag.
1876                 length += (subTag.length() + 1);
1877             }
1878 
1879             if (subTag.length() == 1) {
1880                 typeStartIndices[count++] = length;
1881             }
1882         }
1883 
1884         for (int i = 0; i < count; ++i) {
1885             final int valueStart = typeStartIndices[i];
1886             // Since the start Index points to the beginning of the next type
1887             // ....prev-k-next.....
1888             //            |_ here
1889             // (idx - 2) is the index of the next key
1890             // (idx - 3) is the (non inclusive) end of the previous type.
1891             final int valueEnd = (i == (count - 1)) ?
1892                     extString.length() : (typeStartIndices[i + 1] - 3);
1893 
1894             outputMap.put(extString.charAt(typeStartIndices[i] - 2),
1895                     extString.substring(valueStart, valueEnd));
1896         }
1897     }
1898 
1899 
1900     /**
1901      * A UN M.49 is a 3 digit numeric code.
1902      */
isUnM49AreaCode(String code)1903     private static boolean isUnM49AreaCode(String code) {
1904         if (code.length() != 3) {
1905             return false;
1906         }
1907 
1908         for (int i = 0; i < 3; ++i) {
1909             final char character = code.charAt(i);
1910             if (!(character >= '0' && character <= '9')) {
1911                 return false;
1912             }
1913         }
1914 
1915         return true;
1916     }
1917 
1918     /*
1919      * Checks whether a given string is an ASCII alphanumeric string.
1920      */
isAsciiAlphaNum(String string)1921     private static boolean isAsciiAlphaNum(String string) {
1922         for (int i = 0; i < string.length(); i++) {
1923             final char character = string.charAt(i);
1924             if (!(character >= 'a' && character <= 'z' ||
1925                     character >= 'A' && character <= 'Z' ||
1926                     character >= '0' && character <= '9')) {
1927                 return false;
1928             }
1929         }
1930 
1931         return true;
1932     }
1933 
isValidBcp47Alpha(String string, int lowerBound, int upperBound)1934     private static boolean isValidBcp47Alpha(String string, int lowerBound, int upperBound) {
1935         final int length = string.length();
1936         if (length < lowerBound || length > upperBound) {
1937             return false;
1938         }
1939 
1940         for (int i = 0; i < length; ++i) {
1941             final char character = string.charAt(i);
1942             if (!(character >= 'a' && character <= 'z' ||
1943                     character >= 'A' && character <= 'Z')) {
1944                 return false;
1945             }
1946         }
1947 
1948         return true;
1949     }
1950 
isValidBcp47Alphanum(String attributeOrType, int lowerBound, int upperBound)1951     private static boolean isValidBcp47Alphanum(String attributeOrType,
1952             int lowerBound, int upperBound) {
1953         if (attributeOrType.length() < lowerBound || attributeOrType.length() > upperBound) {
1954             return false;
1955         }
1956 
1957         return isAsciiAlphaNum(attributeOrType);
1958     }
1959 
titleCaseAsciiWord(String word)1960     private static String titleCaseAsciiWord(String word) {
1961         try {
1962             byte[] chars = word.toLowerCase(Locale.ROOT).getBytes(StandardCharsets.US_ASCII);
1963             chars[0] = (byte) ((int) chars[0] + 'A' - 'a');
1964             return new String(chars, StandardCharsets.US_ASCII);
1965         } catch (UnsupportedOperationException uoe) {
1966             throw new AssertionError(uoe);
1967         }
1968     }
1969 
1970     /**
1971      * A type list must contain one or more alphanumeric subtags whose lengths
1972      * are between 3 and 8.
1973      */
isValidTypeList(String lowerCaseTypeList)1974     private static boolean isValidTypeList(String lowerCaseTypeList) {
1975         final String[] splitList = lowerCaseTypeList.split("-");
1976         for (String type : splitList) {
1977             if (!isValidBcp47Alphanum(type, 3, 8)) {
1978                 return false;
1979             }
1980         }
1981 
1982         return true;
1983     }
1984 
addUnicodeExtensionToExtensionsMap( Set<String> attributes, Map<String, String> keywords, Map<Character, String> extensions)1985     private static void addUnicodeExtensionToExtensionsMap(
1986             Set<String> attributes, Map<String, String> keywords,
1987             Map<Character, String> extensions) {
1988         if (attributes.isEmpty() && keywords.isEmpty()) {
1989             return;
1990         }
1991 
1992         // Assume that the common case is a low number of keywords & attributes
1993         // (usually one or two).
1994         final StringBuilder sb = new StringBuilder(32);
1995 
1996         // All attributes must appear before keywords, in lexical order.
1997         if (!attributes.isEmpty()) {
1998             Iterator<String> attributesIterator = attributes.iterator();
1999             while (true) {
2000                 sb.append(attributesIterator.next());
2001                 if (attributesIterator.hasNext()) {
2002                     sb.append('-');
2003                 } else {
2004                     break;
2005                 }
2006             }
2007         }
2008 
2009         if (!keywords.isEmpty()) {
2010             if (!attributes.isEmpty()) {
2011                 sb.append('-');
2012             }
2013 
2014             Iterator<Map.Entry<String, String>> keywordsIterator = keywords.entrySet().iterator();
2015             while (true) {
2016                 final Map.Entry<String, String> keyWord = keywordsIterator.next();
2017                 sb.append(keyWord.getKey());
2018                 if (!keyWord.getValue().isEmpty()) {
2019                     sb.append('-');
2020                     sb.append(keyWord.getValue());
2021                 }
2022                 if (keywordsIterator.hasNext()) {
2023                     sb.append('-');
2024                 } else {
2025                     break;
2026                 }
2027             }
2028         }
2029 
2030         extensions.put(UNICODE_LOCALE_EXTENSION, sb.toString());
2031     }
2032 
2033     /**
2034      * This extension is described by http://www.unicode.org/reports/tr35/#RFC5234
2035      * unicode_locale_extensions = sep "u" (1*(sep keyword) / 1*(sep attribute) *(sep keyword)).
2036      *
2037      * It must contain at least one keyword or attribute and attributes (if any)
2038      * must appear before keywords. Attributes can't appear after keywords because
2039      * they will be indistinguishable from a subtag of the keyword type.
2040      *
2041      * Visible for testing.
2042      *
2043      * @hide
2044      */
parseUnicodeExtension(String[] subtags, Map<String, String> keywords, Set<String> attributes)2045     public static void parseUnicodeExtension(String[] subtags,
2046             Map<String, String> keywords, Set<String> attributes)  {
2047         String lastKeyword = null;
2048         List<String> subtagsForKeyword = new ArrayList<String>();
2049         for (String subtag : subtags) {
2050             if (subtag.length() == 2) {
2051                 if (subtagsForKeyword.size() > 0) {
2052                     keywords.put(lastKeyword, joinBcp47Subtags(subtagsForKeyword));
2053                     subtagsForKeyword.clear();
2054                 }
2055 
2056                 lastKeyword = subtag;
2057             } else if (subtag.length() > 2) {
2058                 if (lastKeyword == null) {
2059                     attributes.add(subtag);
2060                 } else {
2061                     subtagsForKeyword.add(subtag);
2062                 }
2063             }
2064         }
2065 
2066         if (subtagsForKeyword.size() > 0) {
2067             keywords.put(lastKeyword, joinBcp47Subtags(subtagsForKeyword));
2068         } else if (lastKeyword != null) {
2069             keywords.put(lastKeyword, "");
2070         }
2071     }
2072 
2073     /**
2074      * Joins a list of subtags into a BCP-47 tag using the standard separator
2075      * ("-").
2076      */
joinBcp47Subtags(List<String> strings)2077     private static String joinBcp47Subtags(List<String> strings) {
2078         final int size = strings.size();
2079 
2080         StringBuilder sb = new StringBuilder(strings.get(0).length());
2081         for (int i = 0; i < size; ++i) {
2082             sb.append(strings.get(i));
2083             if (i != size - 1) {
2084                 sb.append('-');
2085             }
2086         }
2087 
2088         return sb.toString();
2089     }
2090 
2091     /**
2092      * @hide for internal use only.
2093      */
adjustLanguageCode(String languageCode)2094     public static String adjustLanguageCode(String languageCode) {
2095         String adjusted = languageCode.toLowerCase(Locale.US);
2096         // Map new language codes to the obsolete language
2097         // codes so the correct resource bundles will be used.
2098         if (languageCode.equals("he")) {
2099             adjusted = "iw";
2100         } else if (languageCode.equals("id")) {
2101             adjusted = "in";
2102         } else if (languageCode.equals("yi")) {
2103             adjusted = "ji";
2104         }
2105 
2106         return adjusted;
2107     }
2108 
convertGrandfatheredTag(String original)2109     private static String convertGrandfatheredTag(String original) {
2110         final String converted = GRANDFATHERED_LOCALES.get(original);
2111         return converted != null ? converted : original;
2112     }
2113 
2114     /**
2115      * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)}
2116      * and appends valid variant subtags upto the first invalid subtag  (if any) to
2117      * {@code normalizedVariants}.
2118      */
extractVariantSubtags(String[] subtags, int startIndex, int endIndex, List<String> normalizedVariants)2119     private static void extractVariantSubtags(String[] subtags, int startIndex, int endIndex,
2120             List<String> normalizedVariants) {
2121         for (int i = startIndex; i < endIndex; i++) {
2122             final String subtag = subtags[i];
2123 
2124             if (Builder.isValidVariantSubtag(subtag)) {
2125                 normalizedVariants.add(subtag);
2126             } else {
2127                 break;
2128             }
2129         }
2130     }
2131 
2132     /**
2133      * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)}
2134      * and inserts valid extensions into {@code extensions}. The scan is aborted
2135      * when an invalid extension is encountered. Returns the index of the first
2136      * unparsable element of {@code subtags}.
2137      */
extractExtensions(String[] subtags, int startIndex, int endIndex, Map<Character, String> extensions)2138     private static int extractExtensions(String[] subtags, int startIndex, int endIndex,
2139             Map<Character, String> extensions) {
2140         int privateUseExtensionIndex = -1;
2141         int extensionKeyIndex = -1;
2142 
2143         int i = startIndex;
2144         for (; i < endIndex; i++) {
2145             final String subtag = subtags[i];
2146 
2147             final boolean parsingPrivateUse = (privateUseExtensionIndex != -1) &&
2148                     (extensionKeyIndex == privateUseExtensionIndex);
2149 
2150             // Note that private use extensions allow subtags of length 1.
2151             // Private use extensions *must* come last, so there's no ambiguity
2152             // in that case.
2153             if (subtag.length() == 1 && !parsingPrivateUse) {
2154                 // Emit the last extension we encountered if any. First check
2155                 // whether we encountered two keys in a row (which is an error).
2156                 // Also checks if we already have an extension with the same key,
2157                 // which is again an error.
2158                 if (extensionKeyIndex != -1) {
2159                     if ((i - 1) == extensionKeyIndex) {
2160                         return extensionKeyIndex;
2161                     }
2162 
2163                     final String key = subtags[extensionKeyIndex].toLowerCase(Locale.ROOT);
2164                     if (extensions.containsKey(key.charAt(0))) {
2165                         return extensionKeyIndex;
2166                     }
2167 
2168                     final String value = concatenateRange(subtags, extensionKeyIndex + 1, i);
2169                     extensions.put(key.charAt(0), value.toLowerCase(Locale.ROOT));
2170                 }
2171 
2172                 // Mark the start of the next extension. Also keep track of whether this
2173                 // is a private use extension, and throw an error if it doesn't come last.
2174                 extensionKeyIndex = i;
2175                 if ("x".equals(subtag.toLowerCase(Locale.ROOT))) {
2176                     privateUseExtensionIndex = i;
2177                 } else if (privateUseExtensionIndex != -1) {
2178                     // The private use extension must come last.
2179                     return privateUseExtensionIndex;
2180                 }
2181             } else if (extensionKeyIndex != -1) {
2182                 // We must have encountered a valid key in order to start parsing
2183                 // its subtags.
2184                 if (!isValidBcp47Alphanum(subtag, parsingPrivateUse ? 1 : 2, 8)) {
2185                     return i;
2186                 }
2187             } else {
2188                 // Encountered a value without a preceding key.
2189                 return i;
2190             }
2191         }
2192 
2193         if (extensionKeyIndex != -1) {
2194             if ((i - 1) == extensionKeyIndex) {
2195                 return extensionKeyIndex;
2196             }
2197 
2198             final String key = subtags[extensionKeyIndex].toLowerCase(Locale.ROOT);
2199             if (extensions.containsKey(key.charAt(0))) {
2200                 return extensionKeyIndex;
2201             }
2202 
2203             final String value = concatenateRange(subtags, extensionKeyIndex + 1, i);
2204             extensions.put(key.charAt(0), value.toLowerCase(Locale.ROOT));
2205         }
2206 
2207         return i;
2208     }
2209 
forLanguageTag( String tag, boolean strict)2210     private static Locale forLanguageTag(/* @Nonnull */ String tag, boolean strict) {
2211         final String converted = convertGrandfatheredTag(tag);
2212         final String[] subtags = converted.split("-");
2213 
2214         int lastSubtag = subtags.length;
2215         for (int i = 0; i < subtags.length; ++i) {
2216             final String subtag = subtags[i];
2217             if (subtag.isEmpty() || subtag.length() > 8) {
2218                 if (strict) {
2219                     throw new IllformedLocaleException("Invalid subtag at index: " + i
2220                             + " in tag: " + tag);
2221                 } else {
2222                     lastSubtag = (i - 1);
2223                 }
2224 
2225                 break;
2226             }
2227         }
2228 
2229         final String languageCode = Builder.normalizeAndValidateLanguage(subtags[0], strict);
2230         String scriptCode = "";
2231         int nextSubtag = 1;
2232         if (lastSubtag > nextSubtag) {
2233             scriptCode = Builder.normalizeAndValidateScript(subtags[nextSubtag], false /* strict */);
2234             if (!scriptCode.isEmpty()) {
2235                 nextSubtag++;
2236             }
2237         }
2238 
2239         String regionCode = "";
2240         if (lastSubtag > nextSubtag) {
2241             regionCode = Builder.normalizeAndValidateRegion(subtags[nextSubtag], false /* strict */);
2242             if (!regionCode.isEmpty()) {
2243                 nextSubtag++;
2244             }
2245         }
2246 
2247         List<String> variants = null;
2248         if (lastSubtag > nextSubtag) {
2249             variants = new ArrayList<String>();
2250             extractVariantSubtags(subtags, nextSubtag, lastSubtag, variants);
2251             nextSubtag += variants.size();
2252         }
2253 
2254         Map<Character, String> extensions = Collections.EMPTY_MAP;
2255         if (lastSubtag > nextSubtag) {
2256             extensions = new TreeMap<Character, String>();
2257             nextSubtag = extractExtensions(subtags, nextSubtag, lastSubtag, extensions);
2258         }
2259 
2260         if (nextSubtag != lastSubtag) {
2261             if (strict) {
2262                 throw new IllformedLocaleException("Unparseable subtag: " + subtags[nextSubtag]
2263                         + " from language tag: " + tag);
2264             }
2265         }
2266 
2267         Set<String> unicodeKeywords = Collections.EMPTY_SET;
2268         Map<String, String> unicodeAttributes = Collections.EMPTY_MAP;
2269         if (extensions.containsKey(UNICODE_LOCALE_EXTENSION)) {
2270             unicodeKeywords = new TreeSet<String>();
2271             unicodeAttributes = new TreeMap<String, String>();
2272             parseUnicodeExtension(extensions.get(UNICODE_LOCALE_EXTENSION).split("-"),
2273                     unicodeAttributes, unicodeKeywords);
2274         }
2275 
2276         String variantCode = "";
2277         if (variants != null && !variants.isEmpty()) {
2278             StringBuilder variantsBuilder = new StringBuilder(variants.size() * 8);
2279             for (int i = 0; i < variants.size(); ++i) {
2280                 if (i != 0) {
2281                     variantsBuilder.append('_');
2282                 }
2283                 variantsBuilder.append(variants.get(i));
2284             }
2285             variantCode = variantsBuilder.toString();
2286         }
2287 
2288         return new Locale(languageCode, regionCode, variantCode, scriptCode,
2289                 unicodeKeywords, unicodeAttributes, extensions, true /* has validated fields */);
2290     }
2291 }
2292