1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.util; 19 20 import java.io.IOException; 21 import java.io.ObjectInputStream; 22 import java.io.ObjectOutputStream; 23 import java.io.ObjectStreamField; 24 import java.io.Serializable; 25 import java.nio.charset.StandardCharsets; 26 import libcore.icu.ICU; 27 28 /** 29 * {@code Locale} represents a language/country/variant combination. Locales are used to 30 * alter the presentation of information such as numbers or dates to suit the conventions 31 * in the region they describe. 32 * 33 * <p>The language codes are two-letter lowercase ISO language codes (such as "en") as defined by 34 * <a href="http://en.wikipedia.org/wiki/ISO_639-1">ISO 639-1</a>. 35 * The country codes are two-letter uppercase ISO country codes (such as "US") as defined by 36 * <a href="http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2">ISO 3166-1</a>. 37 * The variant codes are unspecified. 38 * 39 * <p>Note that Java uses several deprecated two-letter codes. The Hebrew ("he") language 40 * code is rewritten as "iw", Indonesian ("id") as "in", and Yiddish ("yi") as "ji". 
This 41 * rewriting happens even if you construct your own {@code Locale} object, not just for 42 * instances returned by the various lookup methods. 43 * 44 * <a name="available_locales"></a><h3>Available locales</h3> 45 * <p>This class' constructors do no error checking. You can create a {@code Locale} for languages 46 * and countries that don't exist, and you can create instances for combinations that don't 47 * exist (such as "de_US" for "German as spoken in the US"). 48 * 49 * <p>Note that locale data is not necessarily available for any of the locales pre-defined as 50 * constants in this class except for en_US, which is the only locale Java guarantees is always 51 * available. 52 * 53 * <p>It is also a mistake to assume that all devices have the same locales available. 54 * A device sold in the US will almost certainly support en_US and es_US, but not necessarily 55 * any locales with the same language but different countries (such as en_GB or es_ES), 56 * nor any locales for other languages (such as de_DE). The opposite may well be true for a device 57 * sold in Europe. 58 * 59 * <p>You can use {@link Locale#getDefault} to get an appropriate locale for the <i>user</i> of the 60 * device you're running on, or {@link Locale#getAvailableLocales} to get a list of all the locales 61 * available on the device you're running on. 62 * 63 * <a name="locale_data"></a><h3>Locale data</h3> 64 * <p>Note that locale data comes solely from ICU. User-supplied locale service providers (using 65 * the {@code java.text.spi} or {@code java.util.spi} mechanisms) are not supported. 
66 * 67 * <p>Here are the versions of ICU (and the corresponding CLDR and Unicode versions) used in 68 * various Android releases: 69 * <table BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""> 70 * <tr><td>Android 1.5 (Cupcake)/Android 1.6 (Donut)/Android 2.0 (Eclair)</td> 71 * <td>ICU 3.8</td> 72 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-5">CLDR 1.5</a></td> 73 * <td><a href="http://www.unicode.org/versions/Unicode5.0.0/">Unicode 5.0</a></td></tr> 74 * <tr><td>Android 2.2 (Froyo)</td> 75 * <td>ICU 4.2</td> 76 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-7">CLDR 1.7</a></td> 77 * <td><a href="http://www.unicode.org/versions/Unicode5.1.0/">Unicode 5.1</a></td></tr> 78 * <tr><td>Android 2.3 (Gingerbread)/Android 3.0 (Honeycomb)</td> 79 * <td>ICU 4.4</td> 80 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-8">CLDR 1.8</a></td> 81 * <td><a href="http://www.unicode.org/versions/Unicode5.2.0/">Unicode 5.2</a></td></tr> 82 * <tr><td>Android 4.0 (Ice Cream Sandwich)</td> 83 * <td><a href="http://site.icu-project.org/download/46">ICU 4.6</a></td> 84 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-9">CLDR 1.9</a></td> 85 * <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr> 86 * <tr><td>Android 4.1 (Jelly Bean)</td> 87 * <td><a href="http://site.icu-project.org/download/48">ICU 4.8</a></td> 88 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-2-0">CLDR 2.0</a></td> 89 * <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr> 90 * <tr><td>Android 4.3 (Jelly Bean MR2)</td> 91 * <td><a href="http://site.icu-project.org/download/50">ICU 50</a></td> 92 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-22-1">CLDR 22.1</a></td> 93 * <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr> 94 * <tr><td>Android 4.4 (KitKat)</td> 95 * <td><a href="http://site.icu-project.org/download/51">ICU 
51</a></td> 96 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-23">CLDR 23</a></td> 97 * <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr> 98 * <tr><td>Android 5.0 (Lollipop)</td> 99 * <td><a href="http://site.icu-project.org/download/53">ICU 53</a></td> 100 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-25">CLDR 25</a></td> 101 * <td><a href="http://www.unicode.org/versions/Unicode6.3.0/">Unicode 6.3</a></td></tr> 102 * <tr><td>Android 6.0 (Marshmallow)</td> 103 * <td><a href="http://site.icu-project.org/download/55">ICU 55.1</a></td> 104 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-27">CLDR 27.0.1</a></td> 105 * <td><a href="http://www.unicode.org/versions/Unicode7.0.0/">Unicode 7.0</a></td></tr> 106 * </table> 107 * 108 * <a name="default_locale"></a><h3>Be wary of the default locale</h3> 109 * <p>Note that there are many convenience methods that automatically use the default locale, but 110 * using them may lead to subtle bugs. 111 * 112 * <p>The default locale is appropriate for tasks that involve presenting data to the user. In 113 * this case, you want to use the user's date/time formats, number 114 * formats, rules for conversion to lowercase, and so on. In this case, it's safe to use the 115 * convenience methods. 116 * 117 * <p>The default locale is <i>not</i> appropriate for machine-readable output. The best choice 118 * there is usually {@code Locale.US} – this locale is guaranteed to be available on all 119 * devices, and the fact that it has no surprising special cases and is frequently used (especially 120 * for computer-computer communication) means that it tends to be the most efficient choice too. 121 * 122 * <p>A common mistake is to implicitly use the default locale when producing output meant to be 123 * machine-readable. 
This tends to work on the developer's test devices (especially because so many 124 * developers use en_US), but fails when run on a device whose user is in a more complex locale. 125 * 126 * <p>For example, if you're formatting integers some locales will use non-ASCII decimal 127 * digits. As another example, if you're formatting floating-point numbers some locales will use 128 * {@code ','} as the decimal point and {@code '.'} for digit grouping. That's correct for 129 * human-readable output, but likely to cause problems if presented to another 130 * computer ({@link Double#parseDouble} can't parse such a number, for example). 131 * You should also be wary of the {@link String#toLowerCase} and 132 * {@link String#toUpperCase} overloads that don't take a {@code Locale}: in Turkey, for example, 133 * the characters {@code 'i'} and {@code 'I'} won't be converted to {@code 'I'} and {@code 'i'}. 134 * This is the correct behavior for Turkish text (such as user input), but inappropriate for, say, 135 * HTTP headers. 136 */ 137 public final class Locale implements Cloneable, Serializable { 138 139 private static final long serialVersionUID = 9149081749638150636L; 140 141 /** 142 * Locale constant for en_CA. 143 */ 144 public static final Locale CANADA = new Locale(true, "en", "CA"); 145 146 /** 147 * Locale constant for fr_CA. 148 */ 149 public static final Locale CANADA_FRENCH = new Locale(true, "fr", "CA"); 150 151 /** 152 * Locale constant for zh_CN. 153 */ 154 public static final Locale CHINA = new Locale(true, "zh", "CN"); 155 156 /** 157 * Locale constant for zh. 158 */ 159 public static final Locale CHINESE = new Locale(true, "zh", ""); 160 161 /** 162 * Locale constant for en. 163 */ 164 public static final Locale ENGLISH = new Locale(true, "en", ""); 165 166 /** 167 * Locale constant for fr_FR. 168 */ 169 public static final Locale FRANCE = new Locale(true, "fr", "FR"); 170 171 /** 172 * Locale constant for fr. 
173 */ 174 public static final Locale FRENCH = new Locale(true, "fr", ""); 175 176 /** 177 * Locale constant for de. 178 */ 179 public static final Locale GERMAN = new Locale(true, "de", ""); 180 181 /** 182 * Locale constant for de_DE. 183 */ 184 public static final Locale GERMANY = new Locale(true, "de", "DE"); 185 186 /** 187 * Locale constant for it. 188 */ 189 public static final Locale ITALIAN = new Locale(true, "it", ""); 190 191 /** 192 * Locale constant for it_IT. 193 */ 194 public static final Locale ITALY = new Locale(true, "it", "IT"); 195 196 /** 197 * Locale constant for ja_JP. 198 */ 199 public static final Locale JAPAN = new Locale(true, "ja", "JP"); 200 201 /** 202 * Locale constant for ja. 203 */ 204 public static final Locale JAPANESE = new Locale(true, "ja", ""); 205 206 /** 207 * Locale constant for ko_KR. 208 */ 209 public static final Locale KOREA = new Locale(true, "ko", "KR"); 210 211 /** 212 * Locale constant for ko. 213 */ 214 public static final Locale KOREAN = new Locale(true, "ko", ""); 215 216 /** 217 * Locale constant for zh_CN. 218 */ 219 public static final Locale PRC = new Locale(true, "zh", "CN"); 220 221 /** 222 * Locale constant for the root locale. The root locale has an empty language, 223 * country, and variant. 224 * 225 * @since 1.6 226 */ 227 public static final Locale ROOT = new Locale(true, "", ""); 228 229 /** 230 * Locale constant for zh_CN. 231 */ 232 public static final Locale SIMPLIFIED_CHINESE = new Locale(true, "zh", "CN"); 233 234 /** 235 * Locale constant for zh_TW. 236 */ 237 public static final Locale TAIWAN = new Locale(true, "zh", "TW"); 238 239 /** 240 * Locale constant for zh_TW. 241 */ 242 public static final Locale TRADITIONAL_CHINESE = new Locale(true, "zh", "TW"); 243 244 /** 245 * Locale constant for en_GB. 246 */ 247 public static final Locale UK = new Locale(true, "en", "GB"); 248 249 /** 250 * Locale constant for en_US. 
251 */ 252 public static final Locale US = new Locale(true, "en", "US"); 253 254 /** 255 * BCP-47 extension identifier (or "singleton") for the private 256 * use extension. 257 * 258 * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}. 259 * 260 * @since 1.7 261 */ 262 public static final char PRIVATE_USE_EXTENSION = 'x'; 263 264 /** 265 * BCP-47 extension identifier (or "singleton") for the unicode locale extension. 266 * 267 * 268 * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}. 269 * 270 * @since 1.7 271 */ 272 public static final char UNICODE_LOCALE_EXTENSION = 'u'; 273 274 /** 275 * ISO 639-3 generic code for undetermined languages. 276 */ 277 private static final String UNDETERMINED_LANGUAGE = "und"; 278 279 280 /** 281 * Map of grandfathered language tags to their modern replacements. 282 */ 283 private static final TreeMap<String, String> GRANDFATHERED_LOCALES; 284 285 static { 286 GRANDFATHERED_LOCALES = new TreeMap<String, String>(String.CASE_INSENSITIVE_ORDER); 287 288 // From http://tools.ietf.org/html/bcp47 289 // 290 // grandfathered = irregular ; non-redundant tags registered 291 // / regular ; during the RFC 3066 era 292 // irregular = 293 GRANDFATHERED_LOCALES.put("en-GB-oed", "en-GB-x-oed"); 294 GRANDFATHERED_LOCALES.put("i-ami", "ami"); 295 GRANDFATHERED_LOCALES.put("i-bnn", "bnn"); 296 GRANDFATHERED_LOCALES.put("i-default", "en-x-i-default"); 297 GRANDFATHERED_LOCALES.put("i-enochian", "und-x-i-enochian"); 298 GRANDFATHERED_LOCALES.put("i-hak", "hak"); 299 GRANDFATHERED_LOCALES.put("i-klingon", "tlh"); 300 GRANDFATHERED_LOCALES.put("i-lux", "lb"); 301 GRANDFATHERED_LOCALES.put("i-mingo", "see-x-i-mingo"); 302 GRANDFATHERED_LOCALES.put("i-navajo", "nv"); 303 GRANDFATHERED_LOCALES.put("i-pwn", "pwn"); 304 GRANDFATHERED_LOCALES.put("i-tao", "tao"); 305 GRANDFATHERED_LOCALES.put("i-tay", "tay"); 306 GRANDFATHERED_LOCALES.put("i-tsu", "tsu"); 307 GRANDFATHERED_LOCALES.put("sgn-BE-FR", 
"sfb"); 308 GRANDFATHERED_LOCALES.put("sgn-BE-NL", "vgt"); 309 GRANDFATHERED_LOCALES.put("sgn-CH-DE", "sgg"); 310 311 // regular = 312 GRANDFATHERED_LOCALES.put("art-lojban", "jbo"); 313 GRANDFATHERED_LOCALES.put("cel-gaulish", "xtg-x-cel-gaulish"); 314 GRANDFATHERED_LOCALES.put("no-bok", "nb"); 315 GRANDFATHERED_LOCALES.put("no-nyn", "nn"); 316 GRANDFATHERED_LOCALES.put("zh-guoyu", "cmn"); 317 GRANDFATHERED_LOCALES.put("zh-hakka", "hak"); 318 GRANDFATHERED_LOCALES.put("zh-min", "nan-x-zh-min"); 319 GRANDFATHERED_LOCALES.put("zh-min-nan", "nan"); 320 GRANDFATHERED_LOCALES.put("zh-xiang", "hsn"); 321 } 322 323 private static class NoImagePreloadHolder { 324 /** 325 * The default locale, returned by {@code Locale.getDefault()}. 326 * Initialize the default locale from the system properties. 327 */ 328 private static Locale defaultLocale = Locale.getDefaultLocaleFromSystemProperties(); 329 } 330 331 /** 332 * Returns the default locale from system properties. 333 * 334 * @hide visible for testing. 335 */ getDefaultLocaleFromSystemProperties()336 public static Locale getDefaultLocaleFromSystemProperties() { 337 final String languageTag = System.getProperty("user.locale", ""); 338 339 final Locale defaultLocale; 340 if (!languageTag.isEmpty()) { 341 defaultLocale = Locale.forLanguageTag(languageTag); 342 } else { 343 String language = System.getProperty("user.language", "en"); 344 String region = System.getProperty("user.region", "US"); 345 String variant = System.getProperty("user.variant", ""); 346 defaultLocale = new Locale(language, region, variant); 347 } 348 349 return defaultLocale; 350 } 351 352 /** 353 * A class that helps construct {@link Locale} instances. 354 * 355 * Unlike the public {@code Locale} constructors, the methods of this class 356 * perform much stricter checks on their input. 
357 * 358 * Validity checks on the {@code language}, {@code country}, {@code variant} 359 * and {@code extension} values are carried out as per the 360 * <a href="https://tools.ietf.org/html/bcp47">BCP-47</a> specification. 361 * 362 * In addition, we treat the <a href="http://www.unicode.org/reports/tr35/"> 363 * Unicode locale extension</a> specially and provide methods to manipulate 364 * the structured state (keywords and attributes) specified therein. 365 * 366 * @since 1.7 367 */ 368 public static final class Builder { 369 private String language; 370 private String region; 371 private String variant; 372 private String script; 373 374 private final Set<String> attributes; 375 private final Map<String, String> keywords; 376 private final Map<Character, String> extensions; 377 Builder()378 public Builder() { 379 language = region = variant = script = ""; 380 381 // NOTE: We use sorted maps in the builder & the locale class itself 382 // because serialized forms of the unicode locale extension (and 383 // of the extension map itself) are specified to be in alphabetic 384 // order of keys. 385 attributes = new TreeSet<String>(); 386 keywords = new TreeMap<String, String>(); 387 extensions = new TreeMap<Character, String>(); 388 } 389 390 /** 391 * Sets the locale language. If {@code language} is {@code null} or empty, the 392 * previous value is cleared. 393 * 394 * As per BCP-47, the language must be between 2 and 3 ASCII characters 395 * in length and must only contain characters in the range {@code [a-zA-Z]}. 396 * 397 * This value is usually an <a href="http://www.loc.gov/standards/iso639-2/"> 398 * ISO-639-2</a> alpha-2 or alpha-3 code, though no explicit checks are 399 * carried out that it's a valid code in that namespace. 400 * 401 * Values are normalized to lower case. 402 * 403 * Note that we don't support BCP-47 "extlang" languages because they were 404 * only ever used to substitute for a lack of 3 letter language codes. 
405 * 406 * @throws IllformedLocaleException if the language was invalid. 407 */ setLanguage(String language)408 public Builder setLanguage(String language) { 409 this.language = normalizeAndValidateLanguage(language, true /* strict */); 410 return this; 411 } 412 normalizeAndValidateLanguage(String language, boolean strict)413 private static String normalizeAndValidateLanguage(String language, boolean strict) { 414 if (language == null || language.isEmpty()) { 415 return ""; 416 } 417 418 final String lowercaseLanguage = language.toLowerCase(Locale.ROOT); 419 if (!isValidBcp47Alpha(lowercaseLanguage, 2, 3)) { 420 if (strict) { 421 throw new IllformedLocaleException("Invalid language: " + language); 422 } else { 423 return UNDETERMINED_LANGUAGE; 424 } 425 } 426 427 return lowercaseLanguage; 428 } 429 430 /** 431 * Set the state of this builder to the parsed contents of the BCP-47 language 432 * tag {@code languageTag}. 433 * 434 * This method is equivalent to a call to {@link #clear} if {@code languageTag} 435 * is {@code null} or empty. 436 * 437 * <b>NOTE:</b> In contrast to {@link Locale#forLanguageTag(String)}, which 438 * simply ignores malformed input, this method will throw an exception if 439 * its input is malformed. 440 * 441 * @throws IllformedLocaleException if {@code languageTag} is not a well formed 442 * BCP-47 tag. 443 */ setLanguageTag(String languageTag)444 public Builder setLanguageTag(String languageTag) { 445 if (languageTag == null || languageTag.isEmpty()) { 446 clear(); 447 return this; 448 } 449 450 final Locale fromIcu = forLanguageTag(languageTag, true /* strict */); 451 // When we ask ICU for strict parsing, it might return a null locale 452 // if the language tag is malformed. 453 if (fromIcu == null) { 454 throw new IllformedLocaleException("Invalid languageTag: " + languageTag); 455 } 456 457 setLocale(fromIcu); 458 return this; 459 } 460 461 /** 462 * Sets the locale region. 
If {@code region} is {@code null} or empty, the 463 * previous value is cleared. 464 * 465 * As per BCP-47, the region must either be a 2 character ISO-3166-1 code 466 * (each character in the range [a-zA-Z]) OR a 3 digit UN M.49 code. 467 * 468 * Values are normalized to upper case. 469 * 470 * @throws IllformedLocaleException if {@code} region is invalid. 471 */ setRegion(String region)472 public Builder setRegion(String region) { 473 this.region = normalizeAndValidateRegion(region, true /* strict */); 474 return this; 475 } 476 normalizeAndValidateRegion(String region, boolean strict)477 private static String normalizeAndValidateRegion(String region, boolean strict) { 478 if (region == null || region.isEmpty()) { 479 return ""; 480 } 481 482 final String uppercaseRegion = region.toUpperCase(Locale.ROOT); 483 if (!isValidBcp47Alpha(uppercaseRegion, 2, 2) && 484 !isUnM49AreaCode(uppercaseRegion)) { 485 if (strict) { 486 throw new IllformedLocaleException("Invalid region: " + region); 487 } else { 488 return ""; 489 } 490 } 491 492 return uppercaseRegion; 493 } 494 495 /** 496 * Sets the locale variant. If {@code variant} is {@code null} or empty, 497 * the previous value is cleared. 498 * 499 * The input string my consist of one or more variants separated by 500 * valid separators ('-' or '_'). 501 * 502 * As per BCP-47, each variant must be between 5 and 8 alphanumeric characters 503 * in length (each character in the range {@code [a-zA-Z0-9]}) but 504 * can be exactly 4 characters in length if the first character is a digit. 505 * 506 * Note that this is a much stricter interpretation of {@code variant} 507 * than the public {@code Locale} constructors. The latter allowed free form 508 * variants. 509 * 510 * Variants are case sensitive and all separators are normalized to {@code '_'}. 511 * 512 * @throws IllformedLocaleException if {@code} variant is invalid. 
513 */ setVariant(String variant)514 public Builder setVariant(String variant) { 515 this.variant = normalizeAndValidateVariant(variant); 516 return this; 517 } 518 normalizeAndValidateVariant(String variant)519 private static String normalizeAndValidateVariant(String variant) { 520 if (variant == null || variant.isEmpty()) { 521 return ""; 522 } 523 524 // Note that unlike extensions, we canonicalize to lower case alphabets 525 // and underscores instead of hyphens. 526 final String normalizedVariant = variant.replace('-', '_'); 527 String[] subTags = normalizedVariant.split("_"); 528 529 for (String subTag : subTags) { 530 if (!isValidVariantSubtag(subTag)) { 531 throw new IllformedLocaleException("Invalid variant: " + variant); 532 } 533 } 534 535 return normalizedVariant; 536 } 537 isValidVariantSubtag(String subTag)538 private static boolean isValidVariantSubtag(String subTag) { 539 // The BCP-47 spec states that : 540 // - Subtags can be between [5, 8] alphanumeric chars in length. 541 // - Subtags that start with a number are allowed to be 4 chars in length. 542 if (subTag.length() >= 5 && subTag.length() <= 8) { 543 if (isAsciiAlphaNum(subTag)) { 544 return true; 545 } 546 } else if (subTag.length() == 4) { 547 final char firstChar = subTag.charAt(0); 548 if ((firstChar >= '0' && firstChar <= '9') && isAsciiAlphaNum(subTag)) { 549 return true; 550 } 551 } 552 553 return false; 554 } 555 556 /** 557 * Sets the locale script. If {@code script} is {@code null} or empty, 558 * the previous value is cleared. 559 * 560 * As per BCP-47, the script must be 4 characters in length, and 561 * each character in the range {@code [a-zA-Z]}. 562 * 563 * A script usually represents a valid ISO 15924 script code, though no 564 * other registry or validity checks are performed. 565 * 566 * Scripts are normalized to title cased values. 567 * 568 * @throws IllformedLocaleException if {@code script} is invalid. 
569 */ setScript(String script)570 public Builder setScript(String script) { 571 this.script = normalizeAndValidateScript(script, true /* strict */); 572 return this; 573 } 574 normalizeAndValidateScript(String script, boolean strict)575 private static String normalizeAndValidateScript(String script, boolean strict) { 576 if (script == null || script.isEmpty()) { 577 return ""; 578 } 579 580 if (!isValidBcp47Alpha(script, 4, 4)) { 581 if (strict) { 582 throw new IllformedLocaleException("Invalid script: " + script); 583 } else { 584 return ""; 585 } 586 } 587 588 return titleCaseAsciiWord(script); 589 } 590 591 /** 592 * Sets the state of the builder to the {@link Locale} represented by 593 * {@code locale}. 594 * 595 * Note that the locale's language, region and variant are validated as per 596 * the rules specified in {@link #setLanguage}, {@link #setRegion} and 597 * {@link #setVariant}. 598 * 599 * All existing builder state is discarded. 600 * 601 * @throws IllformedLocaleException if {@code locale} is invalid. 602 * @throws NullPointerException if {@code locale} is null. 603 */ setLocale(Locale locale)604 public Builder setLocale(Locale locale) { 605 if (locale == null) { 606 throw new NullPointerException("locale == null"); 607 } 608 609 // Make copies of the existing values so that we don't partially 610 // update the state if we encounter an error. 611 final String backupLanguage = language; 612 final String backupRegion = region; 613 final String backupVariant = variant; 614 615 try { 616 setLanguage(locale.getLanguage()); 617 setRegion(locale.getCountry()); 618 setVariant(locale.getVariant()); 619 } catch (IllformedLocaleException ifle) { 620 language = backupLanguage; 621 region = backupRegion; 622 variant = backupVariant; 623 624 throw ifle; 625 } 626 627 // The following values can be set only via the builder class, so 628 // there's no need to normalize them or check their validity. 
629 630 this.script = locale.getScript(); 631 632 extensions.clear(); 633 extensions.putAll(locale.extensions); 634 635 keywords.clear(); 636 keywords.putAll(locale.unicodeKeywords); 637 638 attributes.clear(); 639 attributes.addAll(locale.unicodeAttributes); 640 641 return this; 642 } 643 644 /** 645 * Adds the specified attribute to the list of attributes in the unicode 646 * locale extension. 647 * 648 * Attributes must be between 3 and 8 characters in length, and each character 649 * must be in the range {@code [a-zA-Z0-9]}. 650 * 651 * Attributes are normalized to lower case values. All added attributes and 652 * keywords are combined to form a complete unicode locale extension on 653 * {@link Locale} objects built by this builder, and accessible via 654 * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION} 655 * key. 656 * 657 * @throws IllformedLocaleException if {@code attribute} is invalid. 658 * @throws NullPointerException if {@code attribute} is null. 659 */ addUnicodeLocaleAttribute(String attribute)660 public Builder addUnicodeLocaleAttribute(String attribute) { 661 if (attribute == null) { 662 throw new NullPointerException("attribute == null"); 663 } 664 665 final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT); 666 if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) { 667 throw new IllformedLocaleException("Invalid locale attribute: " + attribute); 668 } 669 670 attributes.add(lowercaseAttribute); 671 672 return this; 673 } 674 675 /** 676 * Removes an attribute from the list of attributes in the unicode locale 677 * extension. 678 * 679 * {@code attribute} must be valid as per the rules specified in 680 * {@link #addUnicodeLocaleAttribute}. 681 * 682 * This method has no effect if {@code attribute} hasn't already been 683 * added. 684 * 685 * @throws IllformedLocaleException if {@code attribute} is invalid. 686 * @throws NullPointerException if {@code attribute} is null. 
687 */ removeUnicodeLocaleAttribute(String attribute)688 public Builder removeUnicodeLocaleAttribute(String attribute) { 689 if (attribute == null) { 690 throw new NullPointerException("attribute == null"); 691 } 692 693 // Weirdly, remove is specified to check whether the attribute 694 // is valid, so we have to perform the full alphanumeric check here. 695 final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT); 696 if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) { 697 throw new IllformedLocaleException("Invalid locale attribute: " + attribute); 698 } 699 700 attributes.remove(attribute); 701 return this; 702 } 703 704 /** 705 * Sets the extension identified by {@code key} to {@code value}. 706 * 707 * {@code key} must be in the range {@code [a-zA-Z0-9]}. 708 * 709 * If {@code value} is {@code null} or empty, the extension is removed. 710 * 711 * In the general case, {@code value} must be a series of subtags separated 712 * by ({@code "-"} or {@code "_"}). Each subtag must be between 713 * 2 and 8 characters in length, and each character in the subtag must be in 714 * the range {@code [a-zA-Z0-9]}. 715 * 716 * <p> 717 * There are two special cases : 718 * <li> 719 * <ul> 720 * The unicode locale extension 721 * ({@code key == 'u'}, {@link Locale#UNICODE_LOCALE_EXTENSION}) : Setting 722 * the unicode locale extension results in all existing keyword and attribute 723 * state being replaced by the parsed result of {@code value}. 
For example, 724 * {@code builder.setExtension('u', "baaaz-baaar-fo-baar-ba-baaz")} 725 * is equivalent to: 726 * <pre> 727 * builder.addUnicodeLocaleAttribute("baaaz"); 728 * builder.addUnicodeLocaleAttribute("baaar"); 729 * builder.setUnicodeLocaleKeyword("fo", "baar"); 730 * builder.setUnicodeLocaleKeyword("ba", "baaa"); 731 * </pre> 732 * </ul> 733 * <ul> 734 * The private use extension 735 * ({@code key == 'x'}, {@link Locale#PRIVATE_USE_EXTENSION}) : Each subtag in a 736 * private use extension can be between 1 and 8 characters in length (in contrast 737 * to a minimum length of 2 for all other extensions). 738 * </ul> 739 * </li> 740 * 741 * @throws IllformedLocaleException if {@code value} is invalid. 742 */ setExtension(char key, String value)743 public Builder setExtension(char key, String value) { 744 if (value == null || value.isEmpty()) { 745 extensions.remove(key); 746 return this; 747 } 748 749 final String normalizedValue = value.toLowerCase(Locale.ROOT).replace('_', '-'); 750 final String[] subtags = normalizedValue.split("-"); 751 final char normalizedKey = Character.toLowerCase(key); 752 753 // Lengths for subtags in the private use extension should be [1, 8] chars. 754 // For all other extensions, they should be [2, 8] chars. 755 // 756 // http://www.rfc-editor.org/rfc/bcp/bcp47.txt 757 final int minimumLength = (normalizedKey == PRIVATE_USE_EXTENSION) ? 1 : 2; 758 for (String subtag : subtags) { 759 if (!isValidBcp47Alphanum(subtag, minimumLength, 8)) { 760 throw new IllformedLocaleException( 761 "Invalid private use extension : " + value); 762 } 763 } 764 765 // We need to take special action in the case of unicode extensions, 766 // since we claim to understand their keywords and attributes. 767 if (normalizedKey == UNICODE_LOCALE_EXTENSION) { 768 // First clear existing attributes and keywords. 
769 extensions.clear(); 770 attributes.clear(); 771 772 parseUnicodeExtension(subtags, keywords, attributes); 773 } else { 774 extensions.put(normalizedKey, normalizedValue); 775 } 776 777 return this; 778 } 779 780 /** 781 * Clears all extensions from this builder. Note that this also implicitly 782 * clears all state related to the unicode locale extension; all attributes 783 * and keywords set by {@link #addUnicodeLocaleAttribute} and 784 * {@link #setUnicodeLocaleKeyword} are cleared. 785 */ clearExtensions()786 public Builder clearExtensions() { 787 extensions.clear(); 788 attributes.clear(); 789 keywords.clear(); 790 return this; 791 } 792 793 /** 794 * Adds a key / type pair to the list of unicode locale extension keys. 795 * 796 * {@code key} must be 2 characters in length, and each character must be 797 * in the range {@code [a-zA-Z0-9]}. 798 * 799 * {#code type} can either be empty, or a series of one or more subtags 800 * separated by a separator ({@code "-"} or {@code "_"}). Each subtag must 801 * be between 3 and 8 characters in length and each character in the subtag 802 * must be in the range {@code [a-zA-Z0-9]}. 803 * 804 * Note that the type is normalized to lower case, and all separators 805 * are normalized to {@code "-"}. All added attributes and 806 * keywords are combined to form a complete unicode locale extension on 807 * {@link Locale} objects built by this builder, and accessible via 808 * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION} 809 * key. 810 * 811 * @throws IllformedLocaleException if {@code key} or {@code value} are 812 * invalid. 
813 */ setUnicodeLocaleKeyword(String key, String type)814 public Builder setUnicodeLocaleKeyword(String key, String type) { 815 if (key == null) { 816 throw new NullPointerException("key == null"); 817 } 818 819 if (type == null && keywords != null) { 820 keywords.remove(key); 821 return this; 822 } 823 824 final String lowerCaseKey = key.toLowerCase(Locale.ROOT); 825 // The key must be exactly two alphanumeric characters. 826 if (lowerCaseKey.length() != 2 || !isAsciiAlphaNum(lowerCaseKey)) { 827 throw new IllformedLocaleException("Invalid unicode locale keyword: " + key); 828 } 829 830 // The type can be one or more alphanumeric strings of length [3, 8] characters, 831 // separated by a separator char, which is one of "_" or "-". Though the spec 832 // doesn't require it, we normalize all "_" to "-" to make the rest of our 833 // processing easier. 834 final String lowerCaseType = type.toLowerCase(Locale.ROOT).replace("_", "-"); 835 if (!isValidTypeList(lowerCaseType)) { 836 throw new IllformedLocaleException("Invalid unicode locale type: " + type); 837 } 838 839 // Everything checks out fine, add the <key, type> mapping to the list. 840 keywords.put(lowerCaseKey, lowerCaseType); 841 842 return this; 843 } 844 845 /** 846 * Clears all existing state from this builder. 847 */ clear()848 public Builder clear() { 849 clearExtensions(); 850 language = region = variant = script = ""; 851 852 return this; 853 } 854 855 /** 856 * Constructs a locale from the existing state of the builder. Note that this 857 * method is guaranteed to succeed since field validity checks are performed 858 * at the point of setting them. 859 */ build()860 public Locale build() { 861 // NOTE: We need to make a copy of attributes, keywords and extensions 862 // because the RI allows this builder to reused. 
863 return new Locale(language, region, variant, script, 864 attributes, keywords, extensions, 865 true /* has validated fields */); 866 } 867 } 868 869 /** 870 * Returns a locale for a given BCP-47 language tag. This method is more 871 * lenient than {@link Builder#setLanguageTag}. For a given language tag, parsing 872 * will proceed up to the first malformed subtag. All subsequent tags are discarded. 873 * Note that language tags use {@code -} rather than {@code _}, for example {@code en-US}. 874 * 875 * @throws NullPointerException if {@code languageTag} is {@code null}. 876 * 877 * @since 1.7 878 */ forLanguageTag(String languageTag)879 public static Locale forLanguageTag(String languageTag) { 880 if (languageTag == null) { 881 throw new NullPointerException("languageTag == null"); 882 } 883 884 return forLanguageTag(languageTag, false /* strict */); 885 } 886 887 private transient String countryCode; 888 private transient String languageCode; 889 private transient String variantCode; 890 private transient String scriptCode; 891 892 /* Sorted, Unmodifiable */ 893 private transient Set<String> unicodeAttributes; 894 /* Sorted, Unmodifiable */ 895 private transient Map<String, String> unicodeKeywords; 896 /* Sorted, Unmodifiable */ 897 private transient Map<Character, String> extensions; 898 899 /** 900 * Whether this instance was constructed from a builder. We can make 901 * stronger assumptions about the validity of Locale fields if this was 902 * constructed by a builder. 903 */ 904 private transient final boolean hasValidatedFields; 905 906 private transient String cachedToStringResult; 907 private transient String cachedLanguageTag; 908 private transient String cachedIcuLocaleId; 909 910 /** 911 * There's a circular dependency between toLowerCase/toUpperCase and 912 * Locale.US. Work around this by avoiding these methods when constructing 913 * the built-in locales. 
914 */ Locale(boolean hasValidatedFields, String lowerCaseLanguageCode, String upperCaseCountryCode)915 private Locale(boolean hasValidatedFields, String lowerCaseLanguageCode, 916 String upperCaseCountryCode) { 917 this.languageCode = lowerCaseLanguageCode; 918 this.countryCode = upperCaseCountryCode; 919 this.variantCode = ""; 920 this.scriptCode = ""; 921 922 this.unicodeAttributes = Collections.EMPTY_SET; 923 this.unicodeKeywords = Collections.EMPTY_MAP; 924 this.extensions = Collections.EMPTY_MAP; 925 926 this.hasValidatedFields = hasValidatedFields; 927 } 928 929 /** 930 * Constructs a new {@code Locale} using the specified language. 931 */ Locale(String language)932 public Locale(String language) { 933 this(language, "", "", "", Collections.EMPTY_SET, Collections.EMPTY_MAP, 934 Collections.EMPTY_MAP, false /* has validated fields */); 935 } 936 937 /** 938 * Constructs a new {@code Locale} using the specified language and country codes. 939 */ Locale(String language, String country)940 public Locale(String language, String country) { 941 this(language, country, "", "", Collections.EMPTY_SET, Collections.EMPTY_MAP, 942 Collections.EMPTY_MAP, false /* has validated fields */); 943 } 944 945 /** 946 * Required by libcore.icu.ICU. 
947 * 948 * @hide 949 */ Locale(String language, String country, String variant, String scriptCode, Set<String> unicodeAttributes, Map<String, String> unicodeKeywords, Map<Character, String> extensions, boolean hasValidatedFields)950 public Locale(String language, String country, String variant, String scriptCode, 951 /* nonnull */ Set<String> unicodeAttributes, 952 /* nonnull */ Map<String, String> unicodeKeywords, 953 /* nonnull */ Map<Character, String> extensions, 954 boolean hasValidatedFields) { 955 if (language == null || country == null || variant == null) { 956 throw new NullPointerException("language=" + language + 957 ",country=" + country + 958 ",variant=" + variant); 959 } 960 961 if (hasValidatedFields) { 962 this.languageCode = adjustLanguageCode(language); 963 this.countryCode = country; 964 this.variantCode = variant; 965 } else { 966 if (language.isEmpty() && country.isEmpty()) { 967 languageCode = ""; 968 countryCode = ""; 969 variantCode = variant; 970 } else { 971 languageCode = adjustLanguageCode(language); 972 countryCode = country.toUpperCase(Locale.US); 973 variantCode = variant; 974 } 975 } 976 977 this.scriptCode = scriptCode; 978 979 if (hasValidatedFields) { 980 Set<String> attribsCopy = new TreeSet<String>(unicodeAttributes); 981 Map<String, String> keywordsCopy = new TreeMap<String, String>(unicodeKeywords); 982 Map<Character, String> extensionsCopy = new TreeMap<Character, String>(extensions); 983 984 // We need to transform the list of attributes & keywords set on the 985 // builder to a unicode locale extension. i.e, if we have any keywords 986 // or attributes set, Locale#getExtension('u') should return a well 987 // formed extension. 
988 addUnicodeExtensionToExtensionsMap(attribsCopy, keywordsCopy, extensionsCopy); 989 990 this.unicodeAttributes = Collections.unmodifiableSet(attribsCopy); 991 this.unicodeKeywords = Collections.unmodifiableMap(keywordsCopy); 992 this.extensions = Collections.unmodifiableMap(extensionsCopy); 993 } else { 994 995 // The locales ja_JP_JP and th_TH_TH are ill formed since their variant is too 996 // short, however they have been used to represent a locale with the japanese imperial 997 // calendar and thai numbering respectively. We add an extension in their constructor 998 // to modernize them. 999 if ("ja".equals(language) && "JP".equals(country) && "JP".equals(variant)) { 1000 Map<String, String> keywordsCopy = new TreeMap<>(unicodeKeywords); 1001 keywordsCopy.put("ca", "japanese"); 1002 unicodeKeywords = keywordsCopy; 1003 } else if ("th".equals(language) && "TH".equals(country) && "TH".equals(variant)) { 1004 Map<String, String> keywordsCopy = new TreeMap<>(unicodeKeywords); 1005 keywordsCopy.put("nu", "thai"); 1006 unicodeKeywords = keywordsCopy; 1007 } 1008 1009 if (!unicodeKeywords.isEmpty() || !unicodeAttributes.isEmpty()) { 1010 Map<Character, String> extensionsCopy = new TreeMap<>(extensions); 1011 addUnicodeExtensionToExtensionsMap(unicodeAttributes, unicodeKeywords, extensionsCopy); 1012 extensions = extensionsCopy; 1013 } 1014 1015 this.unicodeAttributes = unicodeAttributes; 1016 this.unicodeKeywords = unicodeKeywords; 1017 this.extensions = extensions; 1018 } 1019 1020 this.hasValidatedFields = hasValidatedFields; 1021 } 1022 1023 /** 1024 * Constructs a new {@code Locale} using the specified language, country, 1025 * and variant codes. 
1026 */ Locale(String language, String country, String variant)1027 public Locale(String language, String country, String variant) { 1028 this(language, country, variant, "", Collections.EMPTY_SET, 1029 Collections.EMPTY_MAP, Collections.EMPTY_MAP, 1030 false /* has validated fields */); 1031 } 1032 clone()1033 @Override public Object clone() { 1034 try { 1035 return super.clone(); 1036 } catch (CloneNotSupportedException e) { 1037 throw new AssertionError(e); 1038 } 1039 } 1040 1041 /** 1042 * Returns true if {@code object} is a locale with the same language, 1043 * country and variant. 1044 */ equals(Object object)1045 @Override public boolean equals(Object object) { 1046 if (object == this) { 1047 return true; 1048 } 1049 if (object instanceof Locale) { 1050 Locale o = (Locale) object; 1051 return languageCode.equals(o.languageCode) 1052 && countryCode.equals(o.countryCode) 1053 && variantCode.equals(o.variantCode) 1054 && scriptCode.equals(o.scriptCode) 1055 && extensions.equals(o.extensions); 1056 1057 } 1058 return false; 1059 } 1060 1061 /** 1062 * Returns the system's installed locales. This array always includes {@code 1063 * Locale.US}, and usually several others. Most locale-sensitive classes 1064 * offer their own {@code getAvailableLocales} method, which should be 1065 * preferred over this general purpose method. 
*
     * <p>The result is whatever {@code ICU.getAvailableLocales()} reports.
     *
     * @see java.text.BreakIterator#getAvailableLocales()
     * @see java.text.Collator#getAvailableLocales()
     * @see java.text.DateFormat#getAvailableLocales()
     * @see java.text.DateFormatSymbols#getAvailableLocales()
     * @see java.text.DecimalFormatSymbols#getAvailableLocales()
     * @see java.text.NumberFormat#getAvailableLocales()
     * @see java.util.Calendar#getAvailableLocales()
     */
    public static Locale[] getAvailableLocales() {
        return ICU.getAvailableLocales();
    }

    /**
     * Returns the country code for this locale, or {@code ""} if this locale
     * doesn't correspond to a specific country.
     */
    public String getCountry() {
        return countryCode;
    }

    /**
     * Returns the user's preferred locale. This may have been overridden for
     * this process with {@link #setDefault}.
     *
     * <p>Since the user's locale changes dynamically, avoid caching this value.
     * Instead, use this method to look it up for each use.
     */
    public static Locale getDefault() {
        // The default is kept in a holder class; setDefault writes the same field.
        return NoImagePreloadHolder.defaultLocale;
    }

    /**
     * Equivalent to {@code getDisplayCountry(Locale.getDefault())}.
     */
    public final String getDisplayCountry() {
        return getDisplayCountry(getDefault());
    }

    /**
     * Returns the name of this locale's country, localized to {@code locale}.
     * Returns the empty string if this locale does not correspond to a specific
     * country.
1109 */ getDisplayCountry(Locale locale)1110 public String getDisplayCountry(Locale locale) { 1111 if (countryCode.isEmpty()) { 1112 return ""; 1113 } 1114 1115 final String normalizedRegion = Builder.normalizeAndValidateRegion( 1116 countryCode, false /* strict */); 1117 if (normalizedRegion.isEmpty()) { 1118 return countryCode; 1119 } 1120 1121 String result = ICU.getDisplayCountry(this, locale); 1122 if (result == null) { // TODO: do we need to do this, or does ICU do it for us? 1123 result = ICU.getDisplayCountry(this, Locale.getDefault()); 1124 } 1125 return result; 1126 } 1127 1128 /** 1129 * Equivalent to {@code getDisplayLanguage(Locale.getDefault())}. 1130 */ getDisplayLanguage()1131 public final String getDisplayLanguage() { 1132 return getDisplayLanguage(getDefault()); 1133 } 1134 1135 /** 1136 * Returns the name of this locale's language, localized to {@code locale}. 1137 * If the language name is unknown, the language code is returned. 1138 */ getDisplayLanguage(Locale locale)1139 public String getDisplayLanguage(Locale locale) { 1140 if (languageCode.isEmpty()) { 1141 return ""; 1142 } 1143 1144 // Hacks for backward compatibility. 1145 // 1146 // Our language tag will contain "und" if the languageCode is invalid 1147 // or missing. ICU will then return "langue indéterminée" or the equivalent 1148 // display language for the indeterminate language code. 1149 // 1150 // Sigh... ugh... and what not. 1151 final String normalizedLanguage = Builder.normalizeAndValidateLanguage( 1152 languageCode, false /* strict */); 1153 if (UNDETERMINED_LANGUAGE.equals(normalizedLanguage)) { 1154 return languageCode; 1155 } 1156 1157 // TODO: We need a new hack or a complete fix for http://b/8049507 --- We would 1158 // cover the frameworks' tracks when they were using "tl" instead of "fil". 1159 String result = ICU.getDisplayLanguage(this, locale); 1160 if (result == null) { // TODO: do we need to do this, or does ICU do it for us? 
1161 result = ICU.getDisplayLanguage(this, Locale.getDefault()); 1162 } 1163 return result; 1164 } 1165 1166 /** 1167 * Equivalent to {@code getDisplayName(Locale.getDefault())}. 1168 */ getDisplayName()1169 public final String getDisplayName() { 1170 return getDisplayName(getDefault()); 1171 } 1172 1173 /** 1174 * Returns this locale's language name, country name, and variant, localized 1175 * to {@code locale}. The exact output form depends on whether this locale 1176 * corresponds to a specific language, script, country and variant. 1177 * 1178 * <p>For example: 1179 * <ul> 1180 * <li>{@code new Locale("en").getDisplayName(Locale.US)} -> {@code English} 1181 * <li>{@code new Locale("en", "US").getDisplayName(Locale.US)} -> {@code English (United States)} 1182 * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.US)} -> {@code English (United States,Computer)} 1183 * <li>{@code Locale.fromLanguageTag("zh-Hant-CN").getDisplayName(Locale.US)} -> {@code Chinese (Traditional Han,China)} 1184 * <li>{@code new Locale("en").getDisplayName(Locale.FRANCE)} -> {@code anglais} 1185 * <li>{@code new Locale("en", "US").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis)} 1186 * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis,informatique)}. 1187 * </ul> 1188 */ getDisplayName(Locale locale)1189 public String getDisplayName(Locale locale) { 1190 int count = 0; 1191 StringBuilder buffer = new StringBuilder(); 1192 if (!languageCode.isEmpty()) { 1193 String displayLanguage = getDisplayLanguage(locale); 1194 buffer.append(displayLanguage.isEmpty() ? languageCode : displayLanguage); 1195 ++count; 1196 } 1197 if (!scriptCode.isEmpty()) { 1198 if (count == 1) { 1199 buffer.append(" ("); 1200 } 1201 String displayScript = getDisplayScript(locale); 1202 buffer.append(displayScript.isEmpty() ? 
scriptCode : displayScript); 1203 ++count; 1204 } 1205 if (!countryCode.isEmpty()) { 1206 if (count == 1) { 1207 buffer.append(" ("); 1208 } else if (count == 2) { 1209 buffer.append(","); 1210 } 1211 String displayCountry = getDisplayCountry(locale); 1212 buffer.append(displayCountry.isEmpty() ? countryCode : displayCountry); 1213 ++count; 1214 } 1215 if (!variantCode.isEmpty()) { 1216 if (count == 1) { 1217 buffer.append(" ("); 1218 } else if (count == 2 || count == 3) { 1219 buffer.append(","); 1220 } 1221 String displayVariant = getDisplayVariant(locale); 1222 buffer.append(displayVariant.isEmpty() ? variantCode : displayVariant); 1223 ++count; 1224 } 1225 if (count > 1) { 1226 buffer.append(")"); 1227 } 1228 return buffer.toString(); 1229 } 1230 1231 /** 1232 * Returns the full variant name in the default {@code Locale} for the variant code of 1233 * this {@code Locale}. If there is no matching variant name, the variant code is 1234 * returned. 1235 * 1236 * @since 1.7 1237 */ getDisplayVariant()1238 public final String getDisplayVariant() { 1239 return getDisplayVariant(getDefault()); 1240 } 1241 1242 /** 1243 * Returns the full variant name in the specified {@code Locale} for the variant code 1244 * of this {@code Locale}. If there is no matching variant name, the variant code is 1245 * returned. 1246 * 1247 * @since 1.7 1248 */ getDisplayVariant(Locale locale)1249 public String getDisplayVariant(Locale locale) { 1250 if (variantCode.isEmpty()) { 1251 return ""; 1252 } 1253 1254 try { 1255 Builder.normalizeAndValidateVariant(variantCode); 1256 } catch (IllformedLocaleException ilfe) { 1257 return variantCode; 1258 } 1259 1260 String result = ICU.getDisplayVariant(this, locale); 1261 if (result == null) { // TODO: do we need to do this, or does ICU do it for us? 1262 result = ICU.getDisplayVariant(this, Locale.getDefault()); 1263 } 1264 1265 // The "old style" locale constructors allow us to pass in variants that aren't 1266 // valid BCP-47 variant subtags. 
When that happens, toLanguageTag will not emit 1267 // them. Note that we know variantCode.length() > 0 due to the isEmpty check at 1268 // the beginning of this function. 1269 if (result.isEmpty()) { 1270 return variantCode; 1271 } 1272 return result; 1273 } 1274 1275 /** 1276 * Returns the three-letter ISO 3166 country code which corresponds to the country 1277 * code for this {@code Locale}. 1278 * @throws MissingResourceException if there's no 3-letter country code for this locale. 1279 */ getISO3Country()1280 public String getISO3Country() { 1281 // The results of getISO3Country do not depend on the languageCode, 1282 // so we pass an arbitrarily selected language code here. This guards 1283 // against errors caused by malformed or invalid language codes. 1284 String code = ICU.getISO3Country("en-" + countryCode); 1285 if (!countryCode.isEmpty() && code.isEmpty()) { 1286 throw new MissingResourceException("No 3-letter country code for locale: " + this, "FormatData_" + this, "ShortCountry"); 1287 } 1288 return code; 1289 } 1290 1291 /** 1292 * Returns the three-letter ISO 639-2/T language code which corresponds to the language 1293 * code for this {@code Locale}. 1294 * @throws MissingResourceException if there's no 3-letter language code for this locale. 1295 */ getISO3Language()1296 public String getISO3Language() { 1297 // For backward compatibility, we must return "" for an empty language 1298 // code and not "und" which is the accurate ISO-639-3 code for an 1299 // undetermined language. 1300 if (languageCode.isEmpty()) { 1301 return ""; 1302 } 1303 1304 // The results of getISO3Language do not depend on the country code 1305 // or any of the other locale fields, so we pass just the language here. 
1306 String code = ICU.getISO3Language(languageCode); 1307 if (!languageCode.isEmpty() && code.isEmpty()) { 1308 throw new MissingResourceException("No 3-letter language code for locale: " + this, "FormatData_" + this, "ShortLanguage"); 1309 } 1310 return code; 1311 } 1312 1313 /** 1314 * Returns an array of strings containing all the two-letter ISO 3166 country codes that can be 1315 * used as the country code when constructing a {@code Locale}. 1316 */ getISOCountries()1317 public static String[] getISOCountries() { 1318 return ICU.getISOCountries(); 1319 } 1320 1321 /** 1322 * Returns an array of strings containing all the two-letter ISO 639-1 language codes that can be 1323 * used as the language code when constructing a {@code Locale}. 1324 */ getISOLanguages()1325 public static String[] getISOLanguages() { 1326 return ICU.getISOLanguages(); 1327 } 1328 1329 /** 1330 * Returns the language code for this {@code Locale} or the empty string if no language 1331 * was set. 1332 */ getLanguage()1333 public String getLanguage() { 1334 return languageCode; 1335 } 1336 1337 /** 1338 * Returns the variant code for this {@code Locale} or an empty {@code String} if no variant 1339 * was set. 1340 */ getVariant()1341 public String getVariant() { 1342 return variantCode; 1343 } 1344 1345 /** 1346 * Returns the script code for this {@code Locale} or an empty {@code String} if no script 1347 * was set. 1348 * 1349 * If set, the script code will be a title cased string of length 4, as per the ISO 15924 1350 * specification. 1351 * 1352 * @since 1.7 1353 */ getScript()1354 public String getScript() { 1355 return scriptCode; 1356 } 1357 1358 /** 1359 * Equivalent to {@code getDisplayScript(Locale.getDefault()))} 1360 * 1361 * @since 1.7 1362 */ getDisplayScript()1363 public String getDisplayScript() { 1364 return getDisplayScript(getDefault()); 1365 } 1366 1367 /** 1368 * Returns the name of this locale's script code, localized to {@link Locale}. 
If the 1369 * script code is unknown, the return value of this method is the same as that of 1370 * {@link #getScript()}. 1371 * 1372 * @since 1.7 1373 */ getDisplayScript(Locale locale)1374 public String getDisplayScript(Locale locale) { 1375 if (scriptCode.isEmpty()) { 1376 return ""; 1377 } 1378 1379 String result = ICU.getDisplayScript(this, locale); 1380 if (result == null) { // TODO: do we need to do this, or does ICU do it for us? 1381 result = ICU.getDisplayScript(this, Locale.getDefault()); 1382 } 1383 1384 return result; 1385 1386 } 1387 1388 /** 1389 * Returns a well formed BCP-47 language tag that identifies this locale. 1390 * 1391 * Note that this locale itself might consist of ill formed fields, since the 1392 * public {@code Locale} constructors do not perform validity checks to maintain 1393 * backwards compatibility. When this is the case, this method will either replace 1394 * ill formed fields with standard BCP-47 subtags (For eg. "und" (undetermined) 1395 * for invalid languages) or omit them altogether. 1396 * 1397 * Additionally, ill formed variants will result in the remainder of the tag 1398 * (both variants and extensions) being moved to the private use extension, 1399 * where they will appear after a subtag whose value is {@code "lvariant"}. 1400 * 1401 * It's also important to note that the BCP-47 tag is well formed in the sense 1402 * that it is unambiguously parseable into its specified components. We do not 1403 * require that any of the components are registered with the applicable registries. 1404 * For example, we do not require scripts to be a registered ISO 15924 scripts or 1405 * languages to appear in the ISO-639-2 code list. 1406 * 1407 * @since 1.7 1408 */ toLanguageTag()1409 public String toLanguageTag() { 1410 if (cachedLanguageTag == null) { 1411 cachedLanguageTag = makeLanguageTag(); 1412 } 1413 1414 return cachedLanguageTag; 1415 } 1416 1417 /** 1418 * Constructs a valid BCP-47 language tag from locale fields. 
Additional validation 1419 * is required when this Locale was not constructed using a Builder and variants 1420 * set this way are treated specially. 1421 * 1422 * In both cases, we convert empty language tags to "und", omit invalid country tags 1423 * and perform a special case conversion of "no-NO-NY" to "nn-NO". 1424 */ makeLanguageTag()1425 private String makeLanguageTag() { 1426 // We only need to revalidate the language, country and variant because 1427 // the rest of the fields can only be set via the builder which validates 1428 // them anyway. 1429 String language = ""; 1430 String region = ""; 1431 String variant = ""; 1432 String illFormedVariantSubtags = ""; 1433 1434 if (hasValidatedFields) { 1435 language = languageCode; 1436 region = countryCode; 1437 // Note that we are required to normalize hyphens to underscores 1438 // in the builder, but we must use hyphens in the BCP-47 language tag. 1439 variant = variantCode.replace('_', '-'); 1440 } else { 1441 language = Builder.normalizeAndValidateLanguage(languageCode, false /* strict */); 1442 region = Builder.normalizeAndValidateRegion(countryCode, false /* strict */); 1443 1444 try { 1445 variant = Builder.normalizeAndValidateVariant(variantCode); 1446 } catch (IllformedLocaleException ilfe) { 1447 // If our variant is ill formed, we must attempt to split it into 1448 // its constituent subtags and preserve the well formed bits and 1449 // move the rest to the private use extension (if they're well 1450 // formed extension subtags). 
1451 String split[] = splitIllformedVariant(variantCode); 1452 1453 variant = split[0]; 1454 illFormedVariantSubtags = split[1]; 1455 } 1456 } 1457 1458 if (language.isEmpty()) { 1459 language = UNDETERMINED_LANGUAGE; 1460 } 1461 1462 if ("no".equals(language) && "NO".equals(region) && "NY".equals(variant)) { 1463 language = "nn"; 1464 region = "NO"; 1465 variant = ""; 1466 } 1467 1468 final StringBuilder sb = new StringBuilder(16); 1469 sb.append(language); 1470 1471 if (!scriptCode.isEmpty()) { 1472 sb.append('-'); 1473 sb.append(scriptCode); 1474 } 1475 1476 if (!region.isEmpty()) { 1477 sb.append('-'); 1478 sb.append(region); 1479 } 1480 1481 if (!variant.isEmpty()) { 1482 sb.append('-'); 1483 sb.append(variant); 1484 } 1485 1486 // Extensions (optional, omitted if empty). Note that we don't 1487 // emit the private use extension here, but add it in the end. 1488 for (Map.Entry<Character, String> extension : extensions.entrySet()) { 1489 if (!extension.getKey().equals('x')) { 1490 sb.append('-').append(extension.getKey()); 1491 sb.append('-').append(extension.getValue()); 1492 } 1493 } 1494 1495 // The private use extension comes right at the very end. 1496 final String privateUse = extensions.get('x'); 1497 if (privateUse != null) { 1498 sb.append("-x-"); 1499 sb.append(privateUse); 1500 } 1501 1502 // If we have any ill-formed variant subtags, we append them to the 1503 // private use extension (or add a private use extension if one doesn't 1504 // exist). 
1505 if (!illFormedVariantSubtags.isEmpty()) { 1506 if (privateUse == null) { 1507 sb.append("-x-lvariant-"); 1508 } else { 1509 sb.append('-'); 1510 } 1511 sb.append(illFormedVariantSubtags); 1512 } 1513 1514 return sb.toString(); 1515 } 1516 1517 /** 1518 * Splits ill formed variants into a set of valid variant subtags (which 1519 * can be used directly in language tag construction) and a set of invalid 1520 * variant subtags (which can be appended to the private use extension), 1521 * provided that each subtag is a valid private use extension subtag. 1522 * 1523 * This method returns a two element String array. The first element is a string 1524 * containing the concatenation of valid variant subtags which can be appended 1525 * to a BCP-47 tag directly and the second containing the concatenation of 1526 * invalid variant subtags which can be appended to the private use extension 1527 * directly. 1528 * 1529 * This method assumes that {@code variant} contains at least one ill formed 1530 * variant subtag. 1531 */ splitIllformedVariant(String variant)1532 private static String[] splitIllformedVariant(String variant) { 1533 final String normalizedVariant = variant.replace('_', '-'); 1534 final String[] subTags = normalizedVariant.split("-"); 1535 1536 final String[] split = new String[] { "", "" }; 1537 1538 // First go through the list of variant subtags and check if they're 1539 // valid private use extension subtags. If they're not, we will omit 1540 // the first such subtag and all subtags after. 1541 // 1542 // NOTE: |firstInvalidSubtag| is the index of the first variant 1543 // subtag we decide to omit altogether, whereas |firstIllformedSubtag| is the 1544 // index of the first subtag we decide to append to the private use extension. 1545 // 1546 // In other words: 1547 // [0, firstIllformedSubtag) => expressed as variant subtags. 1548 // [firstIllformedSubtag, firstInvalidSubtag) => expressed as private use 1549 // extension subtags. 
1550 // [firstInvalidSubtag, subTags.length) => omitted. 1551 int firstInvalidSubtag = subTags.length; 1552 for (int i = 0; i < subTags.length; ++i) { 1553 if (!isValidBcp47Alphanum(subTags[i], 1, 8)) { 1554 firstInvalidSubtag = i; 1555 break; 1556 } 1557 } 1558 1559 if (firstInvalidSubtag == 0) { 1560 return split; 1561 } 1562 1563 // We now consider each subtag that could potentially be appended to 1564 // the private use extension and check if it's valid. 1565 int firstIllformedSubtag = firstInvalidSubtag; 1566 for (int i = 0; i < firstInvalidSubtag; ++i) { 1567 final String subTag = subTags[i]; 1568 // The BCP-47 spec states that : 1569 // - Subtags can be between [5, 8] alphanumeric chars in length. 1570 // - Subtags that start with a number are allowed to be 4 chars in length. 1571 if (subTag.length() >= 5 && subTag.length() <= 8) { 1572 if (!isAsciiAlphaNum(subTag)) { 1573 firstIllformedSubtag = i; 1574 } 1575 } else if (subTag.length() == 4) { 1576 final char firstChar = subTag.charAt(0); 1577 if (!(firstChar >= '0' && firstChar <= '9') || !isAsciiAlphaNum(subTag)) { 1578 firstIllformedSubtag = i; 1579 } 1580 } else { 1581 firstIllformedSubtag = i; 1582 } 1583 } 1584 1585 split[0] = concatenateRange(subTags, 0, firstIllformedSubtag); 1586 split[1] = concatenateRange(subTags, firstIllformedSubtag, firstInvalidSubtag); 1587 1588 return split; 1589 } 1590 1591 /** 1592 * Builds a string by concatenating array elements within the range [start, end). 1593 * The supplied range is assumed to be valid and no checks are performed. 
1594 */ concatenateRange(String[] array, int start, int end)1595 private static String concatenateRange(String[] array, int start, int end) { 1596 StringBuilder builder = new StringBuilder(32); 1597 for (int i = start; i < end; ++i) { 1598 if (i != start) { 1599 builder.append('-'); 1600 } 1601 builder.append(array[i]); 1602 } 1603 1604 return builder.toString(); 1605 } 1606 1607 /** 1608 * Returns the set of BCP-47 extensions this locale contains. 1609 * 1610 * See <a href="https://tools.ietf.org/html/bcp47#section-2.1"> 1611 * the IETF BCP-47 specification</a> (Section 2.2.6) for details. 1612 * 1613 * @since 1.7 1614 */ getExtensionKeys()1615 public Set<Character> getExtensionKeys() { 1616 return extensions.keySet(); 1617 } 1618 1619 /** 1620 * Returns the BCP-47 extension whose key is {@code extensionKey}, or {@code null} 1621 * if this locale does not contain the extension. 1622 * 1623 * Individual Keywords and attributes for the unicode 1624 * locale extension can be fetched using {@link #getUnicodeLocaleAttributes()}, 1625 * {@link #getUnicodeLocaleKeys()} and {@link #getUnicodeLocaleType}. 1626 * 1627 * @since 1.7 1628 */ getExtension(char extensionKey)1629 public String getExtension(char extensionKey) { 1630 return extensions.get(extensionKey); 1631 } 1632 1633 /** 1634 * Returns the {@code type} for the specified unicode locale extension {@code key}. 1635 * 1636 * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword} 1637 * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a> 1638 * 1639 * @since 1.7 1640 */ getUnicodeLocaleType(String keyWord)1641 public String getUnicodeLocaleType(String keyWord) { 1642 return unicodeKeywords.get(keyWord); 1643 } 1644 1645 /** 1646 * Returns the set of unicode locale extension attributes this locale contains. 
1647 * 1648 * For more information about attributes, see {@link Builder#addUnicodeLocaleAttribute} 1649 * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a> 1650 * 1651 * @since 1.7 1652 */ getUnicodeLocaleAttributes()1653 public Set<String> getUnicodeLocaleAttributes() { 1654 return unicodeAttributes; 1655 } 1656 1657 /** 1658 * Returns the set of unicode locale extension keywords this locale contains. 1659 * 1660 * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword} 1661 * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a> 1662 * 1663 * @since 1.7 1664 */ getUnicodeLocaleKeys()1665 public Set<String> getUnicodeLocaleKeys() { 1666 return unicodeKeywords.keySet(); 1667 } 1668 1669 @Override hashCode()1670 public synchronized int hashCode() { 1671 return countryCode.hashCode() 1672 + languageCode.hashCode() + variantCode.hashCode() 1673 + scriptCode.hashCode() + extensions.hashCode(); 1674 } 1675 1676 /** 1677 * Overrides the default locale. This does not affect system configuration, 1678 * and attempts to override the system-provided default locale may 1679 * themselves be overridden by actual changes to the system configuration. 1680 * Code that calls this method is usually incorrect, and should be fixed by 1681 * passing the appropriate locale to each locale-sensitive method that's 1682 * called. 1683 */ setDefault(Locale locale)1684 public synchronized static void setDefault(Locale locale) { 1685 if (locale == null) { 1686 throw new NullPointerException("locale == null"); 1687 } 1688 String languageTag = locale.toLanguageTag(); 1689 NoImagePreloadHolder.defaultLocale = locale; 1690 ICU.setDefaultLocale(languageTag); 1691 } 1692 1693 /** 1694 * Returns the string representation of this {@code Locale}. It consists of the 1695 * language code, country code and variant separated by underscores. 
1696 * If the language is missing the string begins 1697 * with an underscore. If the country is missing there are 2 underscores 1698 * between the language and the variant. The variant cannot stand alone 1699 * without a language and/or country code: in this case this method would 1700 * return the empty string. 1701 * 1702 * <p>Examples: "en", "en_US", "_US", "en__POSIX", "en_US_POSIX" 1703 */ 1704 @Override toString()1705 public final String toString() { 1706 String result = cachedToStringResult; 1707 if (result == null) { 1708 result = cachedToStringResult = toNewString(languageCode, countryCode, variantCode, 1709 scriptCode, extensions); 1710 } 1711 return result; 1712 } 1713 toNewString(String languageCode, String countryCode, String variantCode, String scriptCode, Map<Character, String> extensions)1714 private static String toNewString(String languageCode, String countryCode, 1715 String variantCode, String scriptCode, Map<Character, String> extensions) { 1716 // The string form of a locale that only has a variant is the empty string. 1717 if (languageCode.length() == 0 && countryCode.length() == 0) { 1718 return ""; 1719 } 1720 1721 // Otherwise, the output format is "ll_cc_variant", where language and country are always 1722 // two letters, but the variant is an arbitrary length. A size of 11 characters has room 1723 // for "en_US_POSIX", the largest "common" value. (In practice, the string form is almost 1724 // always 5 characters: "ll_cc".) 
1725 StringBuilder result = new StringBuilder(11); 1726 result.append(languageCode); 1727 1728 final boolean hasScriptOrExtensions = !scriptCode.isEmpty() || !extensions.isEmpty(); 1729 1730 if (!countryCode.isEmpty() || !variantCode.isEmpty() || hasScriptOrExtensions) { 1731 result.append('_'); 1732 } 1733 result.append(countryCode); 1734 if (!variantCode.isEmpty() || hasScriptOrExtensions) { 1735 result.append('_'); 1736 } 1737 result.append(variantCode); 1738 1739 if (hasScriptOrExtensions) { 1740 if (!variantCode.isEmpty()) { 1741 result.append('_'); 1742 } 1743 1744 // Note that this is notably different from the BCP-47 spec (for 1745 // backwards compatibility). We are forced to append a "#" before the script tag. 1746 // and also put the script code right at the end. 1747 result.append("#"); 1748 if (!scriptCode.isEmpty() ) { 1749 result.append(scriptCode); 1750 } 1751 1752 // Note the use of "-" instead of "_" before the extensions. 1753 if (!extensions.isEmpty()) { 1754 if (!scriptCode.isEmpty()) { 1755 result.append('-'); 1756 } 1757 result.append(serializeExtensions(extensions)); 1758 } 1759 } 1760 1761 return result.toString(); 1762 } 1763 1764 private static final ObjectStreamField[] serialPersistentFields = { 1765 new ObjectStreamField("country", String.class), 1766 new ObjectStreamField("hashcode", int.class), 1767 new ObjectStreamField("language", String.class), 1768 new ObjectStreamField("variant", String.class), 1769 new ObjectStreamField("script", String.class), 1770 new ObjectStreamField("extensions", String.class), 1771 }; 1772 writeObject(ObjectOutputStream stream)1773 private void writeObject(ObjectOutputStream stream) throws IOException { 1774 ObjectOutputStream.PutField fields = stream.putFields(); 1775 fields.put("country", countryCode); 1776 fields.put("hashcode", -1); 1777 fields.put("language", languageCode); 1778 fields.put("variant", variantCode); 1779 fields.put("script", scriptCode); 1780 1781 if (!extensions.isEmpty()) { 1782 
fields.put("extensions", serializeExtensions(extensions)); 1783 } 1784 1785 stream.writeFields(); 1786 } 1787 readObject(ObjectInputStream stream)1788 private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { 1789 ObjectInputStream.GetField fields = stream.readFields(); 1790 countryCode = (String) fields.get("country", ""); 1791 languageCode = (String) fields.get("language", ""); 1792 variantCode = (String) fields.get("variant", ""); 1793 scriptCode = (String) fields.get("script", ""); 1794 1795 this.unicodeKeywords = Collections.EMPTY_MAP; 1796 this.unicodeAttributes = Collections.EMPTY_SET; 1797 this.extensions = Collections.EMPTY_MAP; 1798 1799 String extensions = (String) fields.get("extensions", null); 1800 if (extensions != null) { 1801 readExtensions(extensions); 1802 } 1803 } 1804 readExtensions(String extensions)1805 private void readExtensions(String extensions) { 1806 Map<Character, String> extensionsMap = new TreeMap<Character, String>(); 1807 parseSerializedExtensions(extensions, extensionsMap); 1808 this.extensions = Collections.unmodifiableMap(extensionsMap); 1809 1810 if (extensionsMap.containsKey(UNICODE_LOCALE_EXTENSION)) { 1811 String unicodeExtension = extensionsMap.get(UNICODE_LOCALE_EXTENSION); 1812 String[] subTags = unicodeExtension.split("-"); 1813 1814 Map<String, String> unicodeKeywords = new TreeMap<String, String>(); 1815 Set<String> unicodeAttributes = new TreeSet<String>(); 1816 parseUnicodeExtension(subTags, unicodeKeywords, unicodeAttributes); 1817 1818 this.unicodeKeywords = Collections.unmodifiableMap(unicodeKeywords); 1819 this.unicodeAttributes = Collections.unmodifiableSet(unicodeAttributes); 1820 } 1821 } 1822 1823 /** 1824 * The serialized form for extensions is straightforward. It's simply 1825 * of the form key1-value1-key2-value2 where each value might in turn contain 1826 * multiple subtags separated by hyphens. Each key is guaranteed to be a single 1827 * character in length. 
1828 * 1829 * This method assumes that {@code extensionsMap} is non-empty. 1830 * 1831 * Visible for testing. 1832 * 1833 * @hide 1834 */ serializeExtensions(Map<Character, String> extensionsMap)1835 public static String serializeExtensions(Map<Character, String> extensionsMap) { 1836 Iterator<Map.Entry<Character, String>> entryIterator = extensionsMap.entrySet().iterator(); 1837 StringBuilder sb = new StringBuilder(64); 1838 1839 while (true) { 1840 final Map.Entry<Character, String> entry = entryIterator.next(); 1841 sb.append(entry.getKey()); 1842 sb.append('-'); 1843 sb.append(entry.getValue()); 1844 1845 if (entryIterator.hasNext()) { 1846 sb.append('-'); 1847 } else { 1848 break; 1849 } 1850 } 1851 1852 return sb.toString(); 1853 } 1854 1855 /** 1856 * Visible for testing. 1857 * 1858 * @hide 1859 */ parseSerializedExtensions(String extString, Map<Character, String> outputMap)1860 public static void parseSerializedExtensions(String extString, Map<Character, String> outputMap) { 1861 // This probably isn't the most efficient approach, but it's the 1862 // most straightforward to code. 1863 // 1864 // Start by splitting the string on "-". We will then keep track of 1865 // where each of the extension keys (single characters) appear in the 1866 // original string and then use those indices to construct substrings 1867 // representing the values. 1868 final String[] subTags = extString.split("-"); 1869 final int[] typeStartIndices = new int[subTags.length / 2]; 1870 1871 int length = 0; 1872 int count = 0; 1873 for (String subTag : subTags) { 1874 if (subTag.length() > 0) { 1875 // Account for the length of the "-" at the end of each subtag. 
1876 length += (subTag.length() + 1); 1877 } 1878 1879 if (subTag.length() == 1) { 1880 typeStartIndices[count++] = length; 1881 } 1882 } 1883 1884 for (int i = 0; i < count; ++i) { 1885 final int valueStart = typeStartIndices[i]; 1886 // Since the start Index points to the beginning of the next type 1887 // ....prev-k-next..... 1888 // |_ here 1889 // (idx - 2) is the index of the next key 1890 // (idx - 3) is the (non inclusive) end of the previous type. 1891 final int valueEnd = (i == (count - 1)) ? 1892 extString.length() : (typeStartIndices[i + 1] - 3); 1893 1894 outputMap.put(extString.charAt(typeStartIndices[i] - 2), 1895 extString.substring(valueStart, valueEnd)); 1896 } 1897 } 1898 1899 1900 /** 1901 * A UN M.49 is a 3 digit numeric code. 1902 */ isUnM49AreaCode(String code)1903 private static boolean isUnM49AreaCode(String code) { 1904 if (code.length() != 3) { 1905 return false; 1906 } 1907 1908 for (int i = 0; i < 3; ++i) { 1909 final char character = code.charAt(i); 1910 if (!(character >= '0' && character <= '9')) { 1911 return false; 1912 } 1913 } 1914 1915 return true; 1916 } 1917 1918 /* 1919 * Checks whether a given string is an ASCII alphanumeric string. 
1920 */ isAsciiAlphaNum(String string)1921 private static boolean isAsciiAlphaNum(String string) { 1922 for (int i = 0; i < string.length(); i++) { 1923 final char character = string.charAt(i); 1924 if (!(character >= 'a' && character <= 'z' || 1925 character >= 'A' && character <= 'Z' || 1926 character >= '0' && character <= '9')) { 1927 return false; 1928 } 1929 } 1930 1931 return true; 1932 } 1933 isValidBcp47Alpha(String string, int lowerBound, int upperBound)1934 private static boolean isValidBcp47Alpha(String string, int lowerBound, int upperBound) { 1935 final int length = string.length(); 1936 if (length < lowerBound || length > upperBound) { 1937 return false; 1938 } 1939 1940 for (int i = 0; i < length; ++i) { 1941 final char character = string.charAt(i); 1942 if (!(character >= 'a' && character <= 'z' || 1943 character >= 'A' && character <= 'Z')) { 1944 return false; 1945 } 1946 } 1947 1948 return true; 1949 } 1950 isValidBcp47Alphanum(String attributeOrType, int lowerBound, int upperBound)1951 private static boolean isValidBcp47Alphanum(String attributeOrType, 1952 int lowerBound, int upperBound) { 1953 if (attributeOrType.length() < lowerBound || attributeOrType.length() > upperBound) { 1954 return false; 1955 } 1956 1957 return isAsciiAlphaNum(attributeOrType); 1958 } 1959 titleCaseAsciiWord(String word)1960 private static String titleCaseAsciiWord(String word) { 1961 try { 1962 byte[] chars = word.toLowerCase(Locale.ROOT).getBytes(StandardCharsets.US_ASCII); 1963 chars[0] = (byte) ((int) chars[0] + 'A' - 'a'); 1964 return new String(chars, StandardCharsets.US_ASCII); 1965 } catch (UnsupportedOperationException uoe) { 1966 throw new AssertionError(uoe); 1967 } 1968 } 1969 1970 /** 1971 * A type list must contain one or more alphanumeric subtags whose lengths 1972 * are between 3 and 8. 
1973 */ isValidTypeList(String lowerCaseTypeList)1974 private static boolean isValidTypeList(String lowerCaseTypeList) { 1975 final String[] splitList = lowerCaseTypeList.split("-"); 1976 for (String type : splitList) { 1977 if (!isValidBcp47Alphanum(type, 3, 8)) { 1978 return false; 1979 } 1980 } 1981 1982 return true; 1983 } 1984 addUnicodeExtensionToExtensionsMap( Set<String> attributes, Map<String, String> keywords, Map<Character, String> extensions)1985 private static void addUnicodeExtensionToExtensionsMap( 1986 Set<String> attributes, Map<String, String> keywords, 1987 Map<Character, String> extensions) { 1988 if (attributes.isEmpty() && keywords.isEmpty()) { 1989 return; 1990 } 1991 1992 // Assume that the common case is a low number of keywords & attributes 1993 // (usually one or two). 1994 final StringBuilder sb = new StringBuilder(32); 1995 1996 // All attributes must appear before keywords, in lexical order. 1997 if (!attributes.isEmpty()) { 1998 Iterator<String> attributesIterator = attributes.iterator(); 1999 while (true) { 2000 sb.append(attributesIterator.next()); 2001 if (attributesIterator.hasNext()) { 2002 sb.append('-'); 2003 } else { 2004 break; 2005 } 2006 } 2007 } 2008 2009 if (!keywords.isEmpty()) { 2010 if (!attributes.isEmpty()) { 2011 sb.append('-'); 2012 } 2013 2014 Iterator<Map.Entry<String, String>> keywordsIterator = keywords.entrySet().iterator(); 2015 while (true) { 2016 final Map.Entry<String, String> keyWord = keywordsIterator.next(); 2017 sb.append(keyWord.getKey()); 2018 if (!keyWord.getValue().isEmpty()) { 2019 sb.append('-'); 2020 sb.append(keyWord.getValue()); 2021 } 2022 if (keywordsIterator.hasNext()) { 2023 sb.append('-'); 2024 } else { 2025 break; 2026 } 2027 } 2028 } 2029 2030 extensions.put(UNICODE_LOCALE_EXTENSION, sb.toString()); 2031 } 2032 2033 /** 2034 * This extension is described by http://www.unicode.org/reports/tr35/#RFC5234 2035 * unicode_locale_extensions = sep "u" (1*(sep keyword) / 1*(sep attribute) *(sep 
keyword)). 2036 * 2037 * It must contain at least one keyword or attribute and attributes (if any) 2038 * must appear before keywords. Attributes can't appear after keywords because 2039 * they will be indistinguishable from a subtag of the keyword type. 2040 * 2041 * Visible for testing. 2042 * 2043 * @hide 2044 */ parseUnicodeExtension(String[] subtags, Map<String, String> keywords, Set<String> attributes)2045 public static void parseUnicodeExtension(String[] subtags, 2046 Map<String, String> keywords, Set<String> attributes) { 2047 String lastKeyword = null; 2048 List<String> subtagsForKeyword = new ArrayList<String>(); 2049 for (String subtag : subtags) { 2050 if (subtag.length() == 2) { 2051 if (subtagsForKeyword.size() > 0) { 2052 keywords.put(lastKeyword, joinBcp47Subtags(subtagsForKeyword)); 2053 subtagsForKeyword.clear(); 2054 } 2055 2056 lastKeyword = subtag; 2057 } else if (subtag.length() > 2) { 2058 if (lastKeyword == null) { 2059 attributes.add(subtag); 2060 } else { 2061 subtagsForKeyword.add(subtag); 2062 } 2063 } 2064 } 2065 2066 if (subtagsForKeyword.size() > 0) { 2067 keywords.put(lastKeyword, joinBcp47Subtags(subtagsForKeyword)); 2068 } else if (lastKeyword != null) { 2069 keywords.put(lastKeyword, ""); 2070 } 2071 } 2072 2073 /** 2074 * Joins a list of subtags into a BCP-47 tag using the standard separator 2075 * ("-"). 2076 */ joinBcp47Subtags(List<String> strings)2077 private static String joinBcp47Subtags(List<String> strings) { 2078 final int size = strings.size(); 2079 2080 StringBuilder sb = new StringBuilder(strings.get(0).length()); 2081 for (int i = 0; i < size; ++i) { 2082 sb.append(strings.get(i)); 2083 if (i != size - 1) { 2084 sb.append('-'); 2085 } 2086 } 2087 2088 return sb.toString(); 2089 } 2090 2091 /** 2092 * @hide for internal use only. 
2093 */ adjustLanguageCode(String languageCode)2094 public static String adjustLanguageCode(String languageCode) { 2095 String adjusted = languageCode.toLowerCase(Locale.US); 2096 // Map new language codes to the obsolete language 2097 // codes so the correct resource bundles will be used. 2098 if (languageCode.equals("he")) { 2099 adjusted = "iw"; 2100 } else if (languageCode.equals("id")) { 2101 adjusted = "in"; 2102 } else if (languageCode.equals("yi")) { 2103 adjusted = "ji"; 2104 } 2105 2106 return adjusted; 2107 } 2108 convertGrandfatheredTag(String original)2109 private static String convertGrandfatheredTag(String original) { 2110 final String converted = GRANDFATHERED_LOCALES.get(original); 2111 return converted != null ? converted : original; 2112 } 2113 2114 /** 2115 * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)} 2116 * and appends valid variant subtags upto the first invalid subtag (if any) to 2117 * {@code normalizedVariants}. 2118 */ extractVariantSubtags(String[] subtags, int startIndex, int endIndex, List<String> normalizedVariants)2119 private static void extractVariantSubtags(String[] subtags, int startIndex, int endIndex, 2120 List<String> normalizedVariants) { 2121 for (int i = startIndex; i < endIndex; i++) { 2122 final String subtag = subtags[i]; 2123 2124 if (Builder.isValidVariantSubtag(subtag)) { 2125 normalizedVariants.add(subtag); 2126 } else { 2127 break; 2128 } 2129 } 2130 } 2131 2132 /** 2133 * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)} 2134 * and inserts valid extensions into {@code extensions}. The scan is aborted 2135 * when an invalid extension is encountered. Returns the index of the first 2136 * unparsable element of {@code subtags}. 
2137 */ extractExtensions(String[] subtags, int startIndex, int endIndex, Map<Character, String> extensions)2138 private static int extractExtensions(String[] subtags, int startIndex, int endIndex, 2139 Map<Character, String> extensions) { 2140 int privateUseExtensionIndex = -1; 2141 int extensionKeyIndex = -1; 2142 2143 int i = startIndex; 2144 for (; i < endIndex; i++) { 2145 final String subtag = subtags[i]; 2146 2147 final boolean parsingPrivateUse = (privateUseExtensionIndex != -1) && 2148 (extensionKeyIndex == privateUseExtensionIndex); 2149 2150 // Note that private use extensions allow subtags of length 1. 2151 // Private use extensions *must* come last, so there's no ambiguity 2152 // in that case. 2153 if (subtag.length() == 1 && !parsingPrivateUse) { 2154 // Emit the last extension we encountered if any. First check 2155 // whether we encountered two keys in a row (which is an error). 2156 // Also checks if we already have an extension with the same key, 2157 // which is again an error. 2158 if (extensionKeyIndex != -1) { 2159 if ((i - 1) == extensionKeyIndex) { 2160 return extensionKeyIndex; 2161 } 2162 2163 final String key = subtags[extensionKeyIndex].toLowerCase(Locale.ROOT); 2164 if (extensions.containsKey(key.charAt(0))) { 2165 return extensionKeyIndex; 2166 } 2167 2168 final String value = concatenateRange(subtags, extensionKeyIndex + 1, i); 2169 extensions.put(key.charAt(0), value.toLowerCase(Locale.ROOT)); 2170 } 2171 2172 // Mark the start of the next extension. Also keep track of whether this 2173 // is a private use extension, and throw an error if it doesn't come last. 2174 extensionKeyIndex = i; 2175 if ("x".equals(subtag.toLowerCase(Locale.ROOT))) { 2176 privateUseExtensionIndex = i; 2177 } else if (privateUseExtensionIndex != -1) { 2178 // The private use extension must come last. 
2179 return privateUseExtensionIndex; 2180 } 2181 } else if (extensionKeyIndex != -1) { 2182 // We must have encountered a valid key in order to start parsing 2183 // its subtags. 2184 if (!isValidBcp47Alphanum(subtag, parsingPrivateUse ? 1 : 2, 8)) { 2185 return i; 2186 } 2187 } else { 2188 // Encountered a value without a preceding key. 2189 return i; 2190 } 2191 } 2192 2193 if (extensionKeyIndex != -1) { 2194 if ((i - 1) == extensionKeyIndex) { 2195 return extensionKeyIndex; 2196 } 2197 2198 final String key = subtags[extensionKeyIndex].toLowerCase(Locale.ROOT); 2199 if (extensions.containsKey(key.charAt(0))) { 2200 return extensionKeyIndex; 2201 } 2202 2203 final String value = concatenateRange(subtags, extensionKeyIndex + 1, i); 2204 extensions.put(key.charAt(0), value.toLowerCase(Locale.ROOT)); 2205 } 2206 2207 return i; 2208 } 2209 forLanguageTag( String tag, boolean strict)2210 private static Locale forLanguageTag(/* @Nonnull */ String tag, boolean strict) { 2211 final String converted = convertGrandfatheredTag(tag); 2212 final String[] subtags = converted.split("-"); 2213 2214 int lastSubtag = subtags.length; 2215 for (int i = 0; i < subtags.length; ++i) { 2216 final String subtag = subtags[i]; 2217 if (subtag.isEmpty() || subtag.length() > 8) { 2218 if (strict) { 2219 throw new IllformedLocaleException("Invalid subtag at index: " + i 2220 + " in tag: " + tag); 2221 } else { 2222 lastSubtag = (i - 1); 2223 } 2224 2225 break; 2226 } 2227 } 2228 2229 final String languageCode = Builder.normalizeAndValidateLanguage(subtags[0], strict); 2230 String scriptCode = ""; 2231 int nextSubtag = 1; 2232 if (lastSubtag > nextSubtag) { 2233 scriptCode = Builder.normalizeAndValidateScript(subtags[nextSubtag], false /* strict */); 2234 if (!scriptCode.isEmpty()) { 2235 nextSubtag++; 2236 } 2237 } 2238 2239 String regionCode = ""; 2240 if (lastSubtag > nextSubtag) { 2241 regionCode = Builder.normalizeAndValidateRegion(subtags[nextSubtag], false /* strict */); 2242 if 
(!regionCode.isEmpty()) { 2243 nextSubtag++; 2244 } 2245 } 2246 2247 List<String> variants = null; 2248 if (lastSubtag > nextSubtag) { 2249 variants = new ArrayList<String>(); 2250 extractVariantSubtags(subtags, nextSubtag, lastSubtag, variants); 2251 nextSubtag += variants.size(); 2252 } 2253 2254 Map<Character, String> extensions = Collections.EMPTY_MAP; 2255 if (lastSubtag > nextSubtag) { 2256 extensions = new TreeMap<Character, String>(); 2257 nextSubtag = extractExtensions(subtags, nextSubtag, lastSubtag, extensions); 2258 } 2259 2260 if (nextSubtag != lastSubtag) { 2261 if (strict) { 2262 throw new IllformedLocaleException("Unparseable subtag: " + subtags[nextSubtag] 2263 + " from language tag: " + tag); 2264 } 2265 } 2266 2267 Set<String> unicodeKeywords = Collections.EMPTY_SET; 2268 Map<String, String> unicodeAttributes = Collections.EMPTY_MAP; 2269 if (extensions.containsKey(UNICODE_LOCALE_EXTENSION)) { 2270 unicodeKeywords = new TreeSet<String>(); 2271 unicodeAttributes = new TreeMap<String, String>(); 2272 parseUnicodeExtension(extensions.get(UNICODE_LOCALE_EXTENSION).split("-"), 2273 unicodeAttributes, unicodeKeywords); 2274 } 2275 2276 String variantCode = ""; 2277 if (variants != null && !variants.isEmpty()) { 2278 StringBuilder variantsBuilder = new StringBuilder(variants.size() * 8); 2279 for (int i = 0; i < variants.size(); ++i) { 2280 if (i != 0) { 2281 variantsBuilder.append('_'); 2282 } 2283 variantsBuilder.append(variants.get(i)); 2284 } 2285 variantCode = variantsBuilder.toString(); 2286 } 2287 2288 return new Locale(languageCode, regionCode, variantCode, scriptCode, 2289 unicodeKeywords, unicodeAttributes, extensions, true /* has validated fields */); 2290 } 2291 } 2292