1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.BufferedReader; 12 import java.io.IOException; 13 import java.util.ArrayList; 14 import java.util.Arrays; 15 import java.util.Collections; 16 import java.util.Comparator; 17 import java.util.EnumMap; 18 import java.util.EnumSet; 19 import java.util.HashMap; 20 import java.util.HashSet; 21 import java.util.Iterator; 22 import java.util.LinkedHashMap; 23 import java.util.LinkedHashSet; 24 import java.util.List; 25 import java.util.Locale; 26 import java.util.Map; 27 import java.util.Map.Entry; 28 import java.util.Set; 29 import java.util.TreeMap; 30 import java.util.TreeSet; 31 import java.util.regex.Pattern; 32 33 import org.unicode.cldr.draft.ScriptMetadata; 34 import org.unicode.cldr.draft.ScriptMetadata.IdUsage; 35 import org.unicode.cldr.util.Iso639Data.Type; 36 import org.unicode.cldr.util.ZoneParser.ZoneLine; 37 38 import com.ibm.icu.impl.Relation; 39 import com.ibm.icu.lang.UCharacter; 40 import com.ibm.icu.text.UnicodeSet; 41 import com.ibm.icu.util.ICUUncheckedIOException; 42 import com.ibm.icu.util.Output; 43 44 /** 45 * Provides access to various codes used by CLDR: RFC 3066, ISO 4217, Olson 46 * tzids 47 */ 48 public class StandardCodes { 49 50 public enum CodeType { 51 language, script, territory, extlang, legacy, redundant, variant, currency, tzid; from(String name)52 public static CodeType from(String name) { 53 if ("region".equals(name)) { 54 return territory; 55 } 56 return CodeType.valueOf(name); 57 } 58 } 59 60 private static final Set<CodeType> TypeSet = Collections.unmodifiableSet(EnumSet.allOf(CodeType.class)); 61 62 private static 
final Set<String> TypeStringSet; 63 static { 64 LinkedHashSet<String> foo = new LinkedHashSet<>(); 65 for (CodeType x : CodeType.values()) { x.toString()66 foo.add(x.toString()); 67 } 68 TypeStringSet = Collections.unmodifiableSet(foo); 69 } 70 71 public static final String DESCRIPTION_SEPARATOR = "\u25AA"; 72 73 public static final String NO_COUNTRY = "001"; 74 75 private EnumMap<CodeType, Map<String, List<String>>> type_code_data = new EnumMap<>( 76 CodeType.class); 77 78 private EnumMap<CodeType, Map<String, List<String>>> type_name_codes = new EnumMap<>( 79 CodeType.class); 80 81 private EnumMap<CodeType, Map<String, String>> type_code_preferred = new EnumMap<>( 82 CodeType.class); 83 84 private Map<String, Set<String>> country_modernCurrency = new TreeMap<>(); 85 86 private Map<CodeType, Set<String>> goodCodes = new TreeMap<>(); 87 88 private static final boolean DEBUG = false; 89 90 private static final class StandardCodesHelper { 91 static final StandardCodes SINGLETON = new StandardCodes(); 92 } 93 /** 94 * Get the singleton copy of the standard codes. 95 */ make()96 static public synchronized StandardCodes make() { 97 return StandardCodesHelper.SINGLETON; 98 } 99 100 /** 101 * The data is the name in the case of RFC3066 codes, and the country code in 102 * the case of TZIDs and ISO currency codes. If the country code is missing, 103 * uses ZZ. 
104 */ getData(String type, String code)105 public String getData(String type, String code) { 106 Map<String, List<String>> code_data = getCodeData(type); 107 if (code_data == null) 108 return null; 109 List<String> list = code_data.get(code); 110 if (list == null) 111 return null; 112 return list.get(0); 113 } 114 115 /** 116 * @return the full data for the type and code For the data in lstreg, it is 117 * description | date | canonical_value | recommended_prefix # 118 * comments 119 */ getFullData(String type, String code)120 public List<String> getFullData(String type, String code) { 121 Map<String, List<String>> code_data = getCodeData(type); 122 if (code_data == null) 123 return null; 124 return code_data.get(code); 125 } 126 127 /** 128 * @return the full data for the type and code For the data in lstreg, it is 129 * description | date | canonical_value | recommended_prefix # 130 * comments 131 */ getFullData(CodeType type, String code)132 public List<String> getFullData(CodeType type, String code) { 133 Map<String, List<String>> code_data = type_code_data.get(type); 134 if (code_data == null) 135 return null; 136 return code_data.get(code); 137 } 138 getCodeData(String type)139 private Map<String, List<String>> getCodeData(String type) { 140 return getCodeData(CodeType.from(type)); 141 } 142 getCodeData(CodeType type)143 private Map<String, List<String>> getCodeData(CodeType type) { 144 return type_code_data.get(type); 145 } 146 147 /** 148 * Get at the language registry values, as a Map from label to value. 
149 * 150 * @param type 151 * @param code 152 * @return 153 */ getLangData(String type, String code)154 public Map<String, String> getLangData(String type, String code) { 155 try { 156 if (type.equals("territory")) 157 type = "region"; 158 else if (type.equals("variant")) code = code.toLowerCase(Locale.ENGLISH); 159 return (Map) ((Map) getLStreg().get(type)).get(code); 160 } catch (RuntimeException e) { 161 return null; 162 } 163 } 164 165 /** 166 * Return a replacement code, if available. If not, return null. 167 * 168 */ getReplacement(String type, String code)169 public String getReplacement(String type, String code) { 170 if (type.equals("currency")) 171 return null; // no replacement codes for currencies 172 List<String> data = getFullData(type, code); 173 if (data == null) 174 return null; 175 // if available, the replacement is a non-empty value other than --, in 176 // position 2. 177 if (data.size() < 3) 178 return null; 179 String replacement = data.get(2); 180 if (!replacement.equals("") && !replacement.equals("--")) 181 return replacement; 182 return null; 183 } 184 185 /** 186 * Return the list of codes that have the same data. For example, returns all 187 * currency codes for a country. If there is a preferred one, it is first. 188 * 189 * @param type 190 * @param data 191 * @return 192 */ 193 @Deprecated getCodes(String type, String data)194 public List<String> getCodes(String type, String data) { 195 return getCodes(CodeType.from(type), data); 196 } 197 198 /** 199 * Return the list of codes that have the same data. For example, returns all 200 * currency codes for a country. If there is a preferred one, it is first. 
201 */ getCodes(CodeType type, String data)202 public List<String> getCodes(CodeType type, String data) { 203 Map<String, List<String>> data_codes = type_name_codes.get(type); 204 if (data_codes == null) 205 return null; 206 return Collections.unmodifiableList(data_codes.get(data)); 207 } 208 209 /** 210 * Where there is a preferred code, return it. 211 */ 212 @Deprecated getPreferred(String type, String code)213 public String getPreferred(String type, String code) { 214 return getPreferred(CodeType.from(type), code); 215 } 216 217 /** 218 * Where there is a preferred code, return it. 219 */ 220 getPreferred(CodeType type, String code)221 public String getPreferred(CodeType type, String code) { 222 Map<String, String> code_preferred = type_code_preferred.get(type); 223 if (code_preferred == null) 224 return code; 225 String newCode = code_preferred.get(code); 226 if (newCode == null) 227 return code; 228 return newCode; 229 } 230 231 /** 232 * Get all the available types 233 */ getAvailableTypes()234 public Set<String> getAvailableTypes() { 235 return TypeStringSet; 236 } 237 238 /** 239 * Get all the available types 240 */ getAvailableTypesEnum()241 public Set<CodeType> getAvailableTypesEnum() { 242 return TypeSet; 243 } 244 245 /** 246 * Get all the available codes for a given type 247 * 248 * @param type 249 * @return 250 */ getAvailableCodes(String type)251 public Set<String> getAvailableCodes(String type) { 252 return getAvailableCodes(CodeType.from(type)); 253 } 254 255 /** 256 * Get all the available codes for a given type 257 * 258 * @param type 259 * @return 260 */ getAvailableCodes(CodeType type)261 public Set<String> getAvailableCodes(CodeType type) { 262 Map<String, List<String>> code_name = type_code_data.get(type); 263 return Collections.unmodifiableSet(code_name.keySet()); 264 } 265 getGoodAvailableCodes(String stringType)266 public Set<String> getGoodAvailableCodes(String stringType) { 267 return getGoodAvailableCodes(CodeType.from(stringType)); 268 
} 269 270 /** 271 * Get all the available "real" codes for a given type, excluding private use, 272 * but including some deprecated codes. Use SupplementalDataInfo getLocaleAliases to 273 * exclude others. 274 * 275 * @param type 276 * @return 277 */ getGoodAvailableCodes(CodeType type)278 public Set<String> getGoodAvailableCodes(CodeType type) { 279 Set<String> result = goodCodes.get(type); 280 if (result == null) { 281 synchronized (goodCodes) { 282 Map<String, List<String>> code_name = getCodeData(type); 283 SupplementalDataInfo sd = SupplementalDataInfo.getInstance(); 284 if (code_name == null) 285 return null; 286 result = new TreeSet<>(code_name.keySet()); 287 switch (type) { 288 case currency: 289 break; // nothing special 290 case language: 291 return sd.getCLDRLanguageCodes(); 292 case script: 293 return sd.getCLDRScriptCodes(); 294 case tzid: 295 break; // nothing special 296 default: 297 for (Iterator<String> it = result.iterator(); it.hasNext();) { 298 String code = it.next(); 299 if (code.equals("root") || code.equals("QO")) 300 continue; 301 List<String> data = getFullData(type, code); 302 if (data.size() < 3) { 303 if (DEBUG) 304 System.out.println(code + "\t" + data); 305 } 306 if ("PRIVATE USE".equalsIgnoreCase(data.get(0)) 307 || (!data.get(2).equals("") && !data.get(2).equals("--"))) { 308 // System.out.println("Removing: " + code); 309 it.remove(); 310 } 311 } 312 } 313 result = Collections.unmodifiableSet(result); 314 goodCodes.put(type, result); 315 } 316 } 317 return result; 318 } 319 320 private static Set<String> GOOD_COUNTRIES; 321 getGoodCountries()322 public Set<String> getGoodCountries() { 323 synchronized (goodCodes) { 324 if (GOOD_COUNTRIES == null) { 325 Set<String> temp = new LinkedHashSet<>(); 326 for (String s : getGoodAvailableCodes(CodeType.territory)) { 327 if (isCountry(s)) { 328 temp.add(s); 329 } 330 } 331 GOOD_COUNTRIES = Collections.unmodifiableSet(temp); 332 } 333 } 334 return GOOD_COUNTRIES; 335 } 336 337 /** 338 * Gets 
the modern currency. 339 */ getMainCurrencies(String countryCode)340 public Set<String> getMainCurrencies(String countryCode) { 341 return country_modernCurrency.get(countryCode); 342 } 343 344 private Map<Organization, Map<String, Level>> platform_locale_level = null; 345 private Map<Organization, Relation<Level, String>> platform_level_locale = null; 346 private Map<String, Map<String, String>> platform_locale_levelString = null; 347 348 // /** 349 // * Get rid of this 350 // * 351 // * @param type 352 // * @return 353 // * @throws IOException 354 // * @deprecated 355 // */ 356 // public String getEffectiveLocaleType(String type) throws IOException { 357 // if ((type != null) && (getLocaleCoverageOrganizations().contains(Organization.valueOf(type)))) { 358 // return type; 359 // } else { 360 // return null; // the default.. for now.. 361 // } 362 // } 363 364 static Comparator caseless = new Comparator() { 365 366 @Override 367 public int compare(Object arg0, Object arg1) { 368 String s1 = (String) arg0; 369 String s2 = (String) arg1; 370 return s1.compareToIgnoreCase(s2); 371 } 372 373 }; 374 375 /** 376 * Used for Locales.txt to mean "all" 377 */ 378 public static final String ALL_LOCALES = "*"; 379 380 /** 381 * Returns locales according to status. It returns a Map of Maps, key 1 is 382 * either IBM or Java (perhaps more later), key 2 is the Level. 
383 * 384 * @deprecated 385 */ 386 @Deprecated getLocaleTypes()387 public Map<Organization, Map<String, Level>> getLocaleTypes() { 388 synchronized (StandardCodes.class) { 389 if (platform_locale_level == null) { 390 loadPlatformLocaleStatus(); 391 } 392 } 393 return platform_locale_level; 394 } 395 396 /** 397 * Return map of locales to levels 398 * @param org 399 * @return 400 */ getLocaleToLevel(Organization org)401 public Map<String, Level> getLocaleToLevel(Organization org) { 402 return getLocaleTypes().get(org); 403 } 404 getLocaleCoverageLevel(String organization, String desiredLocale)405 public Level getLocaleCoverageLevel(String organization, String desiredLocale) { 406 return getLocaleCoverageLevel(Organization.fromString(organization), desiredLocale); 407 } 408 getLocaleCoverageLevel(Organization organization, String desiredLocale)409 public Level getLocaleCoverageLevel(Organization organization, String desiredLocale) { 410 return getLocaleCoverageLevel(organization, desiredLocale, new Output<LocaleCoverageType>()); 411 } 412 413 public enum LocaleCoverageType { 414 explicit, parent, star, undetermined 415 } 416 417 /** 418 * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing. 419 * A locale of "*" in the data means "everything else". 
420 */ getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType)421 public Level getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType) { 422 synchronized (StandardCodes.class) { 423 if (platform_locale_level == null) { 424 loadPlatformLocaleStatus(); 425 } 426 } 427 coverageType.value = LocaleCoverageType.undetermined; 428 if (organization == null) { 429 return Level.UNDETERMINED; 430 } 431 Map<String, Level> locale_status = platform_locale_level.get(organization); 432 if (locale_status == null) { 433 return Level.UNDETERMINED; 434 } 435 // see if there is a parent 436 String originalLocale = desiredLocale; 437 while (desiredLocale != null) { 438 Level status = locale_status.get(desiredLocale); 439 if (status != null && status != Level.UNDETERMINED) { 440 coverageType.value = originalLocale == desiredLocale ? LocaleCoverageType.explicit : LocaleCoverageType.parent; 441 return status; 442 } 443 desiredLocale = LocaleIDParser.getParent(desiredLocale); 444 } 445 Level status = locale_status.get(ALL_LOCALES); 446 if (status != null && status != Level.UNDETERMINED) { 447 coverageType.value = LocaleCoverageType.star; 448 return status; 449 } 450 return Level.UNDETERMINED; 451 } 452 453 /** 454 * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing. 
*/
    public Level getDefaultLocaleCoverageLevel(Organization organization) {
        return getLocaleCoverageLevel(organization, ALL_LOCALES);
    }

    /** Returns the organizations that have coverage data, loading Locales.txt on first use. */
    public Set<Organization> getLocaleCoverageOrganizations() {
        synchronized (StandardCodes.class) {
            if (platform_locale_level == null) {
                loadPlatformLocaleStatus();
            }
        }
        return platform_locale_level.keySet();
    }

    /** Returns the string form of the organizations that have coverage data. */
    public Set<String> getLocaleCoverageOrganizationStrings() {
        synchronized (StandardCodes.class) {
            if (platform_locale_level == null) {
                loadPlatformLocaleStatus();
            }
        }
        return platform_locale_levelString.keySet();
    }

    /** Same as the Organization overload, with the organization given by name. */
    public Set<String> getLocaleCoverageLocales(String organization) {
        return getLocaleCoverageLocales(Organization.fromString(organization));
    }

    /**
     * Returns the locales that have a coverage level for the organization.
     *
     * @return the locale set; empty when the organization has no coverage data
     */
    public Set<String> getLocaleCoverageLocales(Organization organization) {
        synchronized (StandardCodes.class) {
            if (platform_locale_level == null) {
                loadPlatformLocaleStatus();
            }
        }
        Map<String, Level> locale_level = platform_locale_level.get(organization);
        if (locale_level == null) {
            // Previously a NullPointerException for an organization without data.
            return Collections.emptySet();
        }
        return locale_level.keySet();
    }

    /** Returns the level-to-locales relation for the organization (null if it has none). */
    public Relation<Level, String> getLevelsToLocalesFor(Organization organization) {
        synchronized (StandardCodes.class) {
            if (platform_level_locale == null) {
                loadPlatformLocaleStatus();
            }
        }
        return platform_level_locale.get(organization);
    }

    /**
     * Returns the organization's locales whose coverage level is one of {@code choice}.
     *
     * @return a new set, in the iteration order of getLocaleCoverageLocales(organization)
     */
    public Set<String> getLocaleCoverageLocales(Organization organization, Set<Level> choice) {
        Set<String> result = new LinkedHashSet<>();
        for (String locale : getLocaleCoverageLocales(organization)) {
            if (choice.contains(getLocaleCoverageLevel(organization, locale))) {
                result.add(locale);
            }
        }
        return result;
    }

    /**
     * Parses Locales.txt (lines of "organization ; locale ; level", '#' starts a
     * comment) and fills platform_locale_level, platform_level_locale and
     * platform_locale_levelString.
     * <p>
     * Everything is built in local variables and the fields are assigned only at
     * the end, so a failure part-way through does not leave the fields half set.
     *
     * @throws IllegalArgumentException if a line is malformed, names an invalid
     *             organization or locale, or names a default-content locale
     * @throws ICUUncheckedIOException if the file cannot be read
     */
    private void loadPlatformLocaleStatus() {
        LocaleIDParser parser = new LocaleIDParser();
        Map<Organization, Map<String, Level>> localeLevels = new EnumMap<>(Organization.class);
        SupplementalDataInfo sd = SupplementalDataInfo.getInstance();
        Set<String> defaultContentLocales = sd.getDefaultContentLocales();
        // try-with-resources: the reader used to be left open.
        try (BufferedReader lstreg = CldrUtility.getUTF8Data("Locales.txt")) {
            String line;
            while ((line = lstreg.readLine()) != null) {
                int commentPos = line.indexOf('#');
                if (commentPos >= 0) {
                    line = line.substring(0, commentPos);
                }
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                List<String> stuff = CldrUtility.splitList(line, ';', true);
                if (stuff.size() < 3) {
                    throw new IllegalArgumentException("Missing field in Locales.txt: " + line);
                }

                // verify that the organization is valid
                Organization organization;
                try {
                    organization = Organization.fromString(stuff.get(0));
                } catch (Exception e) {
                    throw new IllegalArgumentException("Invalid organization in Locales.txt: " + line, e);
                }
                if (organization == null) {
                    throw new IllegalArgumentException("Invalid organization in Locales.txt: " + line);
                }

                // verify that the locale is valid BCP47
                String locale = stuff.get(1);
                boolean isStar = locale.equals(ALL_LOCALES);
                if (!isStar) {
                    parser.set(locale);
                    String valid = validate(parser);
                    if (valid.length() != 0) {
                        throw new IllegalArgumentException("Invalid locale in Locales.txt: " + line);
                    }
                    locale = parser.toString(); // normalize

                    // verify that the locale is not a default content locale
                    if (defaultContentLocales.contains(locale)) {
                        throw new IllegalArgumentException("Cannot have default content locale in Locales.txt: " + line);
                    }
                }

                Level status = Level.get(stuff.get(2));
                if (status == Level.UNDETERMINED) {
                    System.out.println("Warning: Level unknown on: " + line);
                }
                Map<String, Level> locale_status = localeLevels.get(organization);
                if (locale_status == null) {
                    locale_status = new TreeMap<>();
                    localeLevels.put(organization, locale_status);
                }
                locale_status.put(locale, status);
                if (!isStar) {
                    // Make sure the language+script and bare language have an entry too.
                    String scriptLoc = parser.getLanguageScript();
                    if (locale_status.get(scriptLoc) == null) {
                        locale_status.put(scriptLoc, status);
                    }
                    String lang = parser.getLanguage();
                    if (locale_status.get(lang) == null) {
                        locale_status.put(lang, status);
                    }
                }
            }
        } catch (IOException e) {
            throw new ICUUncheckedIOException("Internal Error", e);
        }

        // now reset the parent to be the max of the children
        for (Map<String, Level> locale_level : localeLevels.values()) {
            // Iterate over a snapshot of the keys: the map is updated inside the loop.
            for (String locale : new ArrayList<>(locale_level.keySet())) {
                parser.set(locale);
                Level childLevel = locale_level.get(locale);

                String language = parser.getLanguage();
                if (!language.equals(locale)) {
                    Level languageLevel = locale_level.get(language);
                    if (languageLevel == null || languageLevel.compareTo(childLevel) < 0) {
                        locale_level.put(language, childLevel);
                    }
                }
                String languageScript = parser.getLanguageScript();
                if (!languageScript.equals(language)) {
                    Level languageScriptLevel = locale_level.get(languageScript);
                    if (languageScriptLevel == null || languageScriptLevel.compareTo(childLevel) < 0) {
                        locale_level.put(languageScript, childLevel);
                    }
                }
            }
        }

        // backwards compat hack
        Map<String, Map<String, String>> localeLevelStrings = new TreeMap<>();
        Map<Organization, Relation<Level, String>> levelLocales = new EnumMap<>(Organization.class);
        for (Entry<Organization, Map<String, Level>> platformEntry : localeLevels.entrySet()) {
            Organization platform = platformEntry.getKey();
            Map<String, Level> locale_level = platformEntry.getValue();

            Map<String, String> locale_levelString = new TreeMap<>();
            for (Entry<String, Level> localeEntry : locale_level.entrySet()) {
                locale_levelString.put(localeEntry.getKey(), localeEntry.getValue().toString());
            }
            localeLevelStrings.put(platform.toString(), locale_levelString);

            // Raw types kept as in the original call into ICU's Relation.
            Relation level_locale = Relation.of(new EnumMap(Level.class), HashSet.class);
            level_locale.addAllInverted(locale_level).freeze();
            levelLocales.put(platform, level_locale);
        }
        // The result of protectCollection was previously discarded for this map,
        // unlike the other two; keep the returned value.
        platform_level_locale = CldrUtility.protectCollection(levelLocales);
        platform_locale_levelString = CldrUtility.protectCollection(localeLevelStrings);
        // Assigned last: callers use this field to decide whether loading happened.
        platform_locale_level = CldrUtility.protectCollection(localeLevels);
    }

    /**
     * Checks the language, script and region of a parsed locale against the
     * available codes.
     *
     * @param parser a parser already set to the locale to check
     * @return an empty string if everything is valid, otherwise the problems
     *         found, separated by ", "
     */
    private String validate(LocaleIDParser parser) {
        String message = "";
        String lang = parser.getLanguage();
        if (lang.length() == 0) {
            message += ", Missing language";
        } else if (!getAvailableCodes("language").contains(lang)) {
            message += ", Invalid language code: " + lang;
        }
        String script = parser.getScript();
        if (script.length() != 0 && !getAvailableCodes("script").contains(script)) {
            message += ", Invalid script code: " + script;
        }
        String territory = parser.getRegion();
        if (territory.length() != 0 && !getAvailableCodes("territory").contains(territory)) {
            // Report the offending territory (the message used to show the language).
            message += ", Invalid territory code: " + territory;
        }
        // Strip the leading ", " when there is at least one message.
        return message.length() == 0 ? message : message.substring(2);
    }

    /**
     * Ascertain that the given locale is in the given group specified by the
     * organization
     *
     * @param locale the locale to check
     * @param group the expected group name (the string form of a coverage level)
     * @param org the organization whose coverage data is used
     * @return true if the locale's group for the organization equals {@code group}
     */
    public boolean isLocaleInGroup(String locale, String group, Organization org) {
        return group.equals(getGroup(locale, org));
    }

    /** Same as the Organization overload, with the organization given by name. */
    public boolean isLocaleInGroup(String locale, String group, String org) {
        return isLocaleInGroup(locale, group, Organization.fromString(org));
    }

    /** Same as the Organization overload, with the organization given by name. */
    public String getGroup(String locale, String org) {
        return getGroup(locale, Organization.fromString(org));
    }

    /**
     * Gets the coverage group given a locale and org
     *
     * @param locale the locale to look up
     * @param org the organization whose coverage data is used
     * @return group if available, null if not
     */
    private String getGroup(String locale, Organization org) {
        Level l = getLocaleCoverageLevel(org, locale);
        return l.equals(Level.UNDETERMINED) ? null : l.toString();
    }

    // ========== PRIVATES ==========

    /**
     * Loads the code tables: ISO4217.txt, then the language subtag registry
     * (getLStreg), then the zone data (getZoneData).
     *
     * @throws IllegalArgumentException if a data file cannot be read or parsed
     */
    private StandardCodes() {
        String[] files = { "ISO4217.txt" }; // , "TZID.txt"
        type_code_preferred.put(CodeType.tzid, new TreeMap<String, String>());
        add(CodeType.language, "root", "Root");
        String originalLine = null;
        for (String file : files) {
            // try-with-resources: the reader used to stay open when parsing failed.
            try (BufferedReader lstreg = CldrUtility.getUTF8Data(file)) {
                while (true) {
                    String line = originalLine = lstreg.readLine();
                    if (line == null) {
                        break;
                    }
                    if (line.startsWith("\uFEFF")) {
                        line = line.substring(1); // drop a byte-order mark
                    }
                    line = line.trim();
                    int commentPos = line.indexOf('#');
                    String comment = "";
                    if (commentPos >= 0) {
                        comment = line.substring(commentPos + 1).trim();
                        line = line.substring(0, commentPos);
                    }
                    if (line.length() == 0) {
                        continue;
                    }
                    List<String> pieces = CldrUtility.splitList(line, '|', true,
                        new ArrayList<String>());
                    String typeName = pieces.remove(0);
                    // "date" lines carry no code. The old check compared the enum with a
                    // String and so never matched; test the raw name before converting.
                    if ("date".equals(typeName)) {
                        continue;
                    }
                    CodeType type = CodeType.from(typeName);
                    String code = pieces.remove(0);

                    String oldName = pieces.get(0);
                    int pos = oldName.indexOf(';');
                    if (pos >= 0) {
                        oldName = oldName.substring(0, pos).trim();
                        pieces.set(0, oldName);
                    }

                    List<String> data = pieces;
                    if (comment.indexOf("deprecated") >= 0
                        && data.size() > 2 && data.get(2).length() == 0) {
                        data.set(2, "--");
                    }
                    if (oldName.equalsIgnoreCase("PRIVATE USE")) {
                        int separatorPos = code.indexOf("..");
                        if (separatorPos < 0) {
                            add(type, code, data);
                        } else {
                            // A range such as "XA..XZ": add every code in it.
                            String current = code.substring(0, separatorPos);
                            String end = code.substring(separatorPos + 2);
                            for (; current.compareTo(end) <= 0; current = nextAlpha(current)) {
                                add(type, current, data);
                            }
                        }
                        continue;
                    }
                    // Enum comparison: the old String comparisons were always false, so the
                    // currency branch below never ran and country_modernCurrency stayed empty.
                    if (type != CodeType.tzid) {
                        add(type, code, data);
                        if (type == CodeType.currency) {
                            // currency | TPE | Timor Escudo | TP | EAST TIMOR | O
                            if (data.size() > 3 && data.get(3).equals("C")) {
                                String country = data.get(1);
                                Set<String> codes = country_modernCurrency.get(country);
                                if (codes == null) {
                                    codes = new TreeSet<>();
                                    country_modernCurrency.put(country, codes);
                                }
                                codes.add(code);
                            }
                        }
                        continue;
                    }
                    // type = tzid
                    // List codes = (List) Utility.splitList(code, ',', true, new
                    // ArrayList());
                    String preferred = null;
                    for (int i = 0; i < pieces.size(); ++i) {
                        code = pieces.get(i);
                        add(type, code, data);
                        if (preferred == null) {
                            preferred = code;
                        } else {
                            Map<String, String> code_preferred = type_code_preferred.get(type);
                            code_preferred.put(code, preferred);
                        }
                    }
                }
            } catch (Exception e) {
                System.err.println("WARNING: " + file
                    + " may be a corrupted UTF-8 file. Please check.");
                throw new IllegalArgumentException(
                    "Can't read " + file + "\t" + originalLine, e);
            }
        }
        // Protect once, after every file is read; doing it inside the loop would
        // make the map read-only before a second file could add to it.
        country_modernCurrency = CldrUtility.protectCollection(country_modernCurrency);

        // data is: description | date | canonical_value | recommended_prefix #
        // comments
        // HACK, just rework

        Map<String, Map<String, Map<String, String>>> languageRegistry = getLStreg();
        // languageRegistry = CldrUtility.protectCollection(languageRegistry);

        for (Entry<String, Map<String, Map<String, String>>> typeEntry : languageRegistry.entrySet()) {
            String type = typeEntry.getKey();
            CodeType type2 = CodeType.from(type);
            for (Entry<String, Map<String, String>> codeEntry : typeEntry.getValue().entrySet()) {
                String code = codeEntry.getKey();
                Map<String, String> mm = codeEntry.getValue();
                List<String> data = new ArrayList<>(3);
                data.add(mm.get("Description"));
                data.add(mm.get("Added"));
                String pref = mm.get("Preferred-Value");
                if (pref == null) {
                    pref = mm.get("Deprecated") == null ? "" : "deprecated";
                }
                data.add(pref);
                if (type.equals("variant")) {
                    // Fixed locale: the default locale could change the result
                    // (for example the Turkish dotted I).
                    code = code.toUpperCase(Locale.ENGLISH);
                }
                // data.add(mm.get("Recommended_Prefix"));
                // {"region", "BQ", "Description", "British Antarctic Territory",
                // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"},
                add(type2, code, data);
            }
        }

        Map<String, List<String>> zoneData = getZoneData();
        for (Entry<String, List<String>> zone : zoneData.entrySet()) {
            add(CodeType.tzid, zone.getKey(), zone.getValue().toString());
        }
    }

    /**
     * Returns the string that follows {@code current} when its letters are read
     * as base-26 digits (A/a = 0 ... Z/z = 25). The result has the same length;
     * it is lower case if {@code current} is all lower case, upper case if it
     * is all upper case, and title case otherwise.
     *
     * @param current the string to increment
     * @return the next string in sequence
     */
current)828 private static String nextAlpha(String current) { 829 // Don't care that this is inefficient 830 int value = 0; 831 for (int i = 0; i < current.length(); ++i) { 832 char c = current.charAt(i); 833 c -= c < 'a' ? 'A' : 'a'; 834 value = value * 26 + c; 835 } 836 value += 1; 837 String result = ""; 838 for (int i = 0; i < current.length(); ++i) { 839 result = (char) ((value % 26) + 'A') + result; 840 value = value / 26; 841 } 842 if (UCharacter.toLowerCase(current).equals(current)) { 843 result = UCharacter.toLowerCase(result); 844 } else if (UCharacter.toUpperCase(current).equals(current)) { 845 // do nothing 846 } else { 847 result = UCharacter.toTitleCase(result, null); 848 } 849 return result; 850 } 851 852 /** 853 * @param string 854 * @param string2 855 * @param string3 856 */ 857 private void add(CodeType type, String string2, String string3) { 858 List<String> l = new ArrayList<>(); 859 l.add(string3); 860 add(type, string2, l); 861 } 862 863 private void add(CodeType type, String code, List<String> otherData) { 864 // hack 865 if (type == CodeType.script) { 866 if (code.equals("Qaai")) { 867 otherData = new ArrayList<>(otherData); 868 otherData.set(0, "Inherited"); 869 } else if (code.equals("Zyyy")) { 870 otherData = new ArrayList<>(otherData); 871 otherData.set(0, "Common"); 872 } 873 } 874 875 // assume name is the first item 876 877 String name = otherData.get(0); 878 879 // add to main list 880 Map<String, List<String>> code_data = getCodeData(type); 881 if (code_data == null) { 882 code_data = new TreeMap<>(); 883 type_code_data.put(type, code_data); 884 } 885 List<String> lastData = code_data.get(code); 886 if (lastData != null) { 887 lastData.addAll(otherData); 888 } else { 889 code_data.put(code, otherData); 890 } 891 892 // now add mapping from name to codes 893 Map<String, List<String>> name_codes = type_name_codes.get(type); 894 if (name_codes == null) { 895 name_codes = new TreeMap<>(); 896 type_name_codes.put(type, name_codes); 897 } 
898 List<String> codes = name_codes.get(name); 899 if (codes == null) { 900 codes = new ArrayList<>(); 901 name_codes.put(name, codes); 902 } 903 codes.add(code); 904 } 905 906 private List<String> DELETED3166 = Collections.unmodifiableList(Arrays 907 .asList(new String[] { "BQ", "BU", "CT", "DD", "DY", "FQ", "FX", "HV", 908 "JT", "MI", "NH", "NQ", "NT", "PC", "PU", "PZ", "RH", "SU", "TP", 909 "VD", "WK", "YD", "YU", "ZR" })); 910 911 public List<String> getOld3166() { 912 return DELETED3166; 913 } 914 915 private Map<String, List<String>> WorldBankInfo; 916 917 public Map<String, List<String>> getWorldBankInfo() { 918 if (WorldBankInfo == null) { 919 List<String> temp = fillFromCommaFile("WorldBankInfo.txt", false); 920 WorldBankInfo = new HashMap<>(); 921 for (String line : temp) { 922 List<String> row = CldrUtility.splitList(line, ';', true); 923 String key = row.get(0); 924 row.remove(0); 925 WorldBankInfo.put(key, row); 926 } 927 WorldBankInfo = CldrUtility.protectCollection(WorldBankInfo); 928 } 929 return WorldBankInfo; 930 } 931 932 Set<String> moribundLanguages; 933 934 public Set<String> getMoribundLanguages() { 935 if (moribundLanguages == null) { 936 List<String> temp = fillFromCommaFile("moribund_languages.txt", true); 937 moribundLanguages = new TreeSet<>(); 938 moribundLanguages.addAll(temp); 939 moribundLanguages = CldrUtility.protectCollection(moribundLanguages); 940 } 941 return moribundLanguages; 942 } 943 944 // produces a list of the 'clean' lines 945 private List<String> fillFromCommaFile(String filename, boolean trim) { 946 try { 947 List<String> result = new ArrayList<>(); 948 String line; 949 BufferedReader lstreg = CldrUtility.getUTF8Data(filename); 950 while (true) { 951 line = lstreg.readLine(); 952 if (line == null) 953 break; 954 int commentPos = line.indexOf('#'); 955 if (commentPos >= 0) { 956 line = line.substring(0, commentPos); 957 } 958 if (trim) { 959 line = line.trim(); 960 } 961 if (line.length() == 0) 962 continue; 963 
result.add(line); 964 } 965 return result; 966 } catch (Exception e) { 967 throw (RuntimeException) new IllegalArgumentException( 968 "Can't process file: data/" + filename).initCause(e); 969 } 970 } 971 972 // return a complex map. language -> arn -> {"Comments" -> "x", 973 // "Description->y,...} 974 static String[][] extras = { 975 { "language", "root", "Description", "Root", "CLDR", "True" }, 976 // { "language", "cch", "Description", "Atsam", "CLDR", "True" }, 977 // { "language", "kaj", "Description", "Jju", "CLDR", "True" }, 978 // { "language", "kcg", "Description", "Tyap", "CLDR", "True" }, 979 // { "language", "kfo", "Description", "Koro", "CLDR", "True" }, 980 // { "language", "mfe", "Description", "Morisyen", "CLDR", "True" }, 981 // { "region", "172", "Description", "Commonwealth of Independent States", "CLDR", "True" }, 982 // { "region", "062", "Description", "South-Central Asia", "CLDR", "True" }, 983 // { "region", "003", "Description", "North America", "CLDR", "True" }, 984 // { "variant", "POLYTONI", "Description", "Polytonic Greek", "CLDR", "True", "Preferred-Value", "POLYTON" }, 985 { "variant", "REVISED", "Description", "Revised Orthography", "CLDR", "True" }, 986 { "variant", "SAAHO", "Description", "Dialect", "CLDR", "True" }, 987 { "variant", "POSIX", "Description", "Computer-Style", "CLDR", "True" }, 988 // {"region", "172", "Description", "Commonwealth of Independent States", 989 // "CLDR", "True"}, 990 // { "region", "", "Description", "European Union", "CLDR", "True" }, 991 { "region", "ZZ", "Description", "Unknown or Invalid Region", "CLDR", "True" }, 992 { "region", "QO", "Description", "Outlying Oceania", "CLDR", "True" }, 993 { "region", "XK", "Description", "Kosovo", "CLDR", "True" }, 994 { "script", "Qaai", "Description", "Inherited", "CLDR", "True" }, 995 // {"region", "003", "Description", "North America", "CLDR", "True"}, 996 // {"region", "062", "Description", "South-central Asia", "CLDR", "True"}, 997 // {"region", "200", 
"Description", "Czechoslovakia", "CLDR", "True"}, 998 // {"region", "830", "Description", "Channel Islands", "CLDR", "True"}, 999 // {"region", "833", "Description", "Isle of Man", "CLDR", "True"}, 1000 1001 // {"region", "NT", "Description", "Neutral Zone (formerly between Saudi 1002 // Arabia & Iraq)", "CLDR", "True", "Deprecated", "True"}, 1003 // {"region", "SU", "Description", "Union of Soviet Socialist Republics", 1004 // "CLDR", "True", "Deprecated", "True"}, 1005 // {"region", "BQ", "Description", "British Antarctic Territory", 1006 // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"}, 1007 // {"region", "CT", "Description", "Canton and Enderbury Islands", 1008 // "Preferred-Value", "KI", "CLDR", "True", "Deprecated", "True"}, 1009 // {"region", "FQ", "Description", "French Southern and Antarctic Territories 1010 // (now split between AQ and TF)", "CLDR", "True", "Deprecated", "True"}, 1011 // {"region", "JT", "Description", "Johnston Island", "Preferred-Value", "UM", 1012 // "CLDR", "True", "Deprecated", "True"}, 1013 // {"region", "MI", "Description", "Midway Islands", "Preferred-Value", "UM", 1014 // "CLDR", "True", "Deprecated", "True"}, 1015 // {"region", "NQ", "Description", "Dronning Maud Land", "Preferred-Value", 1016 // "AQ", "CLDR", "True", "Deprecated", "True"}, 1017 // {"region", "PC", "Description", "Pacific Islands Trust Territory (divided 1018 // into FM, MH, MP, and PW)", "Preferred-Value", "AQ", "CLDR", "True", 1019 // "Deprecated", "True"}, 1020 // {"region", "PU", "Description", "U.S. 
Miscellaneous Pacific Islands", 1021 // "Preferred-Value", "UM", "CLDR", "True", "Deprecated", "True"}, 1022 // {"region", "PZ", "Description", "Panama Canal Zone", "Preferred-Value", 1023 // "PA", "CLDR", "True", "Deprecated", "True"}, 1024 // {"region", "VD", "Description", "North Vietnam", "Preferred-Value", "VN", 1025 // "CLDR", "True", "Deprecated", "True"}, 1026 // {"region", "WK", "Description", "Wake Island", "Preferred-Value", "UM", 1027 // "CLDR", "True", "Deprecated", "True"}, 1028 }; 1029 1030 static final String registryName = CldrUtility.getProperty("registry", "language-subtag-registry"); 1031 1032 public enum LstrType { 1033 language("und", "zxx", "mul", "mis", "root"), 1034 script("Zzzz", "Zsym", "Zxxx", "Zmth"), 1035 region("ZZ"), 1036 variant(), 1037 extlang(true, false), 1038 legacy(true, false), 1039 redundant(true, false), 1040 /** specialized codes for validity; TODO: rename LstrType **/ 1041 currency(false, true, "XXX"), 1042 subdivision(false, true), 1043 unit(false, true), 1044 usage(false, true), 1045 zone(false, true); 1046 1047 public final Set<String> specials; 1048 public final String unknown; 1049 public final boolean isLstr; 1050 public final boolean isUnicode; 1051 1052 private LstrType(String... unknownValue) { 1053 this(true, true, unknownValue); 1054 } 1055 1056 private LstrType(boolean lstr, boolean unicode, String... unknownValue) { 1057 unknown = unknownValue.length == 0 ? 
null : unknownValue[0]; 1058 LinkedHashSet<String> set = new LinkedHashSet<>(Arrays.asList(unknownValue)); 1059 if (unknown != null) { 1060 set.remove(unknown); 1061 } 1062 specials = Collections.unmodifiableSet(set); 1063 isLstr = lstr; 1064 isUnicode = unicode; 1065 } 1066 1067 // 1068 static final Pattern WELLFORMED = Pattern.compile("([0-9]{3}|[a-zA-Z]{2})[a-zA-Z0-9]{1,4}"); 1069 1070 boolean isWellFormed(String candidate) { 1071 switch (this) { 1072 case subdivision: 1073 return WELLFORMED.matcher(candidate).matches(); 1074 default: 1075 throw new UnsupportedOperationException(); 1076 } 1077 } 1078 1079 /** 1080 * Generate compatibility string, returning 'territory' instead of 'region', etc. 1081 */ 1082 public String toCompatString() { 1083 switch (this) { 1084 case region: return "territory"; 1085 case legacy: return "language"; 1086 case redundant: return "language"; 1087 default: return toString(); 1088 } 1089 } 1090 1091 /** 1092 * Create LstrType from string, allowing the compat string 'territory'. 1093 */ 1094 public static LstrType fromString(String rawType) { 1095 try { 1096 return valueOf(rawType); 1097 } catch (IllegalArgumentException e) { 1098 if ("territory".equals(rawType)) { 1099 return region; 1100 } 1101 throw e; 1102 } 1103 } 1104 } 1105 1106 public enum LstrField { 1107 Type, Subtag, Description, Added, Scope, Tag, Suppress_Script, Macrolanguage, Deprecated, Preferred_Value, Comments, Prefix, CLDR; 1108 public static LstrField from(String s) { 1109 return LstrField.valueOf(s.trim().replace("-", "_")); 1110 } 1111 } 1112 1113 static Map<String, Map<String, Map<String, String>>> LSTREG; 1114 static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_ENUM; 1115 static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_RAW; 1116 1117 /** 1118 * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br> 1119 * That is, type => subtype => map<tag,value>. 
Descriptions are concatenated together, separated by 1120 * DESCRIPTION_SEPARATOR. 1121 * 1122 * @return 1123 */ 1124 public static Map<String, Map<String, Map<String, String>>> getLStreg() { 1125 if (LSTREG == null) { 1126 initLstr(); 1127 } 1128 return LSTREG; 1129 } 1130 1131 /** 1132 * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br> 1133 * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated by 1134 * DESCRIPTION_SEPARATOR. 1135 * 1136 * @return 1137 */ 1138 public static Map<LstrType, Map<String, Map<LstrField, String>>> getEnumLstreg() { 1139 if (LSTREG_ENUM == null) { 1140 initLstr(); 1141 } 1142 return LSTREG_ENUM; 1143 } 1144 1145 public static Map<LstrType, Map<String, Map<LstrField, String>>> getLstregEnumRaw() { 1146 if (LSTREG_ENUM == null) { 1147 initLstr(); 1148 } 1149 return LSTREG_RAW; 1150 } 1151 1152 private static void initLstr() { 1153 Map<LstrType, Map<String, Map<LstrField, String>>> result2 = new TreeMap<>(); 1154 1155 int lineNumber = 1; 1156 1157 Set<String> funnyTags = new TreeSet<>(); 1158 String line; 1159 try { 1160 BufferedReader lstreg = CldrUtility.getUTF8Data(registryName); 1161 LstrType lastType = null; 1162 String lastTag = null; 1163 Map<String, Map<LstrField, String>> subtagData = null; 1164 Map<LstrField, String> currentData = null; 1165 LstrField lastLabel = null; 1166 String lastRest = null; 1167 boolean inRealContent = false; 1168 // Map<String, String> translitCache = new HashMap<String, String>(); 1169 for (;; ++lineNumber) { 1170 line = lstreg.readLine(); 1171 if (line == null) 1172 break; 1173 if (line.length() == 0) 1174 continue; // skip blanks 1175 if (line.startsWith("File-Date: ")) { 1176 if (DEBUG) System.out.println("Language Subtag Registry: " + line); 1177 inRealContent = true; 1178 continue; 1179 } 1180 if (!inRealContent) { 1181 // skip until we get to real content 1182 continue; 1183 } 1184 // skip cruft 1185 if 
(line.startsWith("Internet-Draft")) { 1186 continue; 1187 } 1188 if (line.startsWith("Ewell")) { 1189 continue; 1190 } 1191 if (line.startsWith("\f")) { 1192 continue; 1193 } 1194 if (line.startsWith("4. Security Considerations")) { 1195 break; 1196 } 1197 1198 if (line.startsWith("%%")) 1199 continue; // skip separators (ok, since data starts with Type: 1200 if (line.startsWith(" ")) { 1201 currentData.put(lastLabel, lastRest + " " + line.trim()); 1202 continue; 1203 } 1204 1205 /* 1206 * Type: language Subtag: aa Description: Afar Added: 2005-10-16 1207 * Suppress-Script: Latn 1208 */ 1209 int pos2 = line.indexOf(':'); 1210 LstrField label = LstrField.from(line.substring(0, pos2)); 1211 String rest = line.substring(pos2 + 1).trim(); 1212 if (label == LstrField.Type) { 1213 lastType = rest.equals("grandfathered") ? 1214 LstrType.legacy : LstrType.fromString(rest); 1215 subtagData = CldrUtility.get(result2, lastType); 1216 if (subtagData == null) { 1217 result2.put(lastType, subtagData = new TreeMap<>()); 1218 } 1219 } else if (label == LstrField.Subtag 1220 || label == LstrField.Tag) { 1221 lastTag = rest; 1222 String endTag = null; 1223 // Subtag: qaa..qtz 1224 int pos = lastTag.indexOf(".."); 1225 if (pos >= 0) { 1226 endTag = lastTag.substring(pos + 2); 1227 lastTag = lastTag.substring(0, pos); 1228 } 1229 currentData = new TreeMap<>(); 1230 if (endTag == null) { 1231 putSubtagData(lastTag, subtagData, currentData); 1232 languageCount.add(lastType, 1); 1233 // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag); 1234 } else { 1235 for (; lastTag.compareTo(endTag) <= 0; lastTag = nextAlpha(lastTag)) { 1236 // System.out.println(">" + current); 1237 putSubtagData(lastTag, subtagData, currentData); 1238 languageCount.add(lastType, 1); 1239 // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag); 1240 } 1241 1242 } 1243 // label.equalsIgnoreCase("Added") || 
label.equalsIgnoreCase("Suppress-Script")) { 1244 // skip 1245 // } else if (pieces.length < 2) { 1246 // System.out.println("Odd Line: " + lastType + "\t" + lastTag + "\t" + line); 1247 } else { 1248 lastLabel = label; 1249 // The following code was removed because in the standard tests (TestAll) both lastRest and rest were always equal. 1250 // if(!translitCache.containsKey(rest)) { 1251 // lastRest = TransliteratorUtilities.fromXML.transliterate(rest); 1252 // translitCache.put(rest, lastRest); 1253 // if (!lastRest.equals(rest)) { 1254 // System.out.println(System.currentTimeMillis()+" initLStr: LastRest: '"+lastRest+"' Rest: '"+rest+"'"); 1255 // } 1256 // } else { 1257 // lastRest = translitCache.get(rest); 1258 // } 1259 lastRest = rest; 1260 String oldValue = CldrUtility.get(currentData, lastLabel); 1261 if (oldValue != null) { 1262 lastRest = oldValue + DESCRIPTION_SEPARATOR + lastRest; 1263 } 1264 currentData.put(lastLabel, lastRest); 1265 } 1266 } 1267 } catch (Exception e) { 1268 throw (RuntimeException) new IllegalArgumentException( 1269 "Can't process file: data/" 1270 + registryName + ";\t at line " + lineNumber).initCause(e); 1271 } finally { 1272 if (!funnyTags.isEmpty()) { 1273 if (DEBUG) 1274 System.out.println("Funny tags: " + funnyTags); 1275 } 1276 } 1277 // copy raw 1278 Map<LstrType, Map<String, Map<LstrField, String>>> rawLstreg = new TreeMap<>(); 1279 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry1 : result2.entrySet()) { 1280 LstrType key1 = entry1.getKey(); 1281 TreeMap<String, Map<LstrField, String>> raw1 = new TreeMap<>(); rawLstreg.put(key1, raw1)1282 rawLstreg.put(key1, raw1); 1283 for (Entry<String, Map<LstrField, String>> entry2 : entry1.getValue().entrySet()) { 1284 String key2 = entry2.getKey(); 1285 final Map<LstrField, String> value2 = entry2.getValue(); 1286 TreeMap<LstrField, String> raw2 = new TreeMap<>(); 1287 raw2.putAll(value2); raw1.put(key2, raw2)1288 raw1.put(key2, raw2); 1289 } 1290 } 1291 LSTREG_RAW 
= CldrUtility.protectCollection(rawLstreg); 1292 1293 // add extras 1294 for (int i = 0; i < extras.length; ++i) { 1295 Map<String, Map<LstrField, String>> subtagData = CldrUtility.get(result2, LstrType.fromString(extras[i][0])); 1296 if (subtagData == null) { LstrType.fromString(extras[i][0])1297 result2.put(LstrType.fromString(extras[i][0]), subtagData = new TreeMap<>()); 1298 } 1299 Map<LstrField, String> labelData = new TreeMap<>(); 1300 for (int j = 2; j < extras[i].length; j += 2) { LstrField.from(extras[i][j])1301 labelData.put(LstrField.from(extras[i][j]), extras[i][j + 1]); 1302 } 1303 Map<LstrField, String> old = CldrUtility.get(subtagData, extras[i][1]); 1304 if (old != null) { 1305 if (!"Private use".equals(CldrUtility.get(old, LstrField.Description))) { 1306 throw new IllegalArgumentException("REPLACING data for " + extras[i][1] + "\t" + old + "\twith" 1307 + labelData); 1308 } 1309 } 1310 if (false) { 1311 System.out.println((old != null ? "REPLACING" + "\t" + old : "ADDING") + 1312 " data for " + extras[i][1] + "\twith" + labelData); 1313 } subtagData.put(extras[i][1], labelData)1314 subtagData.put(extras[i][1], labelData); 1315 } 1316 // build compatibility map 1317 Map<String, Map<String, Map<String, String>>> result = new LinkedHashMap<>(); 1318 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : result2.entrySet()) { 1319 Map<String, Map<String, String>> copy2 = new LinkedHashMap<>(); 1320 result.put(entry.getKey().toString(), copy2); 1321 for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) { 1322 Map<String, String> copy3 = new LinkedHashMap<>(); entry2.getKey()1323 copy2.put(entry2.getKey(), copy3); 1324 for (Entry<LstrField, String> entry3 : entry2.getValue().entrySet()) { entry3.getValue()1325 copy3.put(entry3.getKey().toString(), entry3.getValue()); 1326 } 1327 } 1328 } 1329 LSTREG = CldrUtility.protectCollection(result); 1330 LSTREG_ENUM = CldrUtility.protectCollection(result2); 1331 } 1332 1333 
private static <K, K2, V> Map<K2, V> putSubtagData(K lastTag, Map<K, Map<K2, V>> subtagData, Map<K2, V> currentData) { 1334 Map<K2, V> oldData = subtagData.get(lastTag); 1335 if (oldData != null) { 1336 if (oldData.get("CLDR") != null) { 1337 System.out.println("overriding: " + lastTag + ", " + oldData); 1338 } else { 1339 throw new IllegalArgumentException("Duplicate tag: " + lastTag); 1340 } 1341 } 1342 return subtagData.put(lastTag, currentData); 1343 } 1344 1345 static Counter<LstrType> languageCount = new Counter<>(); 1346 1347 public static Counter<LstrType> getLanguageCount() { 1348 return languageCount; 1349 } 1350 1351 ZoneParser zoneParser = new ZoneParser(); 1352 1353 // static public final Set<String> MODERN_SCRIPTS = Collections 1354 // .unmodifiableSet(new TreeSet( 1355 // // "Bali " + 1356 // // "Bugi " + 1357 // // "Copt " + 1358 // // "Hano " + 1359 // // "Osma " + 1360 // // "Qaai " + 1361 // // "Sylo " + 1362 // // "Syrc " + 1363 // // "Tagb " + 1364 // // "Tglg " + 1365 // Arrays 1366 // .asList("Hans Hant Jpan Hrkt Kore Arab Armn Bali Beng Bopo Cans Cham Cher Cyrl Deva Ethi Geor Grek Gujr Guru Hani Hang Hebr Hira Knda Kana Kali Khmr Laoo Latn Lepc Limb Mlym Mong Mymr Talu Nkoo Olck Orya Saur Sinh Tale Taml Telu Thaa Thai Tibt Tfng Vaii Yiii" 1367 // .split("\\s+")))); 1368 1369 // updated to http://www.unicode.org/reports/tr31/tr31-9.html#Specific_Character_Adjustments 1370 1371 /** 1372 * @deprecated 1373 */ 1374 @Deprecated 1375 public Map<String, List<ZoneLine>> getZone_rules() { 1376 return zoneParser.getZone_rules(); 1377 } 1378 1379 /** 1380 * @deprecated 1381 */ 1382 @Deprecated 1383 public Map<String, List<String>> getZoneData() { 1384 return zoneParser.getZoneData(); 1385 } 1386 1387 /** 1388 * @deprecated 1389 */ 1390 @Deprecated 1391 public Set<String> getCanonicalTimeZones() { 1392 return zoneParser.getZoneData().keySet(); 1393 } 1394 1395 /** 1396 * @deprecated 1397 */ 1398 @Deprecated 1399 public Map<String, Set<String>> 
getCountryToZoneSet() { 1400 return zoneParser.getCountryToZoneSet(); 1401 } 1402 1403 /** 1404 * @deprecated 1405 */ 1406 @Deprecated 1407 public List<String> getDeprecatedZoneIDs() { 1408 return zoneParser.getDeprecatedZoneIDs(); 1409 } 1410 1411 /** 1412 * @deprecated 1413 */ 1414 @Deprecated 1415 public Comparator<String> getTZIDComparator() { 1416 return zoneParser.getTZIDComparator(); 1417 } 1418 1419 /** 1420 * @deprecated 1421 */ 1422 @Deprecated 1423 public Map<String, Set<String>> getZoneLinkNew_OldSet() { 1424 return zoneParser.getZoneLinkNew_OldSet(); 1425 } 1426 1427 /** 1428 * @deprecated 1429 */ 1430 @Deprecated 1431 public Map<String, String> getZoneLinkold_new() { 1432 return zoneParser.getZoneLinkold_new(); 1433 } 1434 1435 /** 1436 * @deprecated 1437 */ 1438 @Deprecated 1439 public Map getZoneRuleID_rules() { 1440 return zoneParser.getZoneRuleID_rules(); 1441 } 1442 1443 /** 1444 * @deprecated 1445 */ 1446 @Deprecated 1447 public Map<String, String> getZoneToCounty() { 1448 return zoneParser.getZoneToCounty(); 1449 } 1450 1451 /** 1452 * @deprecated 1453 */ 1454 @Deprecated 1455 public String getZoneVersion() { 1456 return zoneParser.getVersion(); 1457 } 1458 1459 public static String fixLanguageTag(String languageSubtag) { 1460 if (languageSubtag.equals("mo")) { // fix special cases 1461 return "ro"; 1462 } 1463 return languageSubtag; 1464 } 1465 1466 public boolean isModernLanguage(String languageCode) { 1467 if (getMoribundLanguages().contains(languageCode)) return false; 1468 Type type = Iso639Data.getType(languageCode); 1469 if (type == Type.Living) return true; 1470 if (languageCode.equals("eo")) return true; // exception for Esperanto 1471 // Scope scope = Iso639Data.getScope(languageCode); 1472 // if (scope == Scope.Collection) return false; 1473 return false; 1474 } 1475 1476 public static boolean isScriptModern(String script) { 1477 ScriptMetadata.Info info = ScriptMetadata.getInfo(script); 1478 if (info == null) { 1479 if (false) throw 
new IllegalArgumentException("No script metadata for: " + script); 1480 return false; 1481 } 1482 IdUsage idUsage = info.idUsage; 1483 return idUsage != IdUsage.EXCLUSION && idUsage != IdUsage.UNKNOWN; 1484 } 1485 1486 static final Pattern whitespace = PatternCache.get("\\s+"); 1487 static Set<String> filteredCurrencies = null; 1488 1489 public Set<String> getSurveyToolDisplayCodes(String type) { 1490 return getGoodAvailableCodes(type); 1491 } 1492 1493 static UnicodeSet COUNTRY = new UnicodeSet("[a-zA-Z]").freeze(); 1494 1495 /** 1496 * Quick check for whether valid country. Not complete: should use Validity 1497 * @param territory 1498 * @return 1499 */ 1500 public static boolean isCountry(String territory) { 1501 switch (territory) { 1502 case "ZZ": 1503 case "QO": 1504 case "EU": 1505 case "UN": 1506 case "EZ": 1507 return false; 1508 default: 1509 return territory.length() == 2 && COUNTRY.containsAll(territory); 1510 } 1511 } 1512 1513 public boolean isLstregPrivateUse(String type, String code) { 1514 Map<String, String> lStregData = getLStreg().get(type).get(code); 1515 return lStregData.get("Description").equalsIgnoreCase("private use"); 1516 } 1517 1518 public boolean isLstregDeprecated(String type, String code) { 1519 Map<String, String> lStregData = getLStreg().get(type).get(code); 1520 return lStregData.get("Deprecated") != null; 1521 } 1522 } 1523