1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.BufferedReader; 12 import java.io.IOException; 13 import java.util.ArrayList; 14 import java.util.Arrays; 15 import java.util.Collections; 16 import java.util.Comparator; 17 import java.util.EnumMap; 18 import java.util.EnumSet; 19 import java.util.HashMap; 20 import java.util.HashSet; 21 import java.util.Iterator; 22 import java.util.LinkedHashMap; 23 import java.util.LinkedHashSet; 24 import java.util.List; 25 import java.util.Locale; 26 import java.util.Map; 27 import java.util.Map.Entry; 28 import java.util.Set; 29 import java.util.TreeMap; 30 import java.util.TreeSet; 31 import java.util.regex.Pattern; 32 33 import org.unicode.cldr.draft.ScriptMetadata; 34 import org.unicode.cldr.draft.ScriptMetadata.IdUsage; 35 import org.unicode.cldr.util.Iso639Data.Type; 36 import org.unicode.cldr.util.ZoneParser.ZoneLine; 37 38 import com.ibm.icu.impl.Relation; 39 import com.ibm.icu.lang.UCharacter; 40 import com.ibm.icu.text.UnicodeSet; 41 import com.ibm.icu.util.ICUUncheckedIOException; 42 import com.ibm.icu.util.Output; 43 44 /** 45 * Provides access to various codes used by CLDR: RFC 3066, ISO 4217, Olson 46 * tzids 47 */ 48 public class StandardCodes { 49 50 public enum CodeType { 51 language, script, territory, extlang, legacy, redundant, variant, currency, tzid; from(String name)52 public static CodeType from(String name) { 53 if ("region".equals(name)) { 54 return territory; 55 } 56 return CodeType.valueOf(name); 57 } 58 } 59 60 private static final Set<CodeType> TypeSet = Collections.unmodifiableSet(EnumSet.allOf(CodeType.class)); 61 62 private static 
final Set<String> TypeStringSet; 63 static { 64 LinkedHashSet<String> foo = new LinkedHashSet<>(); 65 for (CodeType x : CodeType.values()) { x.toString()66 foo.add(x.toString()); 67 } 68 TypeStringSet = Collections.unmodifiableSet(foo); 69 } 70 71 public static final String DESCRIPTION_SEPARATOR = "\u25AA"; 72 73 public static final String NO_COUNTRY = "001"; 74 75 private static StandardCodes singleton; 76 77 private EnumMap<CodeType, Map<String, List<String>>> type_code_data = new EnumMap<>( 78 CodeType.class); 79 80 private EnumMap<CodeType, Map<String, List<String>>> type_name_codes = new EnumMap<>( 81 CodeType.class); 82 83 private EnumMap<CodeType, Map<String, String>> type_code_preferred = new EnumMap<>( 84 CodeType.class); 85 86 private Map<String, Set<String>> country_modernCurrency = new TreeMap<>(); 87 88 private Map<CodeType, Set<String>> goodCodes = new TreeMap<>(); 89 90 private static final boolean DEBUG = false; 91 92 /** 93 * Get the singleton copy of the standard codes. 94 */ make()95 static public synchronized StandardCodes make() { 96 if (singleton == null) 97 singleton = new StandardCodes(); 98 return singleton; 99 } 100 101 /** 102 * The data is the name in the case of RFC3066 codes, and the country code in 103 * the case of TZIDs and ISO currency codes. If the country code is missing, 104 * uses ZZ. 
105 */ getData(String type, String code)106 public String getData(String type, String code) { 107 Map<String, List<String>> code_data = getCodeData(type); 108 if (code_data == null) 109 return null; 110 List<String> list = code_data.get(code); 111 if (list == null) 112 return null; 113 return list.get(0); 114 } 115 116 /** 117 * @return the full data for the type and code For the data in lstreg, it is 118 * description | date | canonical_value | recommended_prefix # 119 * comments 120 */ getFullData(String type, String code)121 public List<String> getFullData(String type, String code) { 122 Map<String, List<String>> code_data = getCodeData(type); 123 if (code_data == null) 124 return null; 125 return code_data.get(code); 126 } 127 128 /** 129 * @return the full data for the type and code For the data in lstreg, it is 130 * description | date | canonical_value | recommended_prefix # 131 * comments 132 */ getFullData(CodeType type, String code)133 public List<String> getFullData(CodeType type, String code) { 134 Map<String, List<String>> code_data = type_code_data.get(type); 135 if (code_data == null) 136 return null; 137 return code_data.get(code); 138 } 139 getCodeData(String type)140 private Map<String, List<String>> getCodeData(String type) { 141 return getCodeData(CodeType.from(type)); 142 } 143 getCodeData(CodeType type)144 private Map<String, List<String>> getCodeData(CodeType type) { 145 return type_code_data.get(type); 146 } 147 148 /** 149 * Get at the language registry values, as a Map from label to value. 
150 * 151 * @param type 152 * @param code 153 * @return 154 */ getLangData(String type, String code)155 public Map<String, String> getLangData(String type, String code) { 156 try { 157 if (type.equals("territory")) 158 type = "region"; 159 else if (type.equals("variant")) code = code.toLowerCase(Locale.ENGLISH); 160 return (Map) ((Map) getLStreg().get(type)).get(code); 161 } catch (RuntimeException e) { 162 return null; 163 } 164 } 165 166 /** 167 * Return a replacement code, if available. If not, return null. 168 * 169 */ getReplacement(String type, String code)170 public String getReplacement(String type, String code) { 171 if (type.equals("currency")) 172 return null; // no replacement codes for currencies 173 List<String> data = getFullData(type, code); 174 if (data == null) 175 return null; 176 // if available, the replacement is a non-empty value other than --, in 177 // position 2. 178 if (data.size() < 3) 179 return null; 180 String replacement = data.get(2); 181 if (!replacement.equals("") && !replacement.equals("--")) 182 return replacement; 183 return null; 184 } 185 186 /** 187 * Return the list of codes that have the same data. For example, returns all 188 * currency codes for a country. If there is a preferred one, it is first. 189 * 190 * @param type 191 * @param data 192 * @return 193 */ 194 @Deprecated getCodes(String type, String data)195 public List<String> getCodes(String type, String data) { 196 return getCodes(CodeType.from(type), data); 197 } 198 199 /** 200 * Return the list of codes that have the same data. For example, returns all 201 * currency codes for a country. If there is a preferred one, it is first. 
202 */ getCodes(CodeType type, String data)203 public List<String> getCodes(CodeType type, String data) { 204 Map<String, List<String>> data_codes = type_name_codes.get(type); 205 if (data_codes == null) 206 return null; 207 return Collections.unmodifiableList(data_codes.get(data)); 208 } 209 210 /** 211 * Where there is a preferred code, return it. 212 */ 213 @Deprecated getPreferred(String type, String code)214 public String getPreferred(String type, String code) { 215 return getPreferred(CodeType.from(type), code); 216 } 217 218 /** 219 * Where there is a preferred code, return it. 220 */ 221 getPreferred(CodeType type, String code)222 public String getPreferred(CodeType type, String code) { 223 Map<String, String> code_preferred = type_code_preferred.get(type); 224 if (code_preferred == null) 225 return code; 226 String newCode = code_preferred.get(code); 227 if (newCode == null) 228 return code; 229 return newCode; 230 } 231 232 /** 233 * Get all the available types 234 */ getAvailableTypes()235 public Set<String> getAvailableTypes() { 236 return TypeStringSet; 237 } 238 239 /** 240 * Get all the available types 241 */ getAvailableTypesEnum()242 public Set<CodeType> getAvailableTypesEnum() { 243 return TypeSet; 244 } 245 246 /** 247 * Get all the available codes for a given type 248 * 249 * @param type 250 * @return 251 */ getAvailableCodes(String type)252 public Set<String> getAvailableCodes(String type) { 253 return getAvailableCodes(CodeType.from(type)); 254 } 255 256 /** 257 * Get all the available codes for a given type 258 * 259 * @param type 260 * @return 261 */ getAvailableCodes(CodeType type)262 public Set<String> getAvailableCodes(CodeType type) { 263 Map<String, List<String>> code_name = type_code_data.get(type); 264 return Collections.unmodifiableSet(code_name.keySet()); 265 } 266 getGoodAvailableCodes(String stringType)267 public Set<String> getGoodAvailableCodes(String stringType) { 268 return getGoodAvailableCodes(CodeType.from(stringType)); 269 
} 270 271 /** 272 * Get all the available "real" codes for a given type, excluding private use, 273 * but including some deprecated codes. Use SupplementalDataInfo getLocaleAliases to 274 * exclude others. 275 * 276 * @param type 277 * @return 278 */ getGoodAvailableCodes(CodeType type)279 public Set<String> getGoodAvailableCodes(CodeType type) { 280 Set<String> result = goodCodes.get(type); 281 if (result == null) { 282 synchronized (goodCodes) { 283 Map<String, List<String>> code_name = getCodeData(type); 284 SupplementalDataInfo sd = SupplementalDataInfo.getInstance(); 285 if (code_name == null) 286 return null; 287 result = new TreeSet<>(code_name.keySet()); 288 switch (type) { 289 case currency: 290 break; // nothing special 291 case language: 292 return sd.getCLDRLanguageCodes(); 293 case script: 294 return sd.getCLDRScriptCodes(); 295 case tzid: 296 break; // nothing special 297 default: 298 for (Iterator<String> it = result.iterator(); it.hasNext();) { 299 String code = it.next(); 300 if (code.equals("root") || code.equals("QO")) 301 continue; 302 List<String> data = getFullData(type, code); 303 if (data.size() < 3) { 304 if (DEBUG) 305 System.out.println(code + "\t" + data); 306 } 307 if ("PRIVATE USE".equalsIgnoreCase(data.get(0)) 308 || (!data.get(2).equals("") && !data.get(2).equals("--"))) { 309 // System.out.println("Removing: " + code); 310 it.remove(); 311 } 312 } 313 } 314 result = Collections.unmodifiableSet(result); 315 goodCodes.put(type, result); 316 } 317 } 318 return result; 319 } 320 321 private static Set<String> GOOD_COUNTRIES; 322 getGoodCountries()323 public Set<String> getGoodCountries() { 324 synchronized (goodCodes) { 325 if (GOOD_COUNTRIES == null) { 326 Set<String> temp = new LinkedHashSet<>(); 327 for (String s : getGoodAvailableCodes(CodeType.territory)) { 328 if (isCountry(s)) { 329 temp.add(s); 330 } 331 } 332 GOOD_COUNTRIES = Collections.unmodifiableSet(temp); 333 } 334 } 335 return GOOD_COUNTRIES; 336 } 337 338 /** 339 * Gets 
the modern currency. 340 */ getMainCurrencies(String countryCode)341 public Set<String> getMainCurrencies(String countryCode) { 342 return country_modernCurrency.get(countryCode); 343 } 344 345 private Map<Organization, Map<String, Level>> platform_locale_level = null; 346 private Map<Organization, Relation<Level, String>> platform_level_locale = null; 347 private Map<String, Map<String, String>> platform_locale_levelString = null; 348 349 // /** 350 // * Get rid of this 351 // * 352 // * @param type 353 // * @return 354 // * @throws IOException 355 // * @deprecated 356 // */ 357 // public String getEffectiveLocaleType(String type) throws IOException { 358 // if ((type != null) && (getLocaleCoverageOrganizations().contains(Organization.valueOf(type)))) { 359 // return type; 360 // } else { 361 // return null; // the default.. for now.. 362 // } 363 // } 364 365 static Comparator caseless = new Comparator() { 366 367 @Override 368 public int compare(Object arg0, Object arg1) { 369 String s1 = (String) arg0; 370 String s2 = (String) arg1; 371 return s1.compareToIgnoreCase(s2); 372 } 373 374 }; 375 376 /** 377 * Returns locales according to status. It returns a Map of Maps, key 1 is 378 * either IBM or Java (perhaps more later), key 2 is the Level. 
379 * 380 * @deprecated 381 */ 382 @Deprecated getLocaleTypes()383 public Map<Organization, Map<String, Level>> getLocaleTypes() { 384 synchronized (StandardCodes.class) { 385 if (platform_locale_level == null) { 386 loadPlatformLocaleStatus(); 387 } 388 } 389 return platform_locale_level; 390 } 391 392 /** 393 * Return map of locales to levels 394 * @param org 395 * @return 396 */ getLocaleToLevel(Organization org)397 public Map<String, Level> getLocaleToLevel(Organization org) { 398 return getLocaleTypes().get(org); 399 } 400 getLocaleCoverageLevel(String organization, String desiredLocale)401 public Level getLocaleCoverageLevel(String organization, String desiredLocale) { 402 return getLocaleCoverageLevel(Organization.fromString(organization), desiredLocale); 403 } 404 getLocaleCoverageLevel(Organization organization, String desiredLocale)405 public Level getLocaleCoverageLevel(Organization organization, String desiredLocale) { 406 return getLocaleCoverageLevel(organization, desiredLocale, new Output<LocaleCoverageType>()); 407 } 408 409 public enum LocaleCoverageType { 410 explicit, parent, star, undetermined 411 } 412 413 /** 414 * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing. 415 * A locale of "*" in the data means "everything else". 
416 */ getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType)417 public Level getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType) { 418 synchronized (StandardCodes.class) { 419 if (platform_locale_level == null) { 420 loadPlatformLocaleStatus(); 421 } 422 } 423 coverageType.value = LocaleCoverageType.undetermined; 424 if (organization == null) { 425 return Level.UNDETERMINED; 426 } 427 Map<String, Level> locale_status = platform_locale_level.get(organization); 428 if (locale_status == null) { 429 return Level.UNDETERMINED; 430 } 431 // see if there is a parent 432 String originalLocale = desiredLocale; 433 while (desiredLocale != null) { 434 Level status = locale_status.get(desiredLocale); 435 if (status != null && status != Level.UNDETERMINED) { 436 coverageType.value = originalLocale == desiredLocale ? LocaleCoverageType.explicit : LocaleCoverageType.parent; 437 return status; 438 } 439 desiredLocale = LocaleIDParser.getParent(desiredLocale); 440 } 441 Level status = locale_status.get("*"); 442 if (status != null && status != Level.UNDETERMINED) { 443 coverageType.value = LocaleCoverageType.star; 444 return status; 445 } 446 return Level.UNDETERMINED; 447 } 448 449 /** 450 * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing. 
*/
    public Level getDefaultLocaleCoverageLevel(Organization organization) {
        return getLocaleCoverageLevel(organization, "*");
    }

    /** Returns the organizations that have at least one entry in Locales.txt. */
    public Set<Organization> getLocaleCoverageOrganizations() {
        ensureLocaleStatusLoaded();
        return platform_locale_level.keySet();
    }

    /** Returns the names (toString) of the organizations that have entries in Locales.txt. */
    public Set<String> getLocaleCoverageOrganizationStrings() {
        ensureLocaleStatusLoaded();
        return platform_locale_levelString.keySet();
    }

    public Set<String> getLocaleCoverageLocales(String organization) {
        return getLocaleCoverageLocales(Organization.fromString(organization));
    }

    /**
     * Returns the locales that have a level for the organization.
     *
     * @return the locales; empty if the organization has no entries
     */
    public Set<String> getLocaleCoverageLocales(Organization organization) {
        ensureLocaleStatusLoaded();
        Map<String, Level> localeToLevel = platform_locale_level.get(organization);
        return localeToLevel == null ? Collections.<String> emptySet() : localeToLevel.keySet();
    }

    /** Returns the level-to-locales relation for the organization, or null if it has no entries. */
    public Relation<Level, String> getLevelsToLocalesFor(Organization organization) {
        ensureLocaleStatusLoaded();
        return platform_level_locale.get(organization);
    }

    /** Returns, in iteration order, the organization's locales whose coverage level is in {@code choice}. */
    public Set<String> getLocaleCoverageLocales(Organization organization, Set<Level> choice) {
        Set<String> result = new LinkedHashSet<>();
        for (String locale : getLocaleCoverageLocales(organization)) {
            if (choice.contains(getLocaleCoverageLevel(organization, locale))) {
                result.add(locale);
            }
        }
        return result;
    }

    /** Loads the Locales.txt tables on first use, under the class lock used by the other accessors. */
    private void ensureLocaleStatusLoaded() {
        synchronized (StandardCodes.class) {
            if (platform_locale_level == null || platform_level_locale == null) {
                loadPlatformLocaleStatus();
            }
        }
    }

    /**
     * Reads Locales.txt (organization ; locale ; level, with # comments) and builds
     * the three coverage tables. The tables are built in locals and only assigned
     * to the fields at the end, so a failed load leaves the fields untouched.
     *
     * @throws IllegalArgumentException if a line names an invalid organization, an
     *         invalid locale, or a default content locale
     * @throws ICUUncheckedIOException if the file cannot be read
     */
    private void loadPlatformLocaleStatus() {
        LocaleIDParser parser = new LocaleIDParser();
        Map<Organization, Map<String, Level>> orgToLocaleLevel = new EnumMap<>(Organization.class);
        SupplementalDataInfo sd = SupplementalDataInfo.getInstance();
        Set<String> defaultContentLocales = sd.getDefaultContentLocales();
        // try-with-resources: the reader used to be left open.
        try (BufferedReader reader = CldrUtility.getUTF8Data("Locales.txt")) {
            String line;
            while ((line = reader.readLine()) != null) {
                int commentPos = line.indexOf('#');
                if (commentPos >= 0) {
                    line = line.substring(0, commentPos);
                }
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                List<String> stuff = CldrUtility.splitList(line, ';', true);

                // verify that the organization is valid
                Organization organization;
                try {
                    organization = Organization.fromString(stuff.get(0));
                } catch (Exception e) {
                    throw new IllegalArgumentException("Invalid organization in Locales.txt: " + line, e);
                }

                // verify that the locale is valid BCP47
                String locale = stuff.get(1);
                if (!locale.equals("*")) {
                    parser.set(locale);
                    String valid = validate(parser);
                    if (valid.length() != 0) {
                        throw new IllegalArgumentException("Invalid locale in Locales.txt: " + line
                            + " (" + valid + ")");
                    }
                    locale = parser.toString(); // normalize

                    // verify that the locale is not a default content locale
                    if (defaultContentLocales.contains(locale)) {
                        throw new IllegalArgumentException("Cannot have default content locale in Locales.txt: " + line);
                    }
                }

                Level status = Level.get(stuff.get(2));
                if (status == Level.UNDETERMINED) {
                    System.out.println("Warning: Level unknown on: " + line);
                }
                Map<String, Level> localeToLevel = orgToLocaleLevel.get(organization);
                if (localeToLevel == null) {
                    localeToLevel = new TreeMap<>();
                    orgToLocaleLevel.put(organization, localeToLevel);
                }
                localeToLevel.put(locale, status);
                if (!locale.equals("*")) {
                    // Seed the language+script and language entries if they have no level yet.
                    String scriptLoc = parser.getLanguageScript();
                    if (localeToLevel.get(scriptLoc) == null) {
                        localeToLevel.put(scriptLoc, status);
                    }
                    String lang = parser.getLanguage();
                    if (localeToLevel.get(lang) == null) {
                        localeToLevel.put(lang, status);
                    }
                }
            }
        } catch (IOException e) {
            throw new ICUUncheckedIOException("Internal Error", e);
        }

        // now reset the parent to be the max of the children
        for (Map<String, Level> localeToLevel : orgToLocaleLevel.values()) {
            // Iterate over a snapshot of the keys so that put() can never disturb the iteration.
            List<String> locales = new ArrayList<>(localeToLevel.keySet());
            for (String locale : locales) {
                parser.set(locale);
                Level childLevel = localeToLevel.get(locale);

                String language = parser.getLanguage();
                if (!language.equals(locale)) {
                    raiseLevel(localeToLevel, language, childLevel);
                }
                String languageScript = parser.getLanguageScript();
                if (!languageScript.equals(language)) {
                    raiseLevel(localeToLevel, languageScript, childLevel);
                }
            }
        }

        // backwards compat hack
        Map<String, Map<String, String>> orgToLocaleLevelString = new TreeMap<>();
        Map<Organization, Relation<Level, String>> orgToLevelLocales = new EnumMap<>(Organization.class);
        for (Entry<Organization, Map<String, Level>> orgEntry : orgToLocaleLevel.entrySet()) {
            Organization platform = orgEntry.getKey();
            Map<String, Level> localeToLevel = orgEntry.getValue();

            Map<String, String> localeToLevelString = new TreeMap<>();
            for (Entry<String, Level> levelEntry : localeToLevel.entrySet()) {
                localeToLevelString.put(levelEntry.getKey(), levelEntry.getValue().toString());
            }
            orgToLocaleLevelString.put(platform.toString(), localeToLevelString);

            Relation<Level, String> levelToLocales = Relation.of(
                new EnumMap<Level, Set<String>>(Level.class), HashSet.class);
            levelToLocales.addAllInverted(localeToLevel).freeze();
            orgToLevelLocales.put(platform, levelToLocales);
        }

        // NOTE(review): the result of this call has always been discarded, unlike the two below;
        // kept as is because protectCollection's effect on its argument is not visible here.
        CldrUtility.protectCollection(orgToLevelLocales);
        platform_level_locale = orgToLevelLocales;
        platform_locale_levelString = CldrUtility.protectCollection(orgToLocaleLevelString);
        platform_locale_level = CldrUtility.protectCollection(orgToLocaleLevel);
    }

    /** Stores {@code childLevel} under {@code key} when the key has no level or a lower one. */
    private static void raiseLevel(Map<String, Level> localeToLevel, String key, Level childLevel) {
        Level current = localeToLevel.get(key);
        if (current == null || current.compareTo(childLevel) < 0) {
            localeToLevel.put(key, childLevel);
        }
    }

    /**
     * Checks the language, script and region of a parsed locale against the
     * available codes.
     *
     * @return a comma-separated list of problems, or the empty string if there are none
     */
    private String validate(LocaleIDParser parser) {
        StringBuilder message = new StringBuilder();
        String lang = parser.getLanguage();
        if (lang.length() == 0) {
            message.append(", Missing language");
        } else if (!getAvailableCodes("language").contains(lang)) {
            message.append(", Invalid language code: ").append(lang);
        }
        String script = parser.getScript();
        if (script.length() != 0 && !getAvailableCodes("script").contains(script)) {
            message.append(", Invalid script code: ").append(script);
        }
        String territory = parser.getRegion();
        if (territory.length() != 0 && !getAvailableCodes("territory").contains(territory)) {
            // Report the offending territory (this used to print the language code).
            message.append(", Invalid territory code: ").append(territory);
        }
        // Drop the leading ", " separator.
        return message.length() == 0 ? "" : message.substring(2);
    }

    /**
     * Ascertain that the given locale is in the given group specified by the
     * organization
     *
     * @param locale the locale to test
     * @param group the expected coverage group (a level name)
     * @param org the organization to consult
     * @return true if the locale's group for the organization equals {@code group}
     */
    public boolean isLocaleInGroup(String locale, String group, Organization org) {
        return group.equals(getGroup(locale, org));
    }

    public boolean isLocaleInGroup(String locale, String group, String org) {
        return isLocaleInGroup(locale, group, Organization.fromString(org));
    }

    public String getGroup(String locale, String org) {
        return getGroup(locale, Organization.fromString(org));
    }

    /**
     * Gets the coverage group given a locale and org
     *
     * @param locale the locale to look up
     * @param org the organization to consult
     * @return group if available, null if not
     */
    public String getGroup(String locale, Organization org) {
        Level level = getLocaleCoverageLevel(org, locale);
        return level.equals(Level.UNDETERMINED) ? null : level.toString();
    }

    // ========== PRIVATES ==========

    /**
     * Builds the code tables from ISO4217.txt, the language subtag registry
     * (getLStreg) and the zone data (getZoneData).
     *
     * @throws IllegalArgumentException if a code file cannot be read or parsed
     */
    private StandardCodes() {
        String[] files = { "ISO4217.txt" }; // , "TZID.txt"
        type_code_preferred.put(CodeType.tzid, new TreeMap<String, String>());
        add(CodeType.language, "root", "Root");
        String originalLine = null;
        for (String fileName : files) {
            // try-with-resources closes the reader on failure as well as on success.
            try (BufferedReader reader = CldrUtility.getUTF8Data(fileName)) {
                while ((originalLine = reader.readLine()) != null) {
                    addCodeFileLine(originalLine);
                }
            } catch (Exception e) {
                System.err.println("WARNING: " + fileName
                    + " may be a corrupted UTF-8 file. Please check.");
                throw new IllegalArgumentException(
                    "Can't read " + fileName + "\t" + originalLine, e);
            }
        }
        // Protect only after every file is loaded, so later files can still add currencies.
        country_modernCurrency = CldrUtility.protectCollection(country_modernCurrency);

        // data is: description | date | canonical_value | recommended_prefix #
        // comments
        // HACK, just rework

        Map<String, Map<String, Map<String, String>>> languageRegistry = getLStreg();
        for (Entry<String, Map<String, Map<String, String>>> typeEntry : languageRegistry.entrySet()) {
            String typeName = typeEntry.getKey();
            CodeType type = CodeType.from(typeName);
            for (Entry<String, Map<String, String>> codeEntry : typeEntry.getValue().entrySet()) {
                String code = codeEntry.getKey();
                Map<String, String> fields = codeEntry.getValue();
                List<String> data = new ArrayList<>(3);
                data.add(fields.get("Description"));
                data.add(fields.get("Added"));
                String pref = fields.get("Preferred-Value");
                if (pref == null) {
                    pref = fields.get("Deprecated") == null ? "" : "deprecated";
                }
                data.add(pref);
                if (typeName.equals("variant")) {
                    // Fixed locale, so the result does not depend on the JVM default locale.
                    code = code.toUpperCase(Locale.ENGLISH);
                }
                add(type, code, data);
            }
        }

        Map<String, List<String>> zoneData = getZoneData();
        for (Entry<String, List<String>> zoneEntry : zoneData.entrySet()) {
            add(CodeType.tzid, zoneEntry.getKey(), zoneEntry.getValue().toString());
        }
    }

    /**
     * Parses one line of a code file (type | code | name | ..., with # comments)
     * and adds its codes to the tables. Blank lines and "date" lines are skipped.
     */
    private void addCodeFileLine(String rawLine) {
        String line = rawLine;
        if (line.startsWith("\uFEFF")) {
            line = line.substring(1);
        }
        line = line.trim();
        int commentPos = line.indexOf('#');
        String comment = "";
        if (commentPos >= 0) {
            comment = line.substring(commentPos + 1).trim();
            line = line.substring(0, commentPos);
        }
        if (line.length() == 0) {
            return;
        }
        List<String> pieces = CldrUtility.splitList(line, '|', true,
            new ArrayList<String>());
        String typeName = pieces.remove(0);
        String code = pieces.remove(0);
        // Test the name before converting it: "date" is not a CodeType, and comparing the enum
        // against the String (as was done before) could never match.
        if ("date".equals(typeName)) {
            return;
        }
        CodeType type = CodeType.from(typeName);

        String oldName = pieces.get(0);
        int pos = oldName.indexOf(';');
        if (pos >= 0) {
            oldName = oldName.substring(0, pos).trim();
            pieces.set(0, oldName);
        }

        List<String> data = pieces;
        if (comment.indexOf("deprecated") >= 0) {
            if (data.get(2).length() == 0) {
                data.set(2, "--");
            }
        }
        if (oldName.equalsIgnoreCase("PRIVATE USE")) {
            int separatorPos = code.indexOf("..");
            if (separatorPos < 0) {
                add(type, code, data);
            } else {
                String current = code.substring(0, separatorPos);
                String end = code.substring(separatorPos + 2);
                for (; current.compareTo(end) <= 0; current = nextAlpha(current)) {
                    add(type, current, data);
                    if (current.equals(end)) {
                        break; // nextAlpha wraps around after an all-'Z' code
                    }
                }
            }
            return;
        }
        if (type != CodeType.tzid) {
            add(type, code, data);
            if (type == CodeType.currency) {
                // currency | TPE | Timor Escudo | TP | EAST TIMOR | O
                if (data.size() > 3 && data.get(3).equals("C")) {
                    String country = data.get(1);
                    Set<String> codes = country_modernCurrency.get(country);
                    if (codes == null) {
                        codes = new TreeSet<>();
                        country_modernCurrency.put(country, codes);
                    }
                    codes.add(code);
                }
            }
            return;
        }
        // type = tzid: every remaining piece is a code; the first one is the preferred form.
        String preferred = null;
        for (int i = 0; i < pieces.size(); ++i) {
            code = pieces.get(i);
            add(type, code, data);
            if (preferred == null) {
                preferred = code;
            } else {
                type_code_preferred.get(type).put(code, preferred);
            }
        }
    }

    /**
     * Returns the code that follows {@code current} when its letters are read as
     * base-26 digits.
     *
     * @param current the code to advance; assumed to consist of ASCII letters
     * @return the next code, with the same length and the same case style
     */
current)824 private static String nextAlpha(String current) { 825 // Don't care that this is inefficient 826 int value = 0; 827 for (int i = 0; i < current.length(); ++i) { 828 char c = current.charAt(i); 829 c -= c < 'a' ? 'A' : 'a'; 830 value = value * 26 + c; 831 } 832 value += 1; 833 String result = ""; 834 for (int i = 0; i < current.length(); ++i) { 835 result = (char) ((value % 26) + 'A') + result; 836 value = value / 26; 837 } 838 if (UCharacter.toLowerCase(current).equals(current)) { 839 result = UCharacter.toLowerCase(result); 840 } else if (UCharacter.toUpperCase(current).equals(current)) { 841 // do nothing 842 } else { 843 result = UCharacter.toTitleCase(result, null); 844 } 845 return result; 846 } 847 848 /** 849 * @param string 850 * @param string2 851 * @param string3 852 */ 853 private void add(CodeType type, String string2, String string3) { 854 List<String> l = new ArrayList<>(); 855 l.add(string3); 856 add(type, string2, l); 857 } 858 859 private void add(CodeType type, String code, List<String> otherData) { 860 // hack 861 if (type == CodeType.script) { 862 if (code.equals("Qaai")) { 863 otherData = new ArrayList<>(otherData); 864 otherData.set(0, "Inherited"); 865 } else if (code.equals("Zyyy")) { 866 otherData = new ArrayList<>(otherData); 867 otherData.set(0, "Common"); 868 } 869 } 870 871 // assume name is the first item 872 873 String name = otherData.get(0); 874 875 // add to main list 876 Map<String, List<String>> code_data = getCodeData(type); 877 if (code_data == null) { 878 code_data = new TreeMap<>(); 879 type_code_data.put(type, code_data); 880 } 881 List<String> lastData = code_data.get(code); 882 if (lastData != null) { 883 lastData.addAll(otherData); 884 } else { 885 code_data.put(code, otherData); 886 } 887 888 // now add mapping from name to codes 889 Map<String, List<String>> name_codes = type_name_codes.get(type); 890 if (name_codes == null) { 891 name_codes = new TreeMap<>(); 892 type_name_codes.put(type, name_codes); 893 } 
894 List<String> codes = name_codes.get(name); 895 if (codes == null) { 896 codes = new ArrayList<>(); 897 name_codes.put(name, codes); 898 } 899 codes.add(code); 900 } 901 902 private List<String> DELETED3166 = Collections.unmodifiableList(Arrays 903 .asList(new String[] { "BQ", "BU", "CT", "DD", "DY", "FQ", "FX", "HV", 904 "JT", "MI", "NH", "NQ", "NT", "PC", "PU", "PZ", "RH", "SU", "TP", 905 "VD", "WK", "YD", "YU", "ZR" })); 906 907 public List<String> getOld3166() { 908 return DELETED3166; 909 } 910 911 private Map<String, List<String>> WorldBankInfo; 912 913 public Map<String, List<String>> getWorldBankInfo() { 914 if (WorldBankInfo == null) { 915 List<String> temp = fillFromCommaFile("WorldBankInfo.txt", false); 916 WorldBankInfo = new HashMap<>(); 917 for (String line : temp) { 918 List<String> row = CldrUtility.splitList(line, ';', true); 919 String key = row.get(0); 920 row.remove(0); 921 WorldBankInfo.put(key, row); 922 } 923 WorldBankInfo = CldrUtility.protectCollection(WorldBankInfo); 924 } 925 return WorldBankInfo; 926 } 927 928 Set<String> moribundLanguages; 929 930 public Set<String> getMoribundLanguages() { 931 if (moribundLanguages == null) { 932 List<String> temp = fillFromCommaFile("moribund_languages.txt", true); 933 moribundLanguages = new TreeSet<>(); 934 moribundLanguages.addAll(temp); 935 moribundLanguages = CldrUtility.protectCollection(moribundLanguages); 936 } 937 return moribundLanguages; 938 } 939 940 // produces a list of the 'clean' lines 941 private List<String> fillFromCommaFile(String filename, boolean trim) { 942 try { 943 List<String> result = new ArrayList<>(); 944 String line; 945 BufferedReader lstreg = CldrUtility.getUTF8Data(filename); 946 while (true) { 947 line = lstreg.readLine(); 948 if (line == null) 949 break; 950 int commentPos = line.indexOf('#'); 951 if (commentPos >= 0) { 952 line = line.substring(0, commentPos); 953 } 954 if (trim) { 955 line = line.trim(); 956 } 957 if (line.length() == 0) 958 continue; 959 
result.add(line); 960 } 961 return result; 962 } catch (Exception e) { 963 throw (RuntimeException) new IllegalArgumentException( 964 "Can't process file: data/" + filename).initCause(e); 965 } 966 } 967 968 // return a complex map. language -> arn -> {"Comments" -> "x", 969 // "Description->y,...} 970 static String[][] extras = { 971 { "language", "root", "Description", "Root", "CLDR", "True" }, 972 // { "language", "cch", "Description", "Atsam", "CLDR", "True" }, 973 // { "language", "kaj", "Description", "Jju", "CLDR", "True" }, 974 // { "language", "kcg", "Description", "Tyap", "CLDR", "True" }, 975 // { "language", "kfo", "Description", "Koro", "CLDR", "True" }, 976 // { "language", "mfe", "Description", "Morisyen", "CLDR", "True" }, 977 // { "region", "172", "Description", "Commonwealth of Independent States", "CLDR", "True" }, 978 // { "region", "062", "Description", "South-Central Asia", "CLDR", "True" }, 979 // { "region", "003", "Description", "North America", "CLDR", "True" }, 980 // { "variant", "POLYTONI", "Description", "Polytonic Greek", "CLDR", "True", "Preferred-Value", "POLYTON" }, 981 { "variant", "REVISED", "Description", "Revised Orthography", "CLDR", "True" }, 982 { "variant", "SAAHO", "Description", "Dialect", "CLDR", "True" }, 983 { "variant", "POSIX", "Description", "Computer-Style", "CLDR", "True" }, 984 // {"region", "172", "Description", "Commonwealth of Independent States", 985 // "CLDR", "True"}, 986 // { "region", "", "Description", "European Union", "CLDR", "True" }, 987 { "region", "ZZ", "Description", "Unknown or Invalid Region", "CLDR", "True" }, 988 { "region", "QO", "Description", "Outlying Oceania", "CLDR", "True" }, 989 { "region", "XK", "Description", "Kosovo", "CLDR", "True" }, 990 { "script", "Qaai", "Description", "Inherited", "CLDR", "True" }, 991 // {"region", "003", "Description", "North America", "CLDR", "True"}, 992 // {"region", "062", "Description", "South-central Asia", "CLDR", "True"}, 993 // {"region", "200", 
"Description", "Czechoslovakia", "CLDR", "True"}, 994 // {"region", "830", "Description", "Channel Islands", "CLDR", "True"}, 995 // {"region", "833", "Description", "Isle of Man", "CLDR", "True"}, 996 997 // {"region", "NT", "Description", "Neutral Zone (formerly between Saudi 998 // Arabia & Iraq)", "CLDR", "True", "Deprecated", "True"}, 999 // {"region", "SU", "Description", "Union of Soviet Socialist Republics", 1000 // "CLDR", "True", "Deprecated", "True"}, 1001 // {"region", "BQ", "Description", "British Antarctic Territory", 1002 // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"}, 1003 // {"region", "CT", "Description", "Canton and Enderbury Islands", 1004 // "Preferred-Value", "KI", "CLDR", "True", "Deprecated", "True"}, 1005 // {"region", "FQ", "Description", "French Southern and Antarctic Territories 1006 // (now split between AQ and TF)", "CLDR", "True", "Deprecated", "True"}, 1007 // {"region", "JT", "Description", "Johnston Island", "Preferred-Value", "UM", 1008 // "CLDR", "True", "Deprecated", "True"}, 1009 // {"region", "MI", "Description", "Midway Islands", "Preferred-Value", "UM", 1010 // "CLDR", "True", "Deprecated", "True"}, 1011 // {"region", "NQ", "Description", "Dronning Maud Land", "Preferred-Value", 1012 // "AQ", "CLDR", "True", "Deprecated", "True"}, 1013 // {"region", "PC", "Description", "Pacific Islands Trust Territory (divided 1014 // into FM, MH, MP, and PW)", "Preferred-Value", "AQ", "CLDR", "True", 1015 // "Deprecated", "True"}, 1016 // {"region", "PU", "Description", "U.S. 
Miscellaneous Pacific Islands", 1017 // "Preferred-Value", "UM", "CLDR", "True", "Deprecated", "True"}, 1018 // {"region", "PZ", "Description", "Panama Canal Zone", "Preferred-Value", 1019 // "PA", "CLDR", "True", "Deprecated", "True"}, 1020 // {"region", "VD", "Description", "North Vietnam", "Preferred-Value", "VN", 1021 // "CLDR", "True", "Deprecated", "True"}, 1022 // {"region", "WK", "Description", "Wake Island", "Preferred-Value", "UM", 1023 // "CLDR", "True", "Deprecated", "True"}, 1024 }; 1025 1026 static final String registryName = CldrUtility.getProperty("registry", "language-subtag-registry"); 1027 1028 public enum LstrType { 1029 language("und", "zxx", "mul", "mis", "root"), 1030 script("Zzzz", "Zsym", "Zxxx", "Zmth"), 1031 region("ZZ"), 1032 variant(), 1033 extlang(true, false), 1034 legacy(true, false), 1035 redundant(true, false), 1036 /** specialized codes for validity; TODO: rename LstrType **/ 1037 currency(false, true, "XXX"), 1038 subdivision(false, true), 1039 unit(false, true), 1040 usage(false, true), 1041 zone(false, true); 1042 1043 public final Set<String> specials; 1044 public final String unknown; 1045 public final boolean isLstr; 1046 public final boolean isUnicode; 1047 1048 private LstrType(String... unknownValue) { 1049 this(true, true, unknownValue); 1050 } 1051 1052 private LstrType(boolean lstr, boolean unicode, String... unknownValue) { 1053 unknown = unknownValue.length == 0 ? 
null : unknownValue[0]; 1054 LinkedHashSet<String> set = new LinkedHashSet<>(Arrays.asList(unknownValue)); 1055 if (unknown != null) { 1056 set.remove(unknown); 1057 } 1058 specials = Collections.unmodifiableSet(set); 1059 isLstr = lstr; 1060 isUnicode = unicode; 1061 } 1062 1063 // 1064 static final Pattern WELLFORMED = Pattern.compile("([0-9]{3}|[a-zA-Z]{2})[a-zA-Z0-9]{1,4}"); 1065 1066 boolean isWellFormed(String candidate) { 1067 switch (this) { 1068 case subdivision: 1069 return WELLFORMED.matcher(candidate).matches(); 1070 default: 1071 throw new UnsupportedOperationException(); 1072 } 1073 } 1074 1075 /** 1076 * Generate compatibility string, returning 'territory' instead of 'region', etc. 1077 */ 1078 public String toCompatString() { 1079 switch (this) { 1080 case region: return "territory"; 1081 case legacy: return "language"; 1082 case redundant: return "language"; 1083 default: return toString(); 1084 } 1085 } 1086 1087 /** 1088 * Create LstrType from string, allowing the compat string 'territory'. 1089 */ 1090 public static LstrType fromString(String rawType) { 1091 try { 1092 return valueOf(rawType); 1093 } catch (IllegalArgumentException e) { 1094 if ("territory".equals(rawType)) { 1095 return region; 1096 } 1097 throw e; 1098 } 1099 } 1100 } 1101 1102 public enum LstrField { 1103 Type, Subtag, Description, Added, Scope, Tag, Suppress_Script, Macrolanguage, Deprecated, Preferred_Value, Comments, Prefix, CLDR; 1104 public static LstrField from(String s) { 1105 return LstrField.valueOf(s.trim().replace("-", "_")); 1106 } 1107 } 1108 1109 static Map<String, Map<String, Map<String, String>>> LSTREG; 1110 static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_ENUM; 1111 static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_RAW; 1112 1113 /** 1114 * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br> 1115 * That is, type => subtype => map<tag,value>. 
Descriptions are concatenated together, separated by 1116 * DESCRIPTION_SEPARATOR. 1117 * 1118 * @return 1119 */ 1120 public static Map<String, Map<String, Map<String, String>>> getLStreg() { 1121 if (LSTREG == null) { 1122 initLstr(); 1123 } 1124 return LSTREG; 1125 } 1126 1127 /** 1128 * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br> 1129 * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated by 1130 * DESCRIPTION_SEPARATOR. 1131 * 1132 * @return 1133 */ 1134 public static Map<LstrType, Map<String, Map<LstrField, String>>> getEnumLstreg() { 1135 if (LSTREG_ENUM == null) { 1136 initLstr(); 1137 } 1138 return LSTREG_ENUM; 1139 } 1140 1141 public static Map<LstrType, Map<String, Map<LstrField, String>>> getLstregEnumRaw() { 1142 if (LSTREG_ENUM == null) { 1143 initLstr(); 1144 } 1145 return LSTREG_RAW; 1146 } 1147 1148 private static void initLstr() { 1149 Map<LstrType, Map<String, Map<LstrField, String>>> result2 = new TreeMap<>(); 1150 1151 int lineNumber = 1; 1152 1153 Set<String> funnyTags = new TreeSet<>(); 1154 String line; 1155 try { 1156 BufferedReader lstreg = CldrUtility.getUTF8Data(registryName); 1157 LstrType lastType = null; 1158 String lastTag = null; 1159 Map<String, Map<LstrField, String>> subtagData = null; 1160 Map<LstrField, String> currentData = null; 1161 LstrField lastLabel = null; 1162 String lastRest = null; 1163 boolean inRealContent = false; 1164 // Map<String, String> translitCache = new HashMap<String, String>(); 1165 for (;; ++lineNumber) { 1166 line = lstreg.readLine(); 1167 if (line == null) 1168 break; 1169 if (line.length() == 0) 1170 continue; // skip blanks 1171 if (line.startsWith("File-Date: ")) { 1172 if (DEBUG) System.out.println("Language Subtag Registry: " + line); 1173 inRealContent = true; 1174 continue; 1175 } 1176 if (!inRealContent) { 1177 // skip until we get to real content 1178 continue; 1179 } 1180 // skip cruft 1181 if 
(line.startsWith("Internet-Draft")) { 1182 continue; 1183 } 1184 if (line.startsWith("Ewell")) { 1185 continue; 1186 } 1187 if (line.startsWith("\f")) { 1188 continue; 1189 } 1190 if (line.startsWith("4. Security Considerations")) { 1191 break; 1192 } 1193 1194 if (line.startsWith("%%")) 1195 continue; // skip separators (ok, since data starts with Type: 1196 if (line.startsWith(" ")) { 1197 currentData.put(lastLabel, lastRest + " " + line.trim()); 1198 continue; 1199 } 1200 1201 /* 1202 * Type: language Subtag: aa Description: Afar Added: 2005-10-16 1203 * Suppress-Script: Latn 1204 */ 1205 int pos2 = line.indexOf(':'); 1206 LstrField label = LstrField.from(line.substring(0, pos2)); 1207 String rest = line.substring(pos2 + 1).trim(); 1208 if (label == LstrField.Type) { 1209 lastType = rest.equals("grandfathered") ? 1210 LstrType.legacy : LstrType.fromString(rest); 1211 subtagData = CldrUtility.get(result2, lastType); 1212 if (subtagData == null) { 1213 result2.put(lastType, subtagData = new TreeMap<>()); 1214 } 1215 } else if (label == LstrField.Subtag 1216 || label == LstrField.Tag) { 1217 lastTag = rest; 1218 String endTag = null; 1219 // Subtag: qaa..qtz 1220 int pos = lastTag.indexOf(".."); 1221 if (pos >= 0) { 1222 endTag = lastTag.substring(pos + 2); 1223 lastTag = lastTag.substring(0, pos); 1224 } 1225 currentData = new TreeMap<>(); 1226 if (endTag == null) { 1227 putSubtagData(lastTag, subtagData, currentData); 1228 languageCount.add(lastType, 1); 1229 // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag); 1230 } else { 1231 for (; lastTag.compareTo(endTag) <= 0; lastTag = nextAlpha(lastTag)) { 1232 // System.out.println(">" + current); 1233 putSubtagData(lastTag, subtagData, currentData); 1234 languageCount.add(lastType, 1); 1235 // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag); 1236 } 1237 1238 } 1239 // label.equalsIgnoreCase("Added") || 
label.equalsIgnoreCase("Suppress-Script")) { 1240 // skip 1241 // } else if (pieces.length < 2) { 1242 // System.out.println("Odd Line: " + lastType + "\t" + lastTag + "\t" + line); 1243 } else { 1244 lastLabel = label; 1245 // The following code was removed because in the standard tests (TestAll) both lastRest and rest were always equal. 1246 // if(!translitCache.containsKey(rest)) { 1247 // lastRest = TransliteratorUtilities.fromXML.transliterate(rest); 1248 // translitCache.put(rest, lastRest); 1249 // if (!lastRest.equals(rest)) { 1250 // System.out.println(System.currentTimeMillis()+" initLStr: LastRest: '"+lastRest+"' Rest: '"+rest+"'"); 1251 // } 1252 // } else { 1253 // lastRest = translitCache.get(rest); 1254 // } 1255 lastRest = rest; 1256 String oldValue = CldrUtility.get(currentData, lastLabel); 1257 if (oldValue != null) { 1258 lastRest = oldValue + DESCRIPTION_SEPARATOR + lastRest; 1259 } 1260 currentData.put(lastLabel, lastRest); 1261 } 1262 } 1263 } catch (Exception e) { 1264 throw (RuntimeException) new IllegalArgumentException( 1265 "Can't process file: data/" 1266 + registryName + ";\t at line " + lineNumber).initCause(e); 1267 } finally { 1268 if (!funnyTags.isEmpty()) { 1269 if (DEBUG) 1270 System.out.println("Funny tags: " + funnyTags); 1271 } 1272 } 1273 // copy raw 1274 Map<LstrType, Map<String, Map<LstrField, String>>> rawLstreg = new TreeMap<>(); 1275 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry1 : result2.entrySet()) { 1276 LstrType key1 = entry1.getKey(); 1277 TreeMap<String, Map<LstrField, String>> raw1 = new TreeMap<>(); rawLstreg.put(key1, raw1)1278 rawLstreg.put(key1, raw1); 1279 for (Entry<String, Map<LstrField, String>> entry2 : entry1.getValue().entrySet()) { 1280 String key2 = entry2.getKey(); 1281 final Map<LstrField, String> value2 = entry2.getValue(); 1282 TreeMap<LstrField, String> raw2 = new TreeMap<>(); 1283 raw2.putAll(value2); raw1.put(key2, raw2)1284 raw1.put(key2, raw2); 1285 } 1286 } 1287 LSTREG_RAW 
= CldrUtility.protectCollection(rawLstreg); 1288 1289 // add extras 1290 for (int i = 0; i < extras.length; ++i) { 1291 Map<String, Map<LstrField, String>> subtagData = CldrUtility.get(result2, LstrType.fromString(extras[i][0])); 1292 if (subtagData == null) { LstrType.fromString(extras[i][0])1293 result2.put(LstrType.fromString(extras[i][0]), subtagData = new TreeMap<>()); 1294 } 1295 Map<LstrField, String> labelData = new TreeMap<>(); 1296 for (int j = 2; j < extras[i].length; j += 2) { LstrField.from(extras[i][j])1297 labelData.put(LstrField.from(extras[i][j]), extras[i][j + 1]); 1298 } 1299 Map<LstrField, String> old = CldrUtility.get(subtagData, extras[i][1]); 1300 if (old != null) { 1301 if (!"Private use".equals(CldrUtility.get(old, LstrField.Description))) { 1302 throw new IllegalArgumentException("REPLACING data for " + extras[i][1] + "\t" + old + "\twith" 1303 + labelData); 1304 } 1305 } 1306 if (false) { 1307 System.out.println((old != null ? "REPLACING" + "\t" + old : "ADDING") + 1308 " data for " + extras[i][1] + "\twith" + labelData); 1309 } subtagData.put(extras[i][1], labelData)1310 subtagData.put(extras[i][1], labelData); 1311 } 1312 // build compatibility map 1313 Map<String, Map<String, Map<String, String>>> result = new LinkedHashMap<>(); 1314 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : result2.entrySet()) { 1315 Map<String, Map<String, String>> copy2 = new LinkedHashMap<>(); 1316 result.put(entry.getKey().toString(), copy2); 1317 for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) { 1318 Map<String, String> copy3 = new LinkedHashMap<>(); entry2.getKey()1319 copy2.put(entry2.getKey(), copy3); 1320 for (Entry<LstrField, String> entry3 : entry2.getValue().entrySet()) { entry3.getValue()1321 copy3.put(entry3.getKey().toString(), entry3.getValue()); 1322 } 1323 } 1324 } 1325 LSTREG = CldrUtility.protectCollection(result); 1326 LSTREG_ENUM = CldrUtility.protectCollection(result2); 1327 } 1328 1329 
private static <K, K2, V> Map<K2, V> putSubtagData(K lastTag, Map<K, Map<K2, V>> subtagData, Map<K2, V> currentData) { 1330 Map<K2, V> oldData = subtagData.get(lastTag); 1331 if (oldData != null) { 1332 if (oldData.get("CLDR") != null) { 1333 System.out.println("overriding: " + lastTag + ", " + oldData); 1334 } else { 1335 throw new IllegalArgumentException("Duplicate tag: " + lastTag); 1336 } 1337 } 1338 return subtagData.put(lastTag, currentData); 1339 } 1340 1341 static Counter<LstrType> languageCount = new Counter<>(); 1342 1343 public static Counter<LstrType> getLanguageCount() { 1344 return languageCount; 1345 } 1346 1347 ZoneParser zoneParser = new ZoneParser(); 1348 1349 // static public final Set<String> MODERN_SCRIPTS = Collections 1350 // .unmodifiableSet(new TreeSet( 1351 // // "Bali " + 1352 // // "Bugi " + 1353 // // "Copt " + 1354 // // "Hano " + 1355 // // "Osma " + 1356 // // "Qaai " + 1357 // // "Sylo " + 1358 // // "Syrc " + 1359 // // "Tagb " + 1360 // // "Tglg " + 1361 // Arrays 1362 // .asList("Hans Hant Jpan Hrkt Kore Arab Armn Bali Beng Bopo Cans Cham Cher Cyrl Deva Ethi Geor Grek Gujr Guru Hani Hang Hebr Hira Knda Kana Kali Khmr Laoo Latn Lepc Limb Mlym Mong Mymr Talu Nkoo Olck Orya Saur Sinh Tale Taml Telu Thaa Thai Tibt Tfng Vaii Yiii" 1363 // .split("\\s+")))); 1364 1365 // updated to http://www.unicode.org/reports/tr31/tr31-9.html#Specific_Character_Adjustments 1366 1367 /** 1368 * @deprecated 1369 */ 1370 @Deprecated 1371 public Map<String, List<ZoneLine>> getZone_rules() { 1372 return zoneParser.getZone_rules(); 1373 } 1374 1375 /** 1376 * @deprecated 1377 */ 1378 @Deprecated 1379 public Map<String, List<String>> getZoneData() { 1380 return zoneParser.getZoneData(); 1381 } 1382 1383 /** 1384 * @deprecated 1385 */ 1386 @Deprecated 1387 public Set<String> getCanonicalTimeZones() { 1388 return zoneParser.getZoneData().keySet(); 1389 } 1390 1391 /** 1392 * @deprecated 1393 */ 1394 @Deprecated 1395 public Map<String, Set<String>> 
getCountryToZoneSet() { 1396 return zoneParser.getCountryToZoneSet(); 1397 } 1398 1399 /** 1400 * @deprecated 1401 */ 1402 @Deprecated 1403 public List<String> getDeprecatedZoneIDs() { 1404 return zoneParser.getDeprecatedZoneIDs(); 1405 } 1406 1407 /** 1408 * @deprecated 1409 */ 1410 @Deprecated 1411 public Comparator<String> getTZIDComparator() { 1412 return zoneParser.getTZIDComparator(); 1413 } 1414 1415 /** 1416 * @deprecated 1417 */ 1418 @Deprecated 1419 public Map<String, Set<String>> getZoneLinkNew_OldSet() { 1420 return zoneParser.getZoneLinkNew_OldSet(); 1421 } 1422 1423 /** 1424 * @deprecated 1425 */ 1426 @Deprecated 1427 public Map<String, String> getZoneLinkold_new() { 1428 return zoneParser.getZoneLinkold_new(); 1429 } 1430 1431 /** 1432 * @deprecated 1433 */ 1434 @Deprecated 1435 public Map getZoneRuleID_rules() { 1436 return zoneParser.getZoneRuleID_rules(); 1437 } 1438 1439 /** 1440 * @deprecated 1441 */ 1442 @Deprecated 1443 public Map<String, String> getZoneToCounty() { 1444 return zoneParser.getZoneToCounty(); 1445 } 1446 1447 /** 1448 * @deprecated 1449 */ 1450 @Deprecated 1451 public String getZoneVersion() { 1452 return zoneParser.getVersion(); 1453 } 1454 1455 public static String fixLanguageTag(String languageSubtag) { 1456 if (languageSubtag.equals("mo")) { // fix special cases 1457 return "ro"; 1458 } else if (languageSubtag.equals("no")) { 1459 return "nb"; 1460 } 1461 return languageSubtag; 1462 } 1463 1464 public boolean isModernLanguage(String languageCode) { 1465 if (getMoribundLanguages().contains(languageCode)) return false; 1466 Type type = Iso639Data.getType(languageCode); 1467 if (type == Type.Living) return true; 1468 if (languageCode.equals("eo")) return true; // exception for Esperanto 1469 // Scope scope = Iso639Data.getScope(languageCode); 1470 // if (scope == Scope.Collection) return false; 1471 return false; 1472 } 1473 1474 public static boolean isScriptModern(String script) { 1475 ScriptMetadata.Info info = 
ScriptMetadata.getInfo(script); 1476 if (info == null) { 1477 if (false) throw new IllegalArgumentException("No script metadata for: " + script); 1478 return false; 1479 } 1480 IdUsage idUsage = info.idUsage; 1481 return idUsage != IdUsage.EXCLUSION && idUsage != IdUsage.UNKNOWN; 1482 } 1483 1484 static final Pattern whitespace = PatternCache.get("\\s+"); 1485 static Set<String> filteredCurrencies = null; 1486 1487 public Set<String> getSurveyToolDisplayCodes(String type) { 1488 return getGoodAvailableCodes(type); 1489 } 1490 1491 static UnicodeSet COUNTRY = new UnicodeSet("[a-zA-Z]").freeze(); 1492 1493 /** 1494 * Quick check for whether valid country. Not complete: should use Validity 1495 * @param territory 1496 * @return 1497 */ 1498 public static boolean isCountry(String territory) { 1499 switch (territory) { 1500 case "ZZ": 1501 case "QO": 1502 case "EU": 1503 case "UN": 1504 case "EZ": 1505 return false; 1506 default: 1507 return territory.length() == 2 && COUNTRY.containsAll(territory); 1508 } 1509 } 1510 1511 public boolean isLstregPrivateUse(String type, String code) { 1512 Map<String, String> lStregData = getLStreg().get(type).get(code); 1513 return lStregData.get("Description").equalsIgnoreCase("private use"); 1514 } 1515 1516 public boolean isLstregDeprecated(String type, String code) { 1517 Map<String, String> lStregData = getLStreg().get(type).get(code); 1518 return lStregData.get("Deprecated") != null; 1519 } 1520 } 1521