1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.BufferedReader; 12 import java.io.IOException; 13 import java.util.ArrayList; 14 import java.util.Arrays; 15 import java.util.Collections; 16 import java.util.Comparator; 17 import java.util.EnumMap; 18 import java.util.EnumSet; 19 import java.util.HashMap; 20 import java.util.HashSet; 21 import java.util.Iterator; 22 import java.util.LinkedHashMap; 23 import java.util.LinkedHashSet; 24 import java.util.List; 25 import java.util.Locale; 26 import java.util.Map; 27 import java.util.Map.Entry; 28 import java.util.Set; 29 import java.util.TreeMap; 30 import java.util.TreeSet; 31 import java.util.regex.Pattern; 32 33 import org.unicode.cldr.draft.ScriptMetadata; 34 import org.unicode.cldr.draft.ScriptMetadata.IdUsage; 35 import org.unicode.cldr.util.Iso639Data.Type; 36 import org.unicode.cldr.util.ZoneParser.ZoneLine; 37 38 import com.ibm.icu.impl.Relation; 39 import com.ibm.icu.lang.UCharacter; 40 import com.ibm.icu.text.UnicodeSet; 41 import com.ibm.icu.util.ICUUncheckedIOException; 42 import com.ibm.icu.util.Output; 43 44 /** 45 * Provides access to various codes used by CLDR: RFC 3066, ISO 4217, Olson 46 * tzids 47 */ 48 public class StandardCodes { 49 50 public enum CodeType { 51 language, script, territory, extlang, grandfathered, redundant, variant, currency, tzid; from(String name)52 public static CodeType from(String name) { 53 if ("region".equals(name)) { 54 return territory; 55 } 56 return CodeType.valueOf(name); 57 } 58 } 59 60 private static final Set<CodeType> TypeSet = Collections.unmodifiableSet(EnumSet.allOf(CodeType.class)); 61 62 private static final Set<String> TypeStringSet; 63 static { 64 LinkedHashSet<String> foo = new LinkedHashSet<String>(); 65 for (CodeType x : CodeType.values()) { x.toString()66 foo.add(x.toString()); 67 } 68 TypeStringSet = Collections.unmodifiableSet(foo); 69 } 70 71 public static final String DESCRIPTION_SEPARATOR = "\u25AA"; 72 73 public static final String NO_COUNTRY = "001"; 74 75 private static StandardCodes singleton; 76 77 private EnumMap<CodeType, Map<String, List<String>>> type_code_data = new EnumMap<CodeType, Map<String, List<String>>>( 78 CodeType.class); 79 80 private EnumMap<CodeType, Map<String, List<String>>> type_name_codes = new EnumMap<CodeType, Map<String, List<String>>>( 81 CodeType.class); 82 83 private EnumMap<CodeType, Map<String, String>> type_code_preferred = new EnumMap<CodeType, Map<String, String>>( 84 CodeType.class); 85 86 private Map<String, Set<String>> country_modernCurrency = new TreeMap<String, Set<String>>(); 87 88 private Map<CodeType, Set<String>> goodCodes = new TreeMap<CodeType, Set<String>>(); 89 90 private static final boolean DEBUG = false; 91 92 /** 93 * Get the singleton copy of the standard codes. 94 */ make()95 static public synchronized StandardCodes make() { 96 if (singleton == null) 97 singleton = new StandardCodes(); 98 return singleton; 99 } 100 101 /** 102 * The data is the name in the case of RFC3066 codes, and the country code in 103 * the case of TZIDs and ISO currency codes. If the country code is missing, 104 * uses ZZ. 105 */ getData(String type, String code)106 public String getData(String type, String code) { 107 Map<String, List<String>> code_data = getCodeData(type); 108 if (code_data == null) 109 return null; 110 List<String> list = code_data.get(code); 111 if (list == null) 112 return null; 113 return list.get(0); 114 } 115 116 /** 117 * @return the full data for the type and code For the data in lstreg, it is 118 * description | date | canonical_value | recommended_prefix # 119 * comments 120 */ getFullData(String type, String code)121 public List<String> getFullData(String type, String code) { 122 Map<String, List<String>> code_data = getCodeData(type); 123 if (code_data == null) 124 return null; 125 return code_data.get(code); 126 } 127 128 /** 129 * @return the full data for the type and code For the data in lstreg, it is 130 * description | date | canonical_value | recommended_prefix # 131 * comments 132 */ getFullData(CodeType type, String code)133 public List<String> getFullData(CodeType type, String code) { 134 Map<String, List<String>> code_data = type_code_data.get(type); 135 if (code_data == null) 136 return null; 137 return code_data.get(code); 138 } 139 getCodeData(String type)140 private Map<String, List<String>> getCodeData(String type) { 141 return getCodeData(CodeType.from(type)); 142 } 143 getCodeData(CodeType type)144 private Map<String, List<String>> getCodeData(CodeType type) { 145 return type_code_data.get(type); 146 } 147 148 /** 149 * Get at the language registry values, as a Map from label to value. 150 * 151 * @param type 152 * @param code 153 * @return 154 */ getLangData(String type, String code)155 public Map<String, String> getLangData(String type, String code) { 156 try { 157 if (type.equals("territory")) 158 type = "region"; 159 else if (type.equals("variant")) code = code.toLowerCase(Locale.ENGLISH); 160 return (Map) ((Map) getLStreg().get(type)).get(code); 161 } catch (RuntimeException e) { 162 return null; 163 } 164 } 165 166 /** 167 * Return a replacement code, if available. If not, return null. 168 * 169 */ getReplacement(String type, String code)170 public String getReplacement(String type, String code) { 171 if (type.equals("currency")) 172 return null; // no replacement codes for currencies 173 List<String> data = getFullData(type, code); 174 if (data == null) 175 return null; 176 // if available, the replacement is a non-empty value other than --, in 177 // position 2. 178 if (data.size() < 3) 179 return null; 180 String replacement = (String) data.get(2); 181 if (!replacement.equals("") && !replacement.equals("--")) 182 return replacement; 183 return null; 184 } 185 186 /** 187 * Return the list of codes that have the same data. For example, returns all 188 * currency codes for a country. If there is a preferred one, it is first. 189 * 190 * @param type 191 * @param data 192 * @return 193 */ 194 @Deprecated getCodes(String type, String data)195 public List<String> getCodes(String type, String data) { 196 return getCodes(CodeType.valueOf(type), data); 197 } 198 199 /** 200 * Return the list of codes that have the same data. For example, returns all 201 * currency codes for a country. If there is a preferred one, it is first. 202 */ getCodes(CodeType type, String data)203 public List<String> getCodes(CodeType type, String data) { 204 Map<String, List<String>> data_codes = type_name_codes.get(type); 205 if (data_codes == null) 206 return null; 207 return Collections.unmodifiableList(data_codes.get(data)); 208 } 209 210 /** 211 * Where there is a preferred code, return it. 212 */ 213 @Deprecated getPreferred(String type, String code)214 public String getPreferred(String type, String code) { 215 return getPreferred(CodeType.valueOf(type), code); 216 } 217 218 /** 219 * Where there is a preferred code, return it. 220 */ 221 getPreferred(CodeType type, String code)222 public String getPreferred(CodeType type, String code) { 223 Map<String, String> code_preferred = type_code_preferred.get(type); 224 if (code_preferred == null) 225 return code; 226 String newCode = code_preferred.get(code); 227 if (newCode == null) 228 return code; 229 return newCode; 230 } 231 232 /** 233 * Get all the available types 234 */ getAvailableTypes()235 public Set<String> getAvailableTypes() { 236 return TypeStringSet; 237 } 238 239 /** 240 * Get all the available types 241 */ getAvailableTypesEnum()242 public Set<CodeType> getAvailableTypesEnum() { 243 return TypeSet; 244 } 245 246 /** 247 * Get all the available codes for a given type 248 * 249 * @param type 250 * @return 251 */ getAvailableCodes(String type)252 public Set<String> getAvailableCodes(String type) { 253 return getAvailableCodes(CodeType.from(type)); 254 } 255 256 /** 257 * Get all the available codes for a given type 258 * 259 * @param type 260 * @return 261 */ getAvailableCodes(CodeType type)262 public Set<String> getAvailableCodes(CodeType type) { 263 Map<String, List<String>> code_name = type_code_data.get(type); 264 return Collections.unmodifiableSet(code_name.keySet()); 265 } 266 getGoodAvailableCodes(String stringType)267 public Set<String> getGoodAvailableCodes(String stringType) { 268 return getGoodAvailableCodes(CodeType.from(stringType)); 269 } 270 271 /** 272 * Get all the available "real" codes for a given type, excluding private use, 273 * but including some deprecated codes. Use SupplementalDataInfo getLocaleAliases to 274 * exclude others. 275 * 276 * @param type 277 * @return 278 */ getGoodAvailableCodes(CodeType type)279 public Set<String> getGoodAvailableCodes(CodeType type) { 280 Set<String> result = goodCodes.get(type); 281 if (result == null) { 282 synchronized (goodCodes) { 283 Map<String, List<String>> code_name = getCodeData(type); 284 SupplementalDataInfo sd = SupplementalDataInfo.getInstance(); 285 if (code_name == null) 286 return null; 287 result = new TreeSet<String>(code_name.keySet()); 288 switch (type) { 289 case currency: 290 break; // nothing special 291 case language: 292 return sd.getCLDRLanguageCodes(); 293 case script: 294 return sd.getCLDRScriptCodes(); 295 case tzid: 296 break; // nothing special 297 default: 298 for (Iterator<String> it = result.iterator(); it.hasNext();) { 299 String code = (String) it.next(); 300 if (code.equals("root") || code.equals("QO")) 301 continue; 302 List<String> data = getFullData(type, code); 303 if (data.size() < 3) { 304 if (DEBUG) 305 System.out.println(code + "\t" + data); 306 } 307 if ("PRIVATE USE".equalsIgnoreCase(data.get(0)) 308 || (!data.get(2).equals("") && !data.get(2).equals("--"))) { 309 // System.out.println("Removing: " + code); 310 it.remove(); 311 } 312 } 313 } 314 result = Collections.unmodifiableSet(result); 315 goodCodes.put(type, result); 316 } 317 } 318 return result; 319 } 320 321 private static Set<String> GOOD_COUNTRIES; 322 getGoodCountries()323 public Set<String> getGoodCountries() { 324 synchronized (goodCodes) { 325 if (GOOD_COUNTRIES == null) { 326 Set<String> temp = new LinkedHashSet<String>(); 327 for (String s : getGoodAvailableCodes(CodeType.territory)) { 328 if (isCountry(s)) { 329 temp.add(s); 330 } 331 } 332 GOOD_COUNTRIES = Collections.unmodifiableSet(temp); 333 } 334 } 335 return GOOD_COUNTRIES; 336 } 337 338 /** 339 * Gets the modern currency. 340 */ getMainCurrencies(String countryCode)341 public Set<String> getMainCurrencies(String countryCode) { 342 return country_modernCurrency.get(countryCode); 343 } 344 345 private EnumMap<Organization, Map<String, Level>> platform_locale_level = null; 346 private EnumMap<Organization, Relation<Level, String>> platform_level_locale = null; 347 private Map<String, Map<String, String>> platform_locale_levelString = null; 348 349 // /** 350 // * Get rid of this 351 // * 352 // * @param type 353 // * @return 354 // * @throws IOException 355 // * @deprecated 356 // */ 357 // public String getEffectiveLocaleType(String type) throws IOException { 358 // if ((type != null) && (getLocaleCoverageOrganizations().contains(Organization.valueOf(type)))) { 359 // return type; 360 // } else { 361 // return null; // the default.. for now.. 362 // } 363 // } 364 365 static Comparator caseless = new Comparator() { 366 367 public int compare(Object arg0, Object arg1) { 368 String s1 = (String) arg0; 369 String s2 = (String) arg1; 370 return s1.compareToIgnoreCase(s2); 371 } 372 373 }; 374 375 /** 376 * Returns locales according to status. It returns a Map of Maps, key 1 is 377 * either IBM or Java (perhaps more later), key 2 is the Level. 378 * 379 * @deprecated 380 */ getLocaleTypes()381 public Map<Organization, Map<String, Level>> getLocaleTypes() { 382 synchronized (StandardCodes.class) { 383 if (platform_locale_level == null) { 384 loadPlatformLocaleStatus(); 385 } 386 } 387 return platform_locale_level; 388 } 389 390 /** 391 * Return map of locales to levels 392 * @param org 393 * @return 394 */ getLocaleToLevel(Organization org)395 public Map<String, Level> getLocaleToLevel(Organization org) { 396 return getLocaleTypes().get(org); 397 } 398 getLocaleCoverageLevel(String organization, String desiredLocale)399 public Level getLocaleCoverageLevel(String organization, String desiredLocale) { 400 return getLocaleCoverageLevel(Organization.fromString(organization), desiredLocale); 401 } 402 getLocaleCoverageLevel(Organization organization, String desiredLocale)403 public Level getLocaleCoverageLevel(Organization organization, String desiredLocale) { 404 return getLocaleCoverageLevel(organization, desiredLocale, new Output<LocaleCoverageType>()); 405 } 406 407 public enum LocaleCoverageType { 408 explicit, parent, star, undetermined 409 } 410 411 /** 412 * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing. 413 * A locale of "*" in the data means "everything else". 414 */ getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType)415 public Level getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType) { 416 synchronized (StandardCodes.class) { 417 if (platform_locale_level == null) { 418 loadPlatformLocaleStatus(); 419 } 420 } 421 coverageType.value = LocaleCoverageType.undetermined; 422 if (organization == null) { 423 return Level.UNDETERMINED; 424 } 425 Map<String, Level> locale_status = platform_locale_level.get(organization); 426 if (locale_status == null) { 427 return Level.UNDETERMINED; 428 } 429 // see if there is a parent 430 String originalLocale = desiredLocale; 431 while (desiredLocale != null) { 432 Level status = locale_status.get(desiredLocale); 433 if (status != null && status != Level.UNDETERMINED) { 434 coverageType.value = originalLocale == desiredLocale ? LocaleCoverageType.explicit : LocaleCoverageType.parent; 435 return status; 436 } 437 desiredLocale = LocaleIDParser.getParent(desiredLocale); 438 } 439 Level status = locale_status.get("*"); 440 if (status != null && status != Level.UNDETERMINED) { 441 coverageType.value = LocaleCoverageType.star; 442 return status; 443 } 444 return Level.UNDETERMINED; 445 } 446 447 /** 448 * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing. 449 */ getDefaultLocaleCoverageLevel(Organization organization)450 public Level getDefaultLocaleCoverageLevel(Organization organization) { 451 return getLocaleCoverageLevel(organization, "*"); 452 } 453 getLocaleCoverageOrganizations()454 public Set<Organization> getLocaleCoverageOrganizations() { 455 synchronized (StandardCodes.class) { 456 if (platform_locale_level == null) { 457 loadPlatformLocaleStatus(); 458 } 459 } 460 return platform_locale_level.keySet(); 461 } 462 getLocaleCoverageOrganizationStrings()463 public Set<String> getLocaleCoverageOrganizationStrings() { 464 synchronized (StandardCodes.class) { 465 if (platform_locale_level == null) { 466 loadPlatformLocaleStatus(); 467 } 468 } 469 return platform_locale_levelString.keySet(); 470 } 471 getLocaleCoverageLocales(String organization)472 public Set<String> getLocaleCoverageLocales(String organization) { 473 return getLocaleCoverageLocales(Organization.fromString(organization)); 474 } 475 getLocaleCoverageLocales(Organization organization)476 public Set<String> getLocaleCoverageLocales(Organization organization) { 477 synchronized (StandardCodes.class) { 478 if (platform_locale_level == null) { 479 loadPlatformLocaleStatus(); 480 } 481 } 482 return platform_locale_level.get(organization).keySet(); 483 } 484 getLevelsToLocalesFor(Organization organization)485 public Relation<Level, String> getLevelsToLocalesFor(Organization organization) { 486 synchronized (StandardCodes.class) { 487 if (platform_level_locale == null) { 488 loadPlatformLocaleStatus(); 489 } 490 } 491 return platform_level_locale.get(organization); 492 } 493 getLocaleCoverageLocales(Organization organization, Set<Level> choice)494 public Set<String> getLocaleCoverageLocales(Organization organization, Set<Level> choice) { 495 Set<String> result = new LinkedHashSet<String>(); 496 for (String locale : getLocaleCoverageLocales(organization)) { 497 if (choice.contains(getLocaleCoverageLevel(organization, locale))) { 498 result.add(locale); 499 } 500 } 501 return result; 502 } 503 loadPlatformLocaleStatus()504 private void loadPlatformLocaleStatus() { 505 LocaleIDParser parser = new LocaleIDParser(); 506 platform_locale_level = new EnumMap<Organization, Map<String, Level>>(Organization.class); 507 SupplementalDataInfo sd = SupplementalDataInfo.getInstance(); 508 Set<String> defaultContentLocales = sd.getDefaultContentLocales(); 509 String line; 510 try { 511 BufferedReader lstreg = CldrUtility.getUTF8Data("Locales.txt"); 512 while (true) { 513 line = lstreg.readLine(); 514 if (line == null) 515 break; 516 int commentPos = line.indexOf('#'); 517 if (commentPos >= 0) { 518 line = line.substring(0, commentPos); 519 } 520 line = line.trim(); 521 if (line.length() == 0) 522 continue; 523 List<String> stuff = CldrUtility.splitList(line, ';', true); 524 Organization organization; 525 526 // verify that the organization is valid 527 try { 528 organization = Organization.fromString(stuff.get(0)); 529 } catch (Exception e) { 530 throw new IllegalArgumentException("Invalid organization in Locales.txt: " + line); 531 } 532 533 // verify that the locale is valid BCP47 534 String locale = (String) stuff.get(1); 535 if (!locale.equals("*")) { 536 parser.set(locale); 537 String valid = validate(parser); 538 if (valid.length() != 0) { 539 throw new IllegalArgumentException("Invalid locale in Locales.txt: " + line); 540 } 541 locale = parser.toString(); // normalize 542 543 // verify that the locale is not a default content locale 544 if (defaultContentLocales.contains(locale)) { 545 throw new IllegalArgumentException("Cannot have default content locale in Locales.txt: " + line); 546 } 547 } 548 549 Level status = Level.get((String) stuff.get(2)); 550 if (status == Level.UNDETERMINED) { 551 System.out.println("Warning: Level unknown on: " + line); 552 } 553 Map<String, Level> locale_status = platform_locale_level.get(organization); 554 if (locale_status == null) { 555 platform_locale_level.put(organization, locale_status = new TreeMap<String, Level>()); 556 } 557 locale_status.put(locale, status); 558 if (!locale.equals("*")) { 559 String scriptLoc = parser.getLanguageScript(); 560 if (locale_status.get(scriptLoc) == null) 561 locale_status.put(scriptLoc, status); 562 String lang = parser.getLanguage(); 563 if (locale_status.get(lang) == null) 564 locale_status.put(lang, status); 565 } 566 } 567 } catch (IOException e) { 568 throw new ICUUncheckedIOException("Internal Error", e); 569 } 570 571 // now reset the parent to be the max of the children 572 for (Organization platform : platform_locale_level.keySet()) { 573 Map<String, Level> locale_level = platform_locale_level.get(platform); 574 for (String locale : locale_level.keySet()) { 575 parser.set(locale); 576 Level childLevel = locale_level.get(locale); 577 578 String language = parser.getLanguage(); 579 if (!language.equals(locale)) { 580 Level languageLevel = (Level) locale_level.get(language); 581 if (languageLevel == null || languageLevel.compareTo(childLevel) < 0) { 582 locale_level.put(language, childLevel); 583 } 584 } 585 String oldLanguage = language; 586 language = parser.getLanguageScript(); 587 if (!language.equals(oldLanguage)) { 588 Level languageLevel = (Level) locale_level.get(language); 589 if (languageLevel == null || languageLevel.compareTo(childLevel) < 0) { 590 locale_level.put(language, childLevel); 591 } 592 } 593 } 594 } 595 // backwards compat hack 596 platform_locale_levelString = new TreeMap<String, Map<String, String>>(); 597 platform_level_locale = new EnumMap<>(Organization.class); 598 for (Organization platform : platform_locale_level.keySet()) { 599 Map<String, String> locale_levelString = new TreeMap<String, String>(); 600 platform_locale_levelString.put(platform.toString(), locale_levelString); 601 Map<String, Level> locale_level = platform_locale_level.get(platform); 602 for (String locale : locale_level.keySet()) { 603 locale_levelString.put(locale, locale_level.get(locale).toString()); 604 } 605 Relation level_locale = Relation.of(new EnumMap(Level.class), HashSet.class); 606 level_locale.addAllInverted(locale_level).freeze(); 607 platform_level_locale.put(platform, level_locale); 608 } 609 CldrUtility.protectCollection(platform_level_locale); 610 platform_locale_level = CldrUtility.protectCollection(platform_locale_level); 611 platform_locale_levelString = CldrUtility.protectCollection(platform_locale_levelString); 612 } 613 validate(LocaleIDParser parser)614 private String validate(LocaleIDParser parser) { 615 String message = ""; 616 String lang = parser.getLanguage(); 617 if (lang.length() == 0) { 618 message += ", Missing language"; 619 } else if (!getAvailableCodes("language").contains(lang)) { 620 message += ", Invalid language code: " + lang; 621 } 622 String script = parser.getScript(); 623 if (script.length() != 0 && !getAvailableCodes("script").contains(script)) { 624 message += ", Invalid script code: " + script; 625 } 626 String territory = parser.getRegion(); 627 if (territory.length() != 0 && !getAvailableCodes("territory").contains(territory)) { 628 message += ", Invalid territory code: " + lang; 629 } 630 return message.length() == 0 ? message : message.substring(2); 631 } 632 633 /** 634 * Ascertain that the given locale in in the given group specified by the 635 * organization 636 * 637 * @param locale 638 * @param group 639 * @param org 640 * @return boolean 641 */ isLocaleInGroup(String locale, String group, Organization org)642 public boolean isLocaleInGroup(String locale, String group, Organization org) { 643 return group.equals(getGroup(locale, org)); 644 } 645 isLocaleInGroup(String locale, String group, String org)646 public boolean isLocaleInGroup(String locale, String group, String org) { 647 return isLocaleInGroup(locale, group, Organization.fromString(org)); 648 } 649 getGroup(String locale, String org)650 public String getGroup(String locale, String org) { 651 return getGroup(locale, Organization.fromString(org)); 652 } 653 654 /** 655 * Gets the coverage group given a locale and org 656 * 657 * @param locale 658 * @param org 659 * @return group if availble, null if not 660 */ getGroup(String locale, Organization org)661 public String getGroup(String locale, Organization org) { 662 Level l = getLocaleCoverageLevel(org, locale); 663 if (l.equals(Level.UNDETERMINED)) { 664 return null; 665 } else { 666 return l.toString(); 667 } 668 } 669 670 // ========== PRIVATES ========== 671 StandardCodes()672 private StandardCodes() { 673 String[] files = { /* "lstreg.txt", */"ISO4217.txt" }; // , "TZID.txt" 674 type_code_preferred.put(CodeType.tzid, new TreeMap<String, String>()); 675 add(CodeType.language, "root", "Root"); 676 String originalLine = null; 677 for (int fileIndex = 0; fileIndex < files.length; ++fileIndex) { 678 try { 679 BufferedReader lstreg = CldrUtility.getUTF8Data(files[fileIndex]); 680 while (true) { 681 String line = originalLine = lstreg.readLine(); 682 if (line == null) 683 break; 684 if (line.startsWith("\uFEFF")) { 685 line = line.substring(1); 686 } 687 line = line.trim(); 688 int commentPos = line.indexOf('#'); 689 String comment = ""; 690 if (commentPos >= 0) { 691 comment = line.substring(commentPos + 1).trim(); 692 line = line.substring(0, commentPos); 693 } 694 if (line.length() == 0) 695 continue; 696 List<String> pieces = CldrUtility.splitList(line, '|', true, 697 new ArrayList<String>()); 698 CodeType type = CodeType.from(pieces.get(0)); 699 pieces.remove(0); 700 701 String code = pieces.get(0); 702 pieces.remove(0); 703 if (type.equals("date")) { 704 continue; 705 } 706 707 String oldName = pieces.get(0); 708 int pos = oldName.indexOf(';'); 709 if (pos >= 0) { 710 oldName = oldName.substring(0, pos).trim(); 711 pieces.set(0, oldName); 712 } 713 714 List<String> data = pieces; 715 if (comment.indexOf("deprecated") >= 0) { 716 // System.out.println(originalLine); 717 if (data.get(2).toString().length() == 0) { 718 data.set(2, "--"); 719 } 720 } 721 if (oldName.equalsIgnoreCase("PRIVATE USE")) { 722 int separatorPos = code.indexOf(".."); 723 if (separatorPos < 0) { 724 add(type, code, data); 725 } else { 726 String current = code.substring(0, separatorPos); 727 String end = code.substring(separatorPos + 2); 728 // System.out.println(">>" + code + "\t" + current + "\t" + end); 729 for (; current.compareTo(end) <= 0; current = nextAlpha(current)) { 730 // System.out.println(">" + current); 731 add(type, current, data); 732 } 733 } 734 continue; 735 } 736 if (!type.equals("tzid")) { 737 add(type, code, data); 738 if (type.equals("currency")) { 739 // currency | TPE | Timor Escudo | TP | EAST TIMOR | O 740 if (data.get(3).equals("C")) { 741 String country = (String) data.get(1); 742 Set<String> codes = country_modernCurrency.get(country); 743 if (codes == null) { 744 country_modernCurrency.put(country, codes = new TreeSet<String>()); 745 } 746 codes.add(code); 747 } 748 } 749 continue; 750 } 751 // type = tzid 752 // List codes = (List) Utility.splitList(code, ',', true, new 753 // ArrayList()); 754 String preferred = null; 755 for (int i = 0; i < pieces.size(); ++i) { 756 code = (String) pieces.get(i); 757 add(type, code, data); 758 if (preferred == null) 759 preferred = code; 760 else { 761 Map<String, String> code_preferred = type_code_preferred.get(type); 762 code_preferred.put(code, preferred); 763 } 764 } 765 } 766 lstreg.close(); 767 } catch (Exception e) { 768 System.err.println("WARNING: " + files[fileIndex] 769 + " may be a corrupted UTF-8 file. Please check."); 770 throw (IllegalArgumentException) new IllegalArgumentException( 771 "Can't read " + files[fileIndex] + "\t" + originalLine) 772 .initCause(e); 773 } 774 country_modernCurrency = CldrUtility.protectCollection(country_modernCurrency); 775 } 776 777 // data is: description | date | canonical_value | recommended_prefix # 778 // comments 779 // HACK, just rework 780 781 Map<String, Map<String, Map<String, String>>> languageRegistry = getLStreg(); 782 // languageRegistry = CldrUtility.protectCollection(languageRegistry); 783 784 for (String type : languageRegistry.keySet()) { 785 CodeType type2 = CodeType.from(type); 786 Map<String, Map<String, String>> m = languageRegistry.get(type); 787 for (String code : m.keySet()) { 788 Map<String, String> mm = m.get(code); 789 List<String> data = new ArrayList<String>(0); 790 data.add(mm.get("Description")); 791 data.add(mm.get("Added")); 792 String pref = mm.get("Preferred-Value"); 793 if (pref == null) { 794 pref = mm.get("Deprecated"); 795 if (pref == null) 796 pref = ""; 797 else 798 pref = "deprecated"; 799 } 800 data.add(pref); 801 if (type.equals("variant")) { 802 code = code.toUpperCase(); 803 } 804 // data.add(mm.get("Recommended_Prefix")); 805 // {"region", "BQ", "Description", "British Antarctic Territory", 806 // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"}, 807 add(type2, code, data); 808 } 809 } 810 811 Map<String, List<String>> m = getZoneData(); 812 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) { 813 String code = it.next(); 814 add(CodeType.tzid, code, m.get(code).toString()); 815 } 816 } 817 818 /** 819 * @param current 820 * @return 821 */ nextAlpha(String current)822 private static String nextAlpha(String current) { 823 // Don't care that this is inefficient 824 int value = 0; 825 for (int i = 0; i < current.length(); ++i) { 826 char c = current.charAt(i); 827 c -= c < 'a' ? 'A' : 'a'; 828 value = value * 26 + c; 829 } 830 value += 1; 831 String result = ""; 832 for (int i = 0; i < current.length(); ++i) { 833 result = (char) ((value % 26) + 'A') + result; 834 value = value / 26; 835 } 836 if (UCharacter.toLowerCase(current).equals(current)) { 837 result = UCharacter.toLowerCase(result); 838 } else if (UCharacter.toUpperCase(current).equals(current)) { 839 // do nothing 840 } else { 841 result = UCharacter.toTitleCase(result, null); 842 } 843 return result; 844 } 845 846 /** 847 * @param string 848 * @param string2 849 * @param string3 850 */ 851 private void add(CodeType type, String string2, String string3) { 852 List<String> l = new ArrayList<String>(); 853 l.add(string3); 854 add(type, string2, l); 855 } 856 857 private void add(CodeType type, String code, List<String> otherData) { 858 // hack 859 if (type == CodeType.script) { 860 if (code.equals("Qaai")) { 861 otherData = new ArrayList<String>(otherData); 862 otherData.set(0, "Inherited"); 863 } else if (code.equals("Zyyy")) { 864 otherData = new ArrayList<String>(otherData); 865 otherData.set(0, "Common"); 866 } 867 } 868 869 // assume name is the first item 870 871 String name = otherData.get(0); 872 873 // add to main list 874 Map<String, List<String>> code_data = getCodeData(type); 875 if (code_data == null) { 876 code_data = new TreeMap<String, List<String>>(); 877 type_code_data.put(type, code_data); 878 } 879 List<String> lastData = code_data.get(code); 880 if (lastData != null) { 881 lastData.addAll(otherData); 882 } else { 883 code_data.put(code, otherData); 884 } 885 886 // now add mapping from name to codes 887 Map<String, List<String>> name_codes = type_name_codes.get(type); 888 if (name_codes == null) { 889 name_codes = new TreeMap<String, List<String>>(); 890 type_name_codes.put(type, name_codes); 891 } 892 List<String> codes = name_codes.get(name); 893 if (codes == null) { 894 codes = new ArrayList<String>(); 895 name_codes.put(name, codes); 896 } 897 codes.add(code); 898 } 899 900 private List<String> DELETED3166 = Collections.unmodifiableList(Arrays 901 .asList(new String[] { "BQ", "BU", "CT", "DD", "DY", "FQ", "FX", "HV", 902 "JT", "MI", "NH", "NQ", "NT", "PC", "PU", "PZ", "RH", "SU", "TP", 903 "VD", "WK", "YD", "YU", "ZR" })); 904 905 public List<String> getOld3166() { 906 return DELETED3166; 907 } 908 909 private Map<String, List<String>> WorldBankInfo; 910 911 public Map<String, List<String>> getWorldBankInfo() { 912 if (WorldBankInfo == null) { 913 List<String> temp = fillFromCommaFile("WorldBankInfo.txt", false); 914 WorldBankInfo = new HashMap<String, List<String>>(); 915 for (String line : temp) { 916 List<String> row = CldrUtility.splitList(line, ';', true); 917 String key = row.get(0); 918 row.remove(0); 919 WorldBankInfo.put(key, row); 920 } 921 WorldBankInfo = CldrUtility.protectCollection(WorldBankInfo); 922 } 923 return WorldBankInfo; 924 } 925 926 Set<String> moribundLanguages; 927 928 public Set<String> getMoribundLanguages() { 929 if (moribundLanguages == null) { 930 List<String> temp = fillFromCommaFile("moribund_languages.txt", true); 931 moribundLanguages = new TreeSet<String>(); 932 moribundLanguages.addAll(temp); 933 moribundLanguages = CldrUtility.protectCollection(moribundLanguages); 934 } 935 return moribundLanguages; 936 } 937 938 // produces a list of the 'clean' lines 939 private List<String> fillFromCommaFile(String filename, boolean trim) { 940 try { 941 List<String> result = new ArrayList<String>(); 942 String line; 943 BufferedReader lstreg = CldrUtility.getUTF8Data(filename); 944 while (true) { 945 line = lstreg.readLine(); 946 if (line == null) 947 break; 948 int commentPos = line.indexOf('#'); 949 if (commentPos >= 0) { 950 line = line.substring(0, commentPos); 951 } 952 if (trim) { 953 line = line.trim(); 954 } 955 if (line.length() == 0) 956 continue; 957 result.add(line); 958 } 959 return result; 960 } catch (Exception e) { 961 throw (RuntimeException) new IllegalArgumentException( 962 "Can't process file: data/" + filename).initCause(e); 963 } 964 } 965 966 // return a complex map. language -> arn -> {"Comments" -> "x", 967 // "Description->y,...} 968 static String[][] extras = { 969 { "language", "root", "Description", "Root", "CLDR", "True" }, 970 // { "language", "cch", "Description", "Atsam", "CLDR", "True" }, 971 // { "language", "kaj", "Description", "Jju", "CLDR", "True" }, 972 // { "language", "kcg", "Description", "Tyap", "CLDR", "True" }, 973 // { "language", "kfo", "Description", "Koro", "CLDR", "True" }, 974 // { "language", "mfe", "Description", "Morisyen", "CLDR", "True" }, 975 // { "region", "172", "Description", "Commonwealth of Independent States", "CLDR", "True" }, 976 // { "region", "062", "Description", "South-Central Asia", "CLDR", "True" }, 977 // { "region", "003", "Description", "North America", "CLDR", "True" }, 978 // { "variant", "POLYTONI", "Description", "Polytonic Greek", "CLDR", "True", "Preferred-Value", "POLYTON" }, 979 { "variant", "REVISED", "Description", "Revised Orthography", "CLDR", "True" }, 980 { "variant", "SAAHO", "Description", "Dialect", "CLDR", "True" }, 981 { "variant", "POSIX", "Description", "Computer-Style", "CLDR", "True" }, 982 // {"region", "172", "Description", "Commonwealth of Independent States", 983 // "CLDR", "True"}, 984 // { "region", "", "Description", "European Union", "CLDR", "True" }, 985 { "region", "ZZ", "Description", "Unknown or Invalid Region", "CLDR", "True" }, 986 { "region", "QO", "Description", "Outlying Oceania", "CLDR", "True" }, 987 { "region", "XK", "Description", "Kosovo", "CLDR", "True" }, 988 { "script", "Qaai", "Description", "Inherited", "CLDR", "True" }, 989 // {"region", "003", "Description", "North America", "CLDR", "True"}, 990 // {"region", "062", "Description", "South-central Asia", "CLDR", "True"}, 991 // {"region", "200", "Description", "Czechoslovakia", "CLDR", "True"}, 992 // {"region", "830", "Description", "Channel Islands", "CLDR", "True"}, 993 // {"region", "833", "Description", "Isle of Man", "CLDR", "True"}, 994 995 // {"region", "NT", "Description", "Neutral Zone (formerly between Saudi 996 // Arabia & Iraq)", "CLDR", "True", "Deprecated", "True"}, 997 // {"region", "SU", "Description", "Union of Soviet Socialist Republics", 998 // "CLDR", "True", "Deprecated", "True"}, 999 // {"region", "BQ", "Description", "British Antarctic Territory", 1000 // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"}, 1001 // {"region", "CT", "Description", "Canton and Enderbury Islands", 1002 // "Preferred-Value", "KI", "CLDR", "True", "Deprecated", "True"}, 1003 // {"region", "FQ", "Description", "French Southern and Antarctic Territories 1004 // (now split between AQ and TF)", "CLDR", "True", "Deprecated", "True"}, 1005 // {"region", "JT", "Description", "Johnston Island", "Preferred-Value", "UM", 1006 // "CLDR", "True", "Deprecated", "True"}, 1007 // {"region", "MI", "Description", "Midway Islands", "Preferred-Value", "UM", 1008 // "CLDR", "True", "Deprecated", "True"}, 1009 // {"region", "NQ", "Description", "Dronning Maud Land", "Preferred-Value", 1010 // "AQ", "CLDR", "True", "Deprecated", "True"}, 1011 // {"region", "PC", "Description", "Pacific Islands Trust Territory (divided 1012 // into FM, MH, MP, and PW)", "Preferred-Value", "AQ", "CLDR", "True", 1013 // "Deprecated", "True"}, 1014 // {"region", "PU", "Description", "U.S. Miscellaneous Pacific Islands", 1015 // "Preferred-Value", "UM", "CLDR", "True", "Deprecated", "True"}, 1016 // {"region", "PZ", "Description", "Panama Canal Zone", "Preferred-Value", 1017 // "PA", "CLDR", "True", "Deprecated", "True"}, 1018 // {"region", "VD", "Description", "North Vietnam", "Preferred-Value", "VN", 1019 // "CLDR", "True", "Deprecated", "True"}, 1020 // {"region", "WK", "Description", "Wake Island", "Preferred-Value", "UM", 1021 // "CLDR", "True", "Deprecated", "True"}, 1022 }; 1023 1024 static final String registryName = CldrUtility.getProperty("registry", "language-subtag-registry"); 1025 1026 public enum LstrType { 1027 language("und", "zxx", "mul", "mis", "root"), 1028 script("Zzzz", "Zsym", "Zxxx", "Zmth"), 1029 region("ZZ"), 1030 variant(), 1031 extlang(true, false), 1032 grandfathered(true, false), 1033 redundant(true, false), 1034 /** specialized codes for validity; TODO: rename LstrType **/ 1035 currency(false, true, "XXX"), 1036 subdivision(false, true), 1037 unit(false, true); 1038 1039 public final Set<String> specials; 1040 public final String unknown; 1041 public final boolean isLstr; 1042 public final boolean isUnicode; 1043 1044 private LstrType(String... unknownValue) { 1045 this(true, true, unknownValue); 1046 } 1047 1048 private LstrType(boolean lstr, boolean unicode, String... unknownValue) { 1049 unknown = unknownValue.length == 0 ? null : unknownValue[0]; 1050 LinkedHashSet<String> set = new LinkedHashSet<>(Arrays.asList(unknownValue)); 1051 if (unknown != null) { 1052 set.remove(unknown); 1053 } 1054 specials = Collections.unmodifiableSet(set); 1055 isLstr = lstr; 1056 isUnicode = unicode; 1057 } 1058 1059 // 1060 static final Pattern WELLFORMED = Pattern.compile("([0-9]{3}|[a-zA-Z]{2})[a-zA-Z0-9]{1,4}"); 1061 1062 boolean isWellFormed(String candidate) { 1063 switch (this) { 1064 case subdivision: 1065 return WELLFORMED.matcher(candidate).matches(); 1066 default: 1067 throw new UnsupportedOperationException(); 1068 } 1069 } 1070 1071 public String toCompatString() { 1072 return this == region ? "territory" : toString(); 1073 } 1074 } 1075 1076 public enum LstrField { 1077 Type, Subtag, Description, Added, Scope, Tag, Suppress_Script, Macrolanguage, Deprecated, Preferred_Value, Comments, Prefix, CLDR; 1078 public static LstrField from(String s) { 1079 return LstrField.valueOf(s.trim().replace("-", "_")); 1080 } 1081 } 1082 1083 static Map<String, Map<String, Map<String, String>>> LSTREG; 1084 static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_ENUM; 1085 static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_RAW; 1086 1087 /** 1088 * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br> 1089 * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated by 1090 * DESCRIPTION_SEPARATOR. 1091 * 1092 * @return 1093 */ 1094 public static Map<String, Map<String, Map<String, String>>> getLStreg() { 1095 if (LSTREG == null) { 1096 initLstr(); 1097 } 1098 return LSTREG; 1099 } 1100 1101 /** 1102 * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br> 1103 * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated by 1104 * DESCRIPTION_SEPARATOR. 1105 * 1106 * @return 1107 */ 1108 public static Map<LstrType, Map<String, Map<LstrField, String>>> getEnumLstreg() { 1109 if (LSTREG_ENUM == null) { 1110 initLstr(); 1111 } 1112 return LSTREG_ENUM; 1113 } 1114 1115 public static Map<LstrType, Map<String, Map<LstrField, String>>> getLstregEnumRaw() { 1116 if (LSTREG_ENUM == null) { 1117 initLstr(); 1118 } 1119 return LSTREG_RAW; 1120 } 1121 1122 private static void initLstr() { 1123 Map<LstrType, Map<String, Map<LstrField, String>>> result2 = new TreeMap<LstrType, Map<String, Map<LstrField, String>>>(); 1124 1125 int lineNumber = 1; 1126 1127 Set<String> funnyTags = new TreeSet<String>(); 1128 String line; 1129 try { 1130 BufferedReader lstreg = CldrUtility.getUTF8Data(registryName); 1131 LstrType lastType = null; 1132 String lastTag = null; 1133 Map<String, Map<LstrField, String>> subtagData = null; 1134 Map<LstrField, String> currentData = null; 1135 LstrField lastLabel = null; 1136 String lastRest = null; 1137 boolean inRealContent = false; 1138 // Map<String, String> translitCache = new HashMap<String, String>(); 1139 for (;; ++lineNumber) { 1140 line = lstreg.readLine(); 1141 if (line == null) 1142 break; 1143 if (line.length() == 0) 1144 continue; // skip blanks 1145 if (line.startsWith("File-Date: ")) { 1146 if (DEBUG) System.out.println("Language Subtag Registry: " + line); 1147 inRealContent = true; 1148 continue; 1149 } 1150 if (!inRealContent) { 1151 // skip until we get to real content 1152 continue; 1153 } 1154 // skip cruft 1155 if (line.startsWith("Internet-Draft")) { 1156 continue; 1157 } 1158 if (line.startsWith("Ewell")) { 1159 continue; 1160 } 1161 if (line.startsWith("\f")) { 1162 continue; 1163 } 1164 if (line.startsWith("4. Security Considerations")) { 1165 break; 1166 } 1167 1168 if (line.startsWith("%%")) 1169 continue; // skip separators (ok, since data starts with Type: 1170 if (line.startsWith(" ")) { 1171 currentData.put(lastLabel, lastRest + " " + line.trim()); 1172 continue; 1173 } 1174 1175 /* 1176 * Type: language Subtag: aa Description: Afar Added: 2005-10-16 1177 * Suppress-Script: Latn 1178 */ 1179 int pos2 = line.indexOf(':'); 1180 LstrField label = LstrField.from(line.substring(0, pos2)); 1181 String rest = line.substring(pos2 + 1).trim(); 1182 if (label == LstrField.Type) { 1183 subtagData = CldrUtility.get(result2, lastType = LstrType.valueOf(rest)); 1184 if (subtagData == null) { 1185 result2.put(LstrType.valueOf(rest), subtagData = new TreeMap<String, Map<LstrField, String>>()); 1186 } 1187 } else if (label == LstrField.Subtag 1188 || label == LstrField.Tag) { 1189 lastTag = rest; 1190 String endTag = null; 1191 // Subtag: qaa..qtz 1192 int pos = lastTag.indexOf(".."); 1193 if (pos >= 0) { 1194 endTag = lastTag.substring(pos + 2); 1195 lastTag = lastTag.substring(0, pos); 1196 } 1197 currentData = new TreeMap<LstrField, String>(); 1198 if (endTag == null) { 1199 putSubtagData(lastTag, subtagData, currentData); 1200 languageCount.add(lastType, 1); 1201 // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag); 1202 } else { 1203 for (; lastTag.compareTo(endTag) <= 0; lastTag = nextAlpha(lastTag)) { 1204 // System.out.println(">" + current); 1205 putSubtagData(lastTag, subtagData, currentData); 1206 languageCount.add(lastType, 1); 1207 // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag); 1208 } 1209 1210 } 1211 // label.equalsIgnoreCase("Added") || label.equalsIgnoreCase("Suppress-Script")) { 1212 // skip 1213 // } else if (pieces.length < 2) { 1214 // System.out.println("Odd Line: " + lastType + "\t" + lastTag + "\t" + line); 1215 } else { 1216 lastLabel = label; 1217 // The following code was removed because in the standard tests (TestAll) both lastRest and rest were always equal. 1218 // if(!translitCache.containsKey(rest)) { 1219 // lastRest = TransliteratorUtilities.fromXML.transliterate(rest); 1220 // translitCache.put(rest, lastRest); 1221 // if (!lastRest.equals(rest)) { 1222 // System.out.println(System.currentTimeMillis()+" initLStr: LastRest: '"+lastRest+"' Rest: '"+rest+"'"); 1223 // } 1224 // } else { 1225 // lastRest = translitCache.get(rest); 1226 // } 1227 lastRest = rest; 1228 String oldValue = (String) CldrUtility.get(currentData, lastLabel); 1229 if (oldValue != null) { 1230 lastRest = oldValue + DESCRIPTION_SEPARATOR + lastRest; 1231 } 1232 currentData.put(lastLabel, lastRest); 1233 } 1234 } 1235 } catch (Exception e) { 1236 throw (RuntimeException) new IllegalArgumentException( 1237 "Can't process file: data/" 1238 + registryName + ";\t at line " + lineNumber).initCause(e); 1239 } finally { 1240 if (!funnyTags.isEmpty()) { 1241 if (DEBUG) 1242 System.out.println("Funny tags: " + funnyTags); 1243 } 1244 } 1245 // copy raw 1246 Map<LstrType, Map<String, Map<LstrField, String>>> rawLstreg = new TreeMap<LstrType, Map<String, Map<LstrField, String>>>(); 1247 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry1 : result2.entrySet()) { 1248 LstrType key1 = entry1.getKey(); 1249 TreeMap<String, Map<LstrField, String>> raw1 = new TreeMap<String, Map<LstrField, String>>(); rawLstreg.put(key1, raw1)1250 rawLstreg.put(key1, raw1); 1251 for (Entry<String, Map<LstrField, String>> entry2 : entry1.getValue().entrySet()) { 1252 String key2 = entry2.getKey(); 1253 final Map<LstrField, String> value2 = entry2.getValue(); 1254 TreeMap<LstrField, String> raw2 = new TreeMap<LstrField, String>(); 1255 raw2.putAll(value2); raw1.put(key2, raw2)1256 raw1.put(key2, raw2); 1257 } 1258 } 1259 LSTREG_RAW = CldrUtility.protectCollection(rawLstreg); 1260 1261 // add extras 1262 for (int i = 0; i < extras.length; ++i) { 1263 Map<String, Map<LstrField, String>> subtagData = CldrUtility.get(result2, LstrType.valueOf(extras[i][0])); 1264 if (subtagData == null) { LstrType.valueOf(extras[i][0])1265 result2.put(LstrType.valueOf(extras[i][0]), subtagData = new TreeMap<String, Map<LstrField, String>>()); 1266 } 1267 Map<LstrField, String> labelData = new TreeMap<LstrField, String>(); 1268 for (int j = 2; j < extras[i].length; j += 2) { LstrField.from(extras[i][j])1269 labelData.put(LstrField.from(extras[i][j]), extras[i][j + 1]); 1270 } 1271 Map<LstrField, String> old = CldrUtility.get(subtagData, extras[i][1]); 1272 if (old != null) { 1273 if (!"Private use".equals(CldrUtility.get(old, LstrField.Description))) { 1274 throw new IllegalArgumentException("REPLACING data for " + extras[i][1] + "\t" + old + "\twith" 1275 + labelData); 1276 } 1277 } 1278 if (false) { 1279 System.out.println((old != null ? "REPLACING" + "\t" + old : "ADDING") + 1280 " data for " + extras[i][1] + "\twith" + labelData); 1281 } subtagData.put(extras[i][1], labelData)1282 subtagData.put(extras[i][1], labelData); 1283 } 1284 // build compatibility map 1285 Map<String, Map<String, Map<String, String>>> result = new LinkedHashMap<String, Map<String, Map<String, String>>>(); 1286 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : result2.entrySet()) { 1287 Map<String, Map<String, String>> copy2 = new LinkedHashMap<String, Map<String, String>>(); 1288 result.put(entry.getKey().toString(), copy2); 1289 for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) { 1290 Map<String, String> copy3 = new LinkedHashMap<String, String>(); entry2.getKey()1291 copy2.put(entry2.getKey(), copy3); 1292 for (Entry<LstrField, String> entry3 : entry2.getValue().entrySet()) { entry3.getValue()1293 copy3.put(entry3.getKey().toString(), entry3.getValue()); 1294 } 1295 } 1296 } 1297 LSTREG = CldrUtility.protectCollection(result); 1298 LSTREG_ENUM = CldrUtility.protectCollection(result2); 1299 } 1300 1301 private static <K, K2, V> Map<K2, V> putSubtagData(K lastTag, Map<K, Map<K2, V>> subtagData, Map<K2, V> currentData) { 1302 Map<K2, V> oldData = subtagData.get(lastTag); 1303 if (oldData != null) { 1304 if (oldData.get("CLDR") != null) { 1305 System.out.println("overriding: " + lastTag + ", " + oldData); 1306 } else { 1307 throw new IllegalArgumentException("Duplicate tag: " + lastTag); 1308 } 1309 } 1310 return subtagData.put(lastTag, currentData); 1311 } 1312 1313 static Counter<LstrType> languageCount = new Counter<LstrType>(); 1314 1315 public static Counter<LstrType> getLanguageCount() { 1316 return languageCount; 1317 } 1318 1319 ZoneParser zoneParser = new ZoneParser(); 1320 1321 // static public final Set<String> MODERN_SCRIPTS = Collections 1322 // .unmodifiableSet(new TreeSet( 1323 // // "Bali " + 1324 // // "Bugi " + 1325 // // "Copt " + 1326 // // "Hano " + 1327 // // "Osma " + 1328 // // "Qaai " + 1329 // // "Sylo " + 1330 // // "Syrc " + 1331 // // "Tagb " + 1332 // // "Tglg " + 1333 // Arrays 1334 // .asList("Hans Hant Jpan Hrkt Kore Arab Armn Bali Beng Bopo Cans Cham Cher Cyrl Deva Ethi Geor Grek Gujr Guru Hani Hang Hebr Hira Knda Kana Kali Khmr Laoo Latn Lepc Limb Mlym Mong Mymr Talu Nkoo Olck Orya Saur Sinh Tale Taml Telu Thaa Thai Tibt Tfng Vaii Yiii" 1335 // .split("\\s+")))); 1336 1337 // updated to http://www.unicode.org/reports/tr31/tr31-9.html#Specific_Character_Adjustments 1338 1339 /** 1340 * @deprecated 1341 */ 1342 public Map<String, List<ZoneLine>> getZone_rules() { 1343 return zoneParser.getZone_rules(); 1344 } 1345 1346 /** 1347 * @deprecated 1348 */ 1349 public Map<String, List<String>> getZoneData() { 1350 return zoneParser.getZoneData(); 1351 } 1352 1353 /** 1354 * @deprecated 1355 */ 1356 public Set<String> getCanonicalTimeZones() { 1357 return zoneParser.getZoneData().keySet(); 1358 } 1359 1360 /** 1361 * @deprecated 1362 */ 1363 public Map<String, Set<String>> getCountryToZoneSet() { 1364 return zoneParser.getCountryToZoneSet(); 1365 } 1366 1367 /** 1368 * @deprecated 1369 */ 1370 public List<String> getDeprecatedZoneIDs() { 1371 return zoneParser.getDeprecatedZoneIDs(); 1372 } 1373 1374 /** 1375 * @deprecated 1376 */ 1377 public Comparator<String> getTZIDComparator() { 1378 return zoneParser.getTZIDComparator(); 1379 } 1380 1381 /** 1382 * @deprecated 1383 */ 1384 public Map<String, Set<String>> getZoneLinkNew_OldSet() { 1385 return zoneParser.getZoneLinkNew_OldSet(); 1386 } 1387 1388 /** 1389 * @deprecated 1390 */ 1391 public Map<String, String> getZoneLinkold_new() { 1392 return zoneParser.getZoneLinkold_new(); 1393 } 1394 1395 /** 1396 * @deprecated 1397 */ 1398 public Map getZoneRuleID_rules() { 1399 return zoneParser.getZoneRuleID_rules(); 1400 } 1401 1402 /** 1403 * @deprecated 1404 */ 1405 public Map<String, String> getZoneToCounty() { 1406 return zoneParser.getZoneToCounty(); 1407 } 1408 1409 /** 1410 * @deprecated 1411 */ 1412 public String getZoneVersion() { 1413 return zoneParser.getVersion(); 1414 } 1415 1416 public static String fixLanguageTag(String languageSubtag) { 1417 if (languageSubtag.equals("mo")) { // fix special cases 1418 return "ro"; 1419 } else if (languageSubtag.equals("no")) { 1420 return "nb"; 1421 } 1422 return languageSubtag; 1423 } 1424 1425 public boolean isModernLanguage(String languageCode) { 1426 if (getMoribundLanguages().contains(languageCode)) return false; 1427 Type type = Iso639Data.getType(languageCode); 1428 if (type == Type.Living) return true; 1429 if (languageCode.equals("eo")) return true; // exception for Esperanto 1430 // Scope scope = Iso639Data.getScope(languageCode); 1431 // if (scope == Scope.Collection) return false; 1432 return false; 1433 } 1434 1435 public static boolean isScriptModern(String script) { 1436 ScriptMetadata.Info info = ScriptMetadata.getInfo(script); 1437 if (info == null) { 1438 if (false) throw new IllegalArgumentException("No script metadata for: " + script); 1439 return false; 1440 } 1441 IdUsage idUsage = info.idUsage; 1442 return idUsage != IdUsage.EXCLUSION && idUsage != IdUsage.UNKNOWN; 1443 } 1444 1445 static final Pattern whitespace = PatternCache.get("\\s+"); 1446 static Set<String> filteredCurrencies = null; 1447 1448 public Set<String> getSurveyToolDisplayCodes(String type) { 1449 return getGoodAvailableCodes(type); 1450 } 1451 1452 static UnicodeSet COUNTRY = new UnicodeSet("[a-zA-Z]").freeze(); 1453 1454 /** 1455 * Quick check for whether valid country. Not complete: should use Validity 1456 * @param territory 1457 * @return 1458 */ 1459 public static boolean isCountry(String territory) { 1460 switch (territory) { 1461 case "ZZ": 1462 case "QO": 1463 case "EU": 1464 case "UN": 1465 case "EZ": 1466 return false; 1467 default: 1468 return territory.length() == 2 && COUNTRY.containsAll(territory); 1469 } 1470 } 1471 1472 public boolean isLstregPrivateUse(String type, String code) { 1473 Map<String, String> lStregData = getLStreg().get(type).get(code); 1474 return lStregData.get("Description").equalsIgnoreCase("private use"); 1475 } 1476 1477 public boolean isLstregDeprecated(String type, String code) { 1478 Map<String, String> lStregData = getLStreg().get(type).get(code); 1479 return lStregData.get("Deprecated") != null; 1480 } 1481 } 1482