1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.BufferedReader; 12 import java.io.IOException; 13 import java.util.ArrayList; 14 import java.util.Arrays; 15 import java.util.Collections; 16 import java.util.Comparator; 17 import java.util.EnumMap; 18 import java.util.EnumSet; 19 import java.util.HashMap; 20 import java.util.HashSet; 21 import java.util.Iterator; 22 import java.util.LinkedHashMap; 23 import java.util.LinkedHashSet; 24 import java.util.List; 25 import java.util.Locale; 26 import java.util.Map; 27 import java.util.Map.Entry; 28 import java.util.Set; 29 import java.util.TreeMap; 30 import java.util.TreeSet; 31 import java.util.regex.Pattern; 32 33 import org.unicode.cldr.draft.ScriptMetadata; 34 import org.unicode.cldr.draft.ScriptMetadata.IdUsage; 35 import org.unicode.cldr.util.Iso639Data.Type; 36 import org.unicode.cldr.util.ZoneParser.ZoneLine; 37 38 import com.ibm.icu.impl.Relation; 39 import com.ibm.icu.lang.UCharacter; 40 import com.ibm.icu.text.UnicodeSet; 41 import com.ibm.icu.util.ICUUncheckedIOException; 42 import com.ibm.icu.util.Output; 43 44 /** 45 * Provides access to various codes used by CLDR: RFC 3066, ISO 4217, Olson 46 * tzids 47 */ 48 public class StandardCodes { 49 50 public enum CodeType { 51 language, script, territory, extlang, grandfathered, redundant, variant, currency, tzid; from(String name)52 public static CodeType from(String name) { 53 if ("region".equals(name)) { 54 return territory; 55 } 56 return CodeType.valueOf(name); 57 } 58 } 59 60 private static final Set<CodeType> TypeSet = Collections.unmodifiableSet(EnumSet.allOf(CodeType.class)); 61 62 private 
static final Set<String> TypeStringSet; 63 static { 64 LinkedHashSet<String> foo = new LinkedHashSet<String>(); 65 for (CodeType x : CodeType.values()) { x.toString()66 foo.add(x.toString()); 67 } 68 TypeStringSet = Collections.unmodifiableSet(foo); 69 } 70 71 public static final String DESCRIPTION_SEPARATOR = "\u25AA"; 72 73 public static final String NO_COUNTRY = "001"; 74 75 private static StandardCodes singleton; 76 77 private EnumMap<CodeType, Map<String, List<String>>> type_code_data = new EnumMap<CodeType, Map<String, List<String>>>( 78 CodeType.class); 79 80 private EnumMap<CodeType, Map<String, List<String>>> type_name_codes = new EnumMap<CodeType, Map<String, List<String>>>( 81 CodeType.class); 82 83 private EnumMap<CodeType, Map<String, String>> type_code_preferred = new EnumMap<CodeType, Map<String, String>>( 84 CodeType.class); 85 86 private Map<String, Set<String>> country_modernCurrency = new TreeMap<String, Set<String>>(); 87 88 private Map<CodeType, Set<String>> goodCodes = new TreeMap<CodeType, Set<String>>(); 89 90 private static final boolean DEBUG = false; 91 92 /** 93 * Get the singleton copy of the standard codes. 94 */ make()95 static public synchronized StandardCodes make() { 96 if (singleton == null) 97 singleton = new StandardCodes(); 98 return singleton; 99 } 100 101 /** 102 * The data is the name in the case of RFC3066 codes, and the country code in 103 * the case of TZIDs and ISO currency codes. If the country code is missing, 104 * uses ZZ. 
105 */ getData(String type, String code)106 public String getData(String type, String code) { 107 Map<String, List<String>> code_data = getCodeData(type); 108 if (code_data == null) 109 return null; 110 List<String> list = code_data.get(code); 111 if (list == null) 112 return null; 113 return list.get(0); 114 } 115 116 /** 117 * @return the full data for the type and code For the data in lstreg, it is 118 * description | date | canonical_value | recommended_prefix # 119 * comments 120 */ getFullData(String type, String code)121 public List<String> getFullData(String type, String code) { 122 Map<String, List<String>> code_data = getCodeData(type); 123 if (code_data == null) 124 return null; 125 return code_data.get(code); 126 } 127 128 /** 129 * @return the full data for the type and code For the data in lstreg, it is 130 * description | date | canonical_value | recommended_prefix # 131 * comments 132 */ getFullData(CodeType type, String code)133 public List<String> getFullData(CodeType type, String code) { 134 Map<String, List<String>> code_data = type_code_data.get(type); 135 if (code_data == null) 136 return null; 137 return code_data.get(code); 138 } 139 getCodeData(String type)140 private Map<String, List<String>> getCodeData(String type) { 141 return getCodeData(CodeType.from(type)); 142 } 143 getCodeData(CodeType type)144 private Map<String, List<String>> getCodeData(CodeType type) { 145 return type_code_data.get(type); 146 } 147 148 /** 149 * Get at the language registry values, as a Map from label to value. 
150 * 151 * @param type 152 * @param code 153 * @return 154 */ getLangData(String type, String code)155 public Map<String, String> getLangData(String type, String code) { 156 try { 157 if (type.equals("territory")) 158 type = "region"; 159 else if (type.equals("variant")) code = code.toLowerCase(Locale.ENGLISH); 160 return (Map) ((Map) getLStreg().get(type)).get(code); 161 } catch (RuntimeException e) { 162 return null; 163 } 164 } 165 166 /** 167 * Return a replacement code, if available. If not, return null. 168 * 169 */ getReplacement(String type, String code)170 public String getReplacement(String type, String code) { 171 if (type.equals("currency")) 172 return null; // no replacement codes for currencies 173 List<String> data = getFullData(type, code); 174 if (data == null) 175 return null; 176 // if available, the replacement is a non-empty value other than --, in 177 // position 2. 178 if (data.size() < 3) 179 return null; 180 String replacement = (String) data.get(2); 181 if (!replacement.equals("") && !replacement.equals("--")) 182 return replacement; 183 return null; 184 } 185 186 /** 187 * Return the list of codes that have the same data. For example, returns all 188 * currency codes for a country. If there is a preferred one, it is first. 189 * 190 * @param type 191 * @param data 192 * @return 193 */ 194 @Deprecated getCodes(String type, String data)195 public List<String> getCodes(String type, String data) { 196 return getCodes(CodeType.valueOf(type), data); 197 } 198 199 /** 200 * Return the list of codes that have the same data. For example, returns all 201 * currency codes for a country. If there is a preferred one, it is first. 
202 */ getCodes(CodeType type, String data)203 public List<String> getCodes(CodeType type, String data) { 204 Map<String, List<String>> data_codes = type_name_codes.get(type); 205 if (data_codes == null) 206 return null; 207 return Collections.unmodifiableList(data_codes.get(data)); 208 } 209 210 /** 211 * Where there is a preferred code, return it. 212 */ 213 @Deprecated getPreferred(String type, String code)214 public String getPreferred(String type, String code) { 215 return getPreferred(CodeType.valueOf(type), code); 216 } 217 218 /** 219 * Where there is a preferred code, return it. 220 */ 221 getPreferred(CodeType type, String code)222 public String getPreferred(CodeType type, String code) { 223 Map<String, String> code_preferred = type_code_preferred.get(type); 224 if (code_preferred == null) 225 return code; 226 String newCode = code_preferred.get(code); 227 if (newCode == null) 228 return code; 229 return newCode; 230 } 231 232 /** 233 * Get all the available types 234 */ getAvailableTypes()235 public Set<String> getAvailableTypes() { 236 return TypeStringSet; 237 } 238 239 /** 240 * Get all the available types 241 */ getAvailableTypesEnum()242 public Set<CodeType> getAvailableTypesEnum() { 243 return TypeSet; 244 } 245 246 /** 247 * Get all the available codes for a given type 248 * 249 * @param type 250 * @return 251 */ getAvailableCodes(String type)252 public Set<String> getAvailableCodes(String type) { 253 return getAvailableCodes(CodeType.from(type)); 254 } 255 256 /** 257 * Get all the available codes for a given type 258 * 259 * @param type 260 * @return 261 */ getAvailableCodes(CodeType type)262 public Set<String> getAvailableCodes(CodeType type) { 263 Map<String, List<String>> code_name = type_code_data.get(type); 264 return Collections.unmodifiableSet(code_name.keySet()); 265 } 266 getGoodAvailableCodes(String stringType)267 public Set<String> getGoodAvailableCodes(String stringType) { 268 return getGoodAvailableCodes(CodeType.from(stringType)); 
269 } 270 271 /** 272 * Get all the available "real" codes for a given type, excluding private use, 273 * but including some deprecated codes. Use SupplementalDataInfo getLocaleAliases to 274 * exclude others. 275 * 276 * @param type 277 * @return 278 */ getGoodAvailableCodes(CodeType type)279 public Set<String> getGoodAvailableCodes(CodeType type) { 280 Set<String> result = goodCodes.get(type); 281 if (result == null) { 282 synchronized (goodCodes) { 283 Map<String, List<String>> code_name = getCodeData(type); 284 SupplementalDataInfo sd = SupplementalDataInfo.getInstance(); 285 if (code_name == null) 286 return null; 287 result = new TreeSet<String>(code_name.keySet()); 288 switch (type) { 289 case currency: 290 break; // nothing special 291 case language: 292 return sd.getCLDRLanguageCodes(); 293 case script: 294 return sd.getCLDRScriptCodes(); 295 case tzid: 296 break; // nothing special 297 default: 298 for (Iterator<String> it = result.iterator(); it.hasNext();) { 299 String code = (String) it.next(); 300 if (code.equals("root") || code.equals("QO")) 301 continue; 302 List<String> data = getFullData(type, code); 303 if (data.size() < 3) { 304 if (DEBUG) 305 System.out.println(code + "\t" + data); 306 } 307 if ("PRIVATE USE".equalsIgnoreCase(data.get(0)) 308 || (!data.get(2).equals("") && !data.get(2).equals("--"))) { 309 // System.out.println("Removing: " + code); 310 it.remove(); 311 } 312 } 313 } 314 result = Collections.unmodifiableSet(result); 315 goodCodes.put(type, result); 316 } 317 } 318 return result; 319 } 320 321 private static Set<String> GOOD_COUNTRIES; 322 getGoodCountries()323 public Set<String> getGoodCountries() { 324 synchronized (goodCodes) { 325 if (GOOD_COUNTRIES == null) { 326 Set<String> temp = new LinkedHashSet<String>(); 327 for (String s : getGoodAvailableCodes(CodeType.territory)) { 328 if (isCountry(s)) { 329 temp.add(s); 330 } 331 } 332 GOOD_COUNTRIES = Collections.unmodifiableSet(temp); 333 } 334 } 335 return GOOD_COUNTRIES; 336 
} 337 338 /** 339 * Gets the modern currency. 340 */ getMainCurrencies(String countryCode)341 public Set<String> getMainCurrencies(String countryCode) { 342 return country_modernCurrency.get(countryCode); 343 } 344 345 private EnumMap<Organization, Map<String, Level>> platform_locale_level = null; 346 private EnumMap<Organization, Relation<Level, String>> platform_level_locale = null; 347 private Map<String, Map<String, String>> platform_locale_levelString = null; 348 349 // /** 350 // * Get rid of this 351 // * 352 // * @param type 353 // * @return 354 // * @throws IOException 355 // * @deprecated 356 // */ 357 // public String getEffectiveLocaleType(String type) throws IOException { 358 // if ((type != null) && (getLocaleCoverageOrganizations().contains(Organization.valueOf(type)))) { 359 // return type; 360 // } else { 361 // return null; // the default.. for now.. 362 // } 363 // } 364 365 static Comparator caseless = new Comparator() { 366 367 public int compare(Object arg0, Object arg1) { 368 String s1 = (String) arg0; 369 String s2 = (String) arg1; 370 return s1.compareToIgnoreCase(s2); 371 } 372 373 }; 374 375 /** 376 * Returns locales according to status. It returns a Map of Maps, key 1 is 377 * either IBM or Java (perhaps more later), key 2 is the Level. 
378 * 379 * @deprecated 380 */ getLocaleTypes()381 public Map<Organization, Map<String, Level>> getLocaleTypes() { 382 synchronized (StandardCodes.class) { 383 if (platform_locale_level == null) { 384 loadPlatformLocaleStatus(); 385 } 386 } 387 return platform_locale_level; 388 } 389 390 /** 391 * Return map of locales to levels 392 * @param org 393 * @return 394 */ getLocaleToLevel(Organization org)395 public Map<String, Level> getLocaleToLevel(Organization org) { 396 return getLocaleTypes().get(org); 397 } 398 getLocaleCoverageLevel(String organization, String desiredLocale)399 public Level getLocaleCoverageLevel(String organization, String desiredLocale) { 400 return getLocaleCoverageLevel(Organization.fromString(organization), desiredLocale); 401 } 402 getLocaleCoverageLevel(Organization organization, String desiredLocale)403 public Level getLocaleCoverageLevel(Organization organization, String desiredLocale) { 404 return getLocaleCoverageLevel(organization, desiredLocale, new Output<LocaleCoverageType>()); 405 } 406 407 public enum LocaleCoverageType { 408 explicit, parent, star, undetermined 409 } 410 411 /** 412 * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing. 413 * A locale of "*" in the data means "everything else". 
414 */ getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType)415 public Level getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType) { 416 synchronized (StandardCodes.class) { 417 if (platform_locale_level == null) { 418 loadPlatformLocaleStatus(); 419 } 420 } 421 coverageType.value = LocaleCoverageType.undetermined; 422 if (organization == null) { 423 return Level.UNDETERMINED; 424 } 425 Map<String, Level> locale_status = platform_locale_level.get(organization); 426 if (locale_status == null) { 427 return Level.UNDETERMINED; 428 } 429 // see if there is a parent 430 String originalLocale = desiredLocale; 431 while (desiredLocale != null) { 432 Level status = locale_status.get(desiredLocale); 433 if (status != null && status != Level.UNDETERMINED) { 434 coverageType.value = originalLocale == desiredLocale ? LocaleCoverageType.explicit : LocaleCoverageType.parent; 435 return status; 436 } 437 desiredLocale = LocaleIDParser.getParent(desiredLocale); 438 } 439 Level status = locale_status.get("*"); 440 if (status != null && status != Level.UNDETERMINED) { 441 coverageType.value = LocaleCoverageType.star; 442 return status; 443 } 444 return Level.UNDETERMINED; 445 } 446 447 /** 448 * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing. 
/**
 * Returns the level the organization assigns to the "*" locale.
 */
    public Level getDefaultLocaleCoverageLevel(Organization organization) {
        return getLocaleCoverageLevel(organization, "*");
    }

    /**
     * Returns the organizations that have coverage data, loading the data on
     * first use.
     */
    public Set<Organization> getLocaleCoverageOrganizations() {
        synchronized (StandardCodes.class) {
            if (platform_locale_level == null) {
                loadPlatformLocaleStatus();
            }
        }
        return platform_locale_level.keySet();
    }

    /**
     * Returns the names of the organizations that have coverage data, loading
     * the data on first use.
     */
    public Set<String> getLocaleCoverageOrganizationStrings() {
        synchronized (StandardCodes.class) {
            if (platform_locale_level == null) {
                loadPlatformLocaleStatus();
            }
        }
        return platform_locale_levelString.keySet();
    }

    /**
     * Same as the Organization overload, with the organization given by name.
     */
    public Set<String> getLocaleCoverageLocales(String organization) {
        return getLocaleCoverageLocales(Organization.fromString(organization));
    }

    /**
     * Returns the locales that have a level for the organization.
     *
     * @return the locales; empty when the organization has no coverage data
     */
    public Set<String> getLocaleCoverageLocales(Organization organization) {
        synchronized (StandardCodes.class) {
            if (platform_locale_level == null) {
                loadPlatformLocaleStatus();
            }
        }
        Map<String, Level> locale_level = platform_locale_level.get(organization);
        if (locale_level == null) {
            return Collections.<String> emptySet();
        }
        return locale_level.keySet();
    }

    /**
     * Returns the level-to-locales relation for the organization, or null
     * when the organization has no coverage data.
     */
    public Relation<Level, String> getLevelsToLocalesFor(Organization organization) {
        synchronized (StandardCodes.class) {
            if (platform_level_locale == null) {
                loadPlatformLocaleStatus();
            }
        }
        return platform_level_locale.get(organization);
    }

    /**
     * Returns, in iteration order, the organization's locales whose coverage
     * level is contained in {@code choice}.
     */
    public Set<String> getLocaleCoverageLocales(Organization organization, Set<Level> choice) {
        Set<String> result = new LinkedHashSet<String>();
        for (String locale : getLocaleCoverageLocales(organization)) {
            if (choice.contains(getLocaleCoverageLevel(organization, locale))) {
                result.add(locale);
            }
        }
        return result;
    }

    /**
     * Reads Locales.txt and fills the three platform_* coverage tables.
     * Each data line is "organization ; locale ; level", with '#' starting a
     * comment.
     *
     * @throws IllegalArgumentException
     *             for an unknown organization, an invalid locale, or a locale
     *             that is a default-content locale
     * @throws ICUUncheckedIOException
     *             when the file cannot be read
     */
    private void loadPlatformLocaleStatus() {
        LocaleIDParser parser = new LocaleIDParser();
        platform_locale_level = new EnumMap<Organization, Map<String, Level>>(Organization.class);
        SupplementalDataInfo sd = SupplementalDataInfo.getInstance();
        Set<String> defaultContentLocales = sd.getDefaultContentLocales();
        String line;
        // try-with-resources so the reader is closed on every path
        try (BufferedReader lstreg = CldrUtility.getUTF8Data("Locales.txt")) {
            while (true) {
                line = lstreg.readLine();
                if (line == null) {
                    break;
                }
                int commentPos = line.indexOf('#');
                if (commentPos >= 0) {
                    line = line.substring(0, commentPos);
                }
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                List<String> stuff = CldrUtility.splitList(line, ';', true);
                Organization organization;

                // verify that the organization is valid
                try {
                    organization = Organization.fromString(stuff.get(0));
                } catch (Exception e) {
                    throw new IllegalArgumentException("Invalid organization in Locales.txt: " + line, e);
                }

                // verify that the locale is valid BCP47
                String locale = stuff.get(1);
                if (!locale.equals("*")) {
                    parser.set(locale);
                    String valid = validate(parser);
                    if (valid.length() != 0) {
                        throw new IllegalArgumentException("Invalid locale in Locales.txt: " + line);
                    }
                    locale = parser.toString(); // normalize

                    // verify that the locale is not a default content locale
                    if (defaultContentLocales.contains(locale)) {
                        throw new IllegalArgumentException("Cannot have default content locale in Locales.txt: "
                            + line);
                    }
                }

                Level status = Level.get(stuff.get(2));
                if (status == Level.UNDETERMINED) {
                    System.out.println("Warning: Level unknown on: " + line);
                }
                Map<String, Level> locale_status = platform_locale_level.get(organization);
                if (locale_status == null) {
                    locale_status = new TreeMap<String, Level>();
                    platform_locale_level.put(organization, locale_status);
                }
                locale_status.put(locale, status);
                if (!locale.equals("*")) {
                    // make sure the language-script and language entries exist
                    String scriptLoc = parser.getLanguageScript();
                    if (locale_status.get(scriptLoc) == null) {
                        locale_status.put(scriptLoc, status);
                    }
                    String lang = parser.getLanguage();
                    if (locale_status.get(lang) == null) {
                        locale_status.put(lang, status);
                    }
                }
            }
        } catch (IOException e) {
            throw new ICUUncheckedIOException("Internal Error", e);
        }

        // now reset the parent to be the max of the children
        // NOTE(review): this puts into the map being iterated; it appears to
        // rely on the language and language-script keys already being present
        // (added in the loop above) so that no put is a structural change.
        for (Organization platform : platform_locale_level.keySet()) {
            Map<String, Level> locale_level = platform_locale_level.get(platform);
            for (String locale : locale_level.keySet()) {
                parser.set(locale);
                Level childLevel = locale_level.get(locale);

                String language = parser.getLanguage();
                if (!language.equals(locale)) {
                    Level languageLevel = locale_level.get(language);
                    if (languageLevel == null || languageLevel.compareTo(childLevel) < 0) {
                        locale_level.put(language, childLevel);
                    }
                }
                String oldLanguage = language;
                language = parser.getLanguageScript();
                if (!language.equals(oldLanguage)) {
                    Level languageLevel = locale_level.get(language);
                    if (languageLevel == null || languageLevel.compareTo(childLevel) < 0) {
                        locale_level.put(language, childLevel);
                    }
                }
            }
        }
        // backwards compat hack
        platform_locale_levelString = new TreeMap<String, Map<String, String>>();
        platform_level_locale = new EnumMap<>(Organization.class);
        for (Organization platform : platform_locale_level.keySet()) {
            Map<String, String> locale_levelString = new TreeMap<String, String>();
            platform_locale_levelString.put(platform.toString(), locale_levelString);
            Map<String, Level> locale_level = platform_locale_level.get(platform);
            for (String locale : locale_level.keySet()) {
                locale_levelString.put(locale, locale_level.get(locale).toString());
            }
            Relation level_locale = Relation.of(new EnumMap(Level.class), HashSet.class);
            level_locale.addAllInverted(locale_level).freeze();
            platform_level_locale.put(platform, level_locale);
        }
        // NOTE(review): the result of this first call is discarded, unlike the
        // two below — confirm whether that is intended.
        CldrUtility.protectCollection(platform_level_locale);
        platform_locale_level = CldrUtility.protectCollection(platform_locale_level);
        platform_locale_levelString = CldrUtility.protectCollection(platform_locale_levelString);
    }

    /**
     * Checks the language, script and region held by the parser against the
     * available codes.
     *
     * @return an empty string when everything is valid, otherwise the
     *         problems found, separated by ", "
     */
    private String validate(LocaleIDParser parser) {
        String message = "";
        String lang = parser.getLanguage();
        if (lang.length() == 0) {
            message += ", Missing language";
        } else if (!getAvailableCodes("language").contains(lang)) {
            message += ", Invalid language code: " + lang;
        }
        String script = parser.getScript();
        if (script.length() != 0 && !getAvailableCodes("script").contains(script)) {
            message += ", Invalid script code: " + script;
        }
        String territory = parser.getRegion();
        if (territory.length() != 0 && !getAvailableCodes("territory").contains(territory)) {
            // report the offending territory (this used to print the language)
            message += ", Invalid territory code: " + territory;
        }
        // drop the leading ", "
        return message.length() == 0 ? message : message.substring(2);
    }

    /**
     * Ascertain that the given locale is in the given group specified by the
     * organization
     *
     * @param locale
     *            the locale to test
     * @param group
     *            the expected group name
     * @param org
     *            the organization
     * @return true when {@code group} equals the locale's group
     */
    public boolean isLocaleInGroup(String locale, String group, Organization org) {
        return group.equals(getGroup(locale, org));
    }

    /**
     * Same as the Organization overload, with the organization given by name.
     */
    public boolean isLocaleInGroup(String locale, String group, String org) {
        return isLocaleInGroup(locale, group, Organization.fromString(org));
    }

    /**
     * Same as the Organization overload, with the organization given by name.
     */
    public String getGroup(String locale, String org) {
        return getGroup(locale, Organization.fromString(org));
    }

    /**
     * Gets the coverage group given a locale and org
     *
     * @param locale
     *            the locale
     * @param org
     *            the organization
     * @return the coverage level's string form if available, null if the
     *         level is undetermined
     */
    public String getGroup(String locale, Organization org) {
        Level l = getLocaleCoverageLevel(org, locale);
        if (l.equals(Level.UNDETERMINED)) {
            return null;
        }
        return l.toString();
    }

    // ========== PRIVATES ==========

    /**
     * Loads the code tables: the '|'-separated data files, then the language
     * registry, then the zone data.
     *
     * @throws IllegalArgumentException
     *             when a data file cannot be read or parsed; the message names
     *             the file and the line being processed
     */
    private StandardCodes() {
        String[] files = { /* "lstreg.txt", */"ISO4217.txt" }; // , "TZID.txt"
        type_code_preferred.put(CodeType.tzid, new TreeMap<String, String>());
        add(CodeType.language, "root", "Root");
        String originalLine = null;
        for (int fileIndex = 0; fileIndex < files.length; ++fileIndex) {
            // try-with-resources so the reader is closed even when parsing fails
            try (BufferedReader lstreg = CldrUtility.getUTF8Data(files[fileIndex])) {
                while (true) {
                    String line = originalLine = lstreg.readLine();
                    if (line == null) {
                        break;
                    }
                    if (line.startsWith("\uFEFF")) {
                        line = line.substring(1); // strip the byte-order mark
                    }
                    line = line.trim();
                    int commentPos = line.indexOf('#');
                    String comment = "";
                    if (commentPos >= 0) {
                        comment = line.substring(commentPos + 1).trim();
                        line = line.substring(0, commentPos);
                    }
                    if (line.length() == 0) {
                        continue;
                    }
                    List<String> pieces = CldrUtility.splitList(line, '|', true,
                        new ArrayList<String>());
                    String typeName = pieces.get(0);
                    pieces.remove(0);
                    String code = pieces.get(0);
                    pieces.remove(0);
                    // "date" is not a CodeType, so it has to be recognised
                    // before the conversion (comparing the enum with a String
                    // can never match).
                    if ("date".equals(typeName)) {
                        continue;
                    }
                    CodeType type = CodeType.from(typeName);

                    String oldName = pieces.get(0);
                    int pos = oldName.indexOf(';');
                    if (pos >= 0) {
                        oldName = oldName.substring(0, pos).trim();
                        pieces.set(0, oldName);
                    }

                    List<String> data = pieces;
                    if (comment.indexOf("deprecated") >= 0) {
                        // mark deprecated codes that name no replacement
                        if (data.get(2).length() == 0) {
                            data.set(2, "--");
                        }
                    }
                    if (oldName.equalsIgnoreCase("PRIVATE USE")) {
                        int separatorPos = code.indexOf("..");
                        if (separatorPos < 0) {
                            add(type, code, data);
                        } else {
                            // a range "first..last": add every code in it
                            String current = code.substring(0, separatorPos);
                            String end = code.substring(separatorPos + 2);
                            for (; current.compareTo(end) <= 0; current = nextAlpha(current)) {
                                add(type, current, data);
                            }
                        }
                        continue;
                    }
                    if (type != CodeType.tzid) {
                        add(type, code, data);
                        if (type == CodeType.currency) {
                            // currency | TPE | Timor Escudo | TP | EAST TIMOR | O
                            if (data.size() > 3 && data.get(3).equals("C")) {
                                String country = data.get(1);
                                Set<String> codes = country_modernCurrency.get(country);
                                if (codes == null) {
                                    codes = new TreeSet<String>();
                                    country_modernCurrency.put(country, codes);
                                }
                                codes.add(code);
                            }
                        }
                        continue;
                    }
                    // type = tzid
                    // List codes = (List) Utility.splitList(code, ',', true, new
                    // ArrayList());
                    String preferred = null;
                    for (int i = 0; i < pieces.size(); ++i) {
                        code = pieces.get(i);
                        add(type, code, data);
                        if (preferred == null) {
                            preferred = code;
                        } else {
                            Map<String, String> code_preferred = type_code_preferred.get(type);
                            code_preferred.put(code, preferred);
                        }
                    }
                }
            } catch (Exception e) {
                System.err.println("WARNING: " + files[fileIndex]
                    + " may be a corrupted UTF-8 file. Please check.");
                throw new IllegalArgumentException(
                    "Can't read " + files[fileIndex] + "\t" + originalLine, e);
            }
        }
        // protect only once every file has been read, so a later file can
        // still add currencies
        country_modernCurrency = CldrUtility.protectCollection(country_modernCurrency);

        // data is: description | date | canonical_value | recommended_prefix #
        // comments
        // HACK, just rework

        Map<String, Map<String, Map<String, String>>> languageRegistry = getLStreg();
        // languageRegistry = CldrUtility.protectCollection(languageRegistry);

        for (String type : languageRegistry.keySet()) {
            CodeType type2 = CodeType.from(type);
            Map<String, Map<String, String>> codeToFields = languageRegistry.get(type);
            for (String code : codeToFields.keySet()) {
                Map<String, String> fields = codeToFields.get(code);
                List<String> data = new ArrayList<String>(3);
                data.add(fields.get("Description"));
                data.add(fields.get("Added"));
                String pref = fields.get("Preferred-Value");
                if (pref == null) {
                    pref = fields.get("Deprecated") == null ? "" : "deprecated";
                }
                data.add(pref);
                if (type.equals("variant")) {
                    // fixed locale, matching the lower-casing in getLangData
                    code = code.toUpperCase(Locale.ENGLISH);
                }
                // data.add(mm.get("Recommended_Prefix"));
                // {"region", "BQ", "Description", "British Antarctic Territory",
                // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"},
                add(type2, code, data);
            }
        }

        Map<String, List<String>> m = getZoneData();
        for (Entry<String, List<String>> zone : m.entrySet()) {
            add(CodeType.tzid, zone.getKey(), zone.getValue().toString());
        }
    }

    /**
     * @param current
     *
@return 821 */ nextAlpha(String current)822 private static String nextAlpha(String current) { 823 // Don't care that this is inefficient 824 int value = 0; 825 for (int i = 0; i < current.length(); ++i) { 826 char c = current.charAt(i); 827 c -= c < 'a' ? 'A' : 'a'; 828 value = value * 26 + c; 829 } 830 value += 1; 831 String result = ""; 832 for (int i = 0; i < current.length(); ++i) { 833 result = (char) ((value % 26) + 'A') + result; 834 value = value / 26; 835 } 836 if (UCharacter.toLowerCase(current).equals(current)) { 837 result = UCharacter.toLowerCase(result); 838 } else if (UCharacter.toUpperCase(current).equals(current)) { 839 // do nothing 840 } else { 841 result = UCharacter.toTitleCase(result, null); 842 } 843 return result; 844 } 845 846 /** 847 * @param string 848 * @param string2 849 * @param string3 850 */ 851 private void add(CodeType type, String string2, String string3) { 852 List<String> l = new ArrayList<String>(); 853 l.add(string3); 854 add(type, string2, l); 855 } 856 857 private void add(CodeType type, String code, List<String> otherData) { 858 // hack 859 if (type == CodeType.script) { 860 if (code.equals("Qaai")) { 861 otherData = new ArrayList<String>(otherData); 862 otherData.set(0, "Inherited"); 863 } else if (code.equals("Zyyy")) { 864 otherData = new ArrayList<String>(otherData); 865 otherData.set(0, "Common"); 866 } 867 } 868 869 // assume name is the first item 870 871 String name = otherData.get(0); 872 873 // add to main list 874 Map<String, List<String>> code_data = getCodeData(type); 875 if (code_data == null) { 876 code_data = new TreeMap<String, List<String>>(); 877 type_code_data.put(type, code_data); 878 } 879 List<String> lastData = code_data.get(code); 880 if (lastData != null) { 881 lastData.addAll(otherData); 882 } else { 883 code_data.put(code, otherData); 884 } 885 886 // now add mapping from name to codes 887 Map<String, List<String>> name_codes = type_name_codes.get(type); 888 if (name_codes == null) { 889 
name_codes = new TreeMap<String, List<String>>(); 890 type_name_codes.put(type, name_codes); 891 } 892 List<String> codes = name_codes.get(name); 893 if (codes == null) { 894 codes = new ArrayList<String>(); 895 name_codes.put(name, codes); 896 } 897 codes.add(code); 898 } 899 900 private List<String> DELETED3166 = Collections.unmodifiableList(Arrays 901 .asList(new String[] { "BQ", "BU", "CT", "DD", "DY", "FQ", "FX", "HV", 902 "JT", "MI", "NH", "NQ", "NT", "PC", "PU", "PZ", "RH", "SU", "TP", 903 "VD", "WK", "YD", "YU", "ZR" })); 904 905 public List<String> getOld3166() { 906 return DELETED3166; 907 } 908 909 private Map<String, List<String>> WorldBankInfo; 910 911 public Map<String, List<String>> getWorldBankInfo() { 912 if (WorldBankInfo == null) { 913 List<String> temp = fillFromCommaFile("WorldBankInfo.txt", false); 914 WorldBankInfo = new HashMap<String, List<String>>(); 915 for (String line : temp) { 916 List<String> row = CldrUtility.splitList(line, ';', true); 917 String key = row.get(0); 918 row.remove(0); 919 WorldBankInfo.put(key, row); 920 } 921 WorldBankInfo = CldrUtility.protectCollection(WorldBankInfo); 922 } 923 return WorldBankInfo; 924 } 925 926 Set<String> moribundLanguages; 927 928 public Set<String> getMoribundLanguages() { 929 if (moribundLanguages == null) { 930 List<String> temp = fillFromCommaFile("moribund_languages.txt", true); 931 moribundLanguages = new TreeSet<String>(); 932 moribundLanguages.addAll(temp); 933 moribundLanguages = CldrUtility.protectCollection(moribundLanguages); 934 } 935 return moribundLanguages; 936 } 937 938 // produces a list of the 'clean' lines 939 private List<String> fillFromCommaFile(String filename, boolean trim) { 940 try { 941 List<String> result = new ArrayList<String>(); 942 String line; 943 BufferedReader lstreg = CldrUtility.getUTF8Data(filename); 944 while (true) { 945 line = lstreg.readLine(); 946 if (line == null) 947 break; 948 int commentPos = line.indexOf('#'); 949 if (commentPos >= 0) { 950 line 
= line.substring(0, commentPos); 951 } 952 if (trim) { 953 line = line.trim(); 954 } 955 if (line.length() == 0) 956 continue; 957 result.add(line); 958 } 959 return result; 960 } catch (Exception e) { 961 throw (RuntimeException) new IllegalArgumentException( 962 "Can't process file: data/" + filename).initCause(e); 963 } 964 } 965 966 // return a complex map. language -> arn -> {"Comments" -> "x", 967 // "Description->y,...} 968 static String[][] extras = { 969 { "language", "root", "Description", "Root", "CLDR", "True" }, 970 // { "language", "cch", "Description", "Atsam", "CLDR", "True" }, 971 // { "language", "kaj", "Description", "Jju", "CLDR", "True" }, 972 // { "language", "kcg", "Description", "Tyap", "CLDR", "True" }, 973 // { "language", "kfo", "Description", "Koro", "CLDR", "True" }, 974 // { "language", "mfe", "Description", "Morisyen", "CLDR", "True" }, 975 // { "region", "172", "Description", "Commonwealth of Independent States", "CLDR", "True" }, 976 // { "region", "062", "Description", "South-Central Asia", "CLDR", "True" }, 977 // { "region", "003", "Description", "North America", "CLDR", "True" }, 978 // { "variant", "POLYTONI", "Description", "Polytonic Greek", "CLDR", "True", "Preferred-Value", "POLYTON" }, 979 { "variant", "REVISED", "Description", "Revised Orthography", "CLDR", "True" }, 980 { "variant", "SAAHO", "Description", "Dialect", "CLDR", "True" }, 981 { "variant", "POSIX", "Description", "Computer-Style", "CLDR", "True" }, 982 // {"region", "172", "Description", "Commonwealth of Independent States", 983 // "CLDR", "True"}, 984 // { "region", "", "Description", "European Union", "CLDR", "True" }, 985 { "region", "ZZ", "Description", "Unknown or Invalid Region", "CLDR", "True" }, 986 { "region", "QO", "Description", "Outlying Oceania", "CLDR", "True" }, 987 { "region", "XK", "Description", "Kosovo", "CLDR", "True" }, 988 { "script", "Qaai", "Description", "Inherited", "CLDR", "True" }, 989 // {"region", "003", "Description", "North 
America", "CLDR", "True"}, 990 // {"region", "062", "Description", "South-central Asia", "CLDR", "True"}, 991 // {"region", "200", "Description", "Czechoslovakia", "CLDR", "True"}, 992 // {"region", "830", "Description", "Channel Islands", "CLDR", "True"}, 993 // {"region", "833", "Description", "Isle of Man", "CLDR", "True"}, 994 995 // {"region", "NT", "Description", "Neutral Zone (formerly between Saudi 996 // Arabia & Iraq)", "CLDR", "True", "Deprecated", "True"}, 997 // {"region", "SU", "Description", "Union of Soviet Socialist Republics", 998 // "CLDR", "True", "Deprecated", "True"}, 999 // {"region", "BQ", "Description", "British Antarctic Territory", 1000 // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"}, 1001 // {"region", "CT", "Description", "Canton and Enderbury Islands", 1002 // "Preferred-Value", "KI", "CLDR", "True", "Deprecated", "True"}, 1003 // {"region", "FQ", "Description", "French Southern and Antarctic Territories 1004 // (now split between AQ and TF)", "CLDR", "True", "Deprecated", "True"}, 1005 // {"region", "JT", "Description", "Johnston Island", "Preferred-Value", "UM", 1006 // "CLDR", "True", "Deprecated", "True"}, 1007 // {"region", "MI", "Description", "Midway Islands", "Preferred-Value", "UM", 1008 // "CLDR", "True", "Deprecated", "True"}, 1009 // {"region", "NQ", "Description", "Dronning Maud Land", "Preferred-Value", 1010 // "AQ", "CLDR", "True", "Deprecated", "True"}, 1011 // {"region", "PC", "Description", "Pacific Islands Trust Territory (divided 1012 // into FM, MH, MP, and PW)", "Preferred-Value", "AQ", "CLDR", "True", 1013 // "Deprecated", "True"}, 1014 // {"region", "PU", "Description", "U.S. 
Miscellaneous Pacific Islands", 1015 // "Preferred-Value", "UM", "CLDR", "True", "Deprecated", "True"}, 1016 // {"region", "PZ", "Description", "Panama Canal Zone", "Preferred-Value", 1017 // "PA", "CLDR", "True", "Deprecated", "True"}, 1018 // {"region", "VD", "Description", "North Vietnam", "Preferred-Value", "VN", 1019 // "CLDR", "True", "Deprecated", "True"}, 1020 // {"region", "WK", "Description", "Wake Island", "Preferred-Value", "UM", 1021 // "CLDR", "True", "Deprecated", "True"}, 1022 }; 1023 1024 static final String registryName = CldrUtility.getProperty("registry", "language-subtag-registry"); 1025 1026 public enum LstrType { 1027 language("und", "zxx", "mul", "mis", "root"), 1028 script("Zzzz", "Zsym", "Zxxx", "Zmth"), 1029 region("ZZ"), 1030 variant(), 1031 extlang(true, false), 1032 grandfathered(true, false), 1033 redundant(true, false), 1034 /** specialized codes for validity; TODO: rename LstrType **/ 1035 currency(false, true, "XXX"), 1036 subdivision(false, true), 1037 unit(false, true); 1038 1039 public final Set<String> specials; 1040 public final String unknown; 1041 public final boolean isLstr; 1042 public final boolean isUnicode; 1043 1044 private LstrType(String... unknownValue) { 1045 this(true, true, unknownValue); 1046 } 1047 1048 private LstrType(boolean lstr, boolean unicode, String... unknownValue) { 1049 unknown = unknownValue.length == 0 ? 
null : unknownValue[0]; 1050 LinkedHashSet<String> set = new LinkedHashSet<>(Arrays.asList(unknownValue)); 1051 if (unknown != null) { 1052 set.remove(unknown); 1053 } 1054 specials = Collections.unmodifiableSet(set); 1055 isLstr = lstr; 1056 isUnicode = unicode; 1057 } 1058 1059 // 1060 static final Pattern WELLFORMED = Pattern.compile("([0-9]{3}|[a-zA-Z]{2})[a-zA-Z0-9]{1,4}"); 1061 1062 boolean isWellFormed(String candidate) { 1063 switch (this) { 1064 case subdivision: 1065 return WELLFORMED.matcher(candidate).matches(); 1066 default: 1067 throw new UnsupportedOperationException(); 1068 } 1069 } 1070 } 1071 1072 public enum LstrField { 1073 Type, Subtag, Description, Added, Scope, Tag, Suppress_Script, Macrolanguage, Deprecated, Preferred_Value, Comments, Prefix, CLDR; 1074 public static LstrField from(String s) { 1075 return LstrField.valueOf(s.trim().replace("-", "_")); 1076 } 1077 } 1078 1079 static Map<String, Map<String, Map<String, String>>> LSTREG; 1080 static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_ENUM; 1081 static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_RAW; 1082 1083 /** 1084 * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br> 1085 * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated by 1086 * DESCRIPTION_SEPARATOR. 1087 * 1088 * @return 1089 */ 1090 public static Map<String, Map<String, Map<String, String>>> getLStreg() { 1091 if (LSTREG == null) { 1092 initLstr(); 1093 } 1094 return LSTREG; 1095 } 1096 1097 /** 1098 * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br> 1099 * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated by 1100 * DESCRIPTION_SEPARATOR. 
1101 * 1102 * @return 1103 */ 1104 public static Map<LstrType, Map<String, Map<LstrField, String>>> getEnumLstreg() { 1105 if (LSTREG_ENUM == null) { 1106 initLstr(); 1107 } 1108 return LSTREG_ENUM; 1109 } 1110 1111 public static Map<LstrType, Map<String, Map<LstrField, String>>> getLstregEnumRaw() { 1112 if (LSTREG_ENUM == null) { 1113 initLstr(); 1114 } 1115 return LSTREG_RAW; 1116 } 1117 1118 private static void initLstr() { 1119 Map<LstrType, Map<String, Map<LstrField, String>>> result2 = new TreeMap<LstrType, Map<String, Map<LstrField, String>>>(); 1120 1121 int lineNumber = 1; 1122 1123 Set<String> funnyTags = new TreeSet<String>(); 1124 String line; 1125 try { 1126 BufferedReader lstreg = CldrUtility.getUTF8Data(registryName); 1127 LstrType lastType = null; 1128 String lastTag = null; 1129 Map<String, Map<LstrField, String>> subtagData = null; 1130 Map<LstrField, String> currentData = null; 1131 LstrField lastLabel = null; 1132 String lastRest = null; 1133 boolean inRealContent = false; 1134 // Map<String, String> translitCache = new HashMap<String, String>(); 1135 for (;; ++lineNumber) { 1136 line = lstreg.readLine(); 1137 if (line == null) 1138 break; 1139 if (line.length() == 0) 1140 continue; // skip blanks 1141 if (line.startsWith("File-Date: ")) { 1142 if (DEBUG) System.out.println("Language Subtag Registry: " + line); 1143 inRealContent = true; 1144 continue; 1145 } 1146 if (!inRealContent) { 1147 // skip until we get to real content 1148 continue; 1149 } 1150 // skip cruft 1151 if (line.startsWith("Internet-Draft")) { 1152 continue; 1153 } 1154 if (line.startsWith("Ewell")) { 1155 continue; 1156 } 1157 if (line.startsWith("\f")) { 1158 continue; 1159 } 1160 if (line.startsWith("4. 
Security Considerations")) { 1161 break; 1162 } 1163 1164 if (line.startsWith("%%")) 1165 continue; // skip separators (ok, since data starts with Type: 1166 if (line.startsWith(" ")) { 1167 currentData.put(lastLabel, lastRest + " " + line.trim()); 1168 continue; 1169 } 1170 1171 /* 1172 * Type: language Subtag: aa Description: Afar Added: 2005-10-16 1173 * Suppress-Script: Latn 1174 */ 1175 int pos2 = line.indexOf(':'); 1176 LstrField label = LstrField.from(line.substring(0, pos2)); 1177 String rest = line.substring(pos2 + 1).trim(); 1178 if (label == LstrField.Type) { 1179 subtagData = CldrUtility.get(result2, lastType = LstrType.valueOf(rest)); 1180 if (subtagData == null) { 1181 result2.put(LstrType.valueOf(rest), subtagData = new TreeMap<String, Map<LstrField, String>>()); 1182 } 1183 } else if (label == LstrField.Subtag 1184 || label == LstrField.Tag) { 1185 lastTag = rest; 1186 String endTag = null; 1187 // Subtag: qaa..qtz 1188 int pos = lastTag.indexOf(".."); 1189 if (pos >= 0) { 1190 endTag = lastTag.substring(pos + 2); 1191 lastTag = lastTag.substring(0, pos); 1192 } 1193 currentData = new TreeMap<LstrField, String>(); 1194 if (endTag == null) { 1195 putSubtagData(lastTag, subtagData, currentData); 1196 languageCount.add(lastType, 1); 1197 // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag); 1198 } else { 1199 for (; lastTag.compareTo(endTag) <= 0; lastTag = nextAlpha(lastTag)) { 1200 // System.out.println(">" + current); 1201 putSubtagData(lastTag, subtagData, currentData); 1202 languageCount.add(lastType, 1); 1203 // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag); 1204 } 1205 1206 } 1207 // label.equalsIgnoreCase("Added") || label.equalsIgnoreCase("Suppress-Script")) { 1208 // skip 1209 // } else if (pieces.length < 2) { 1210 // System.out.println("Odd Line: " + lastType + "\t" + lastTag + "\t" + line); 1211 } else { 1212 lastLabel = label; 1213 // The following code was 
removed because in the standard tests (TestAll) both lastRest and rest were always equal. 1214 // if(!translitCache.containsKey(rest)) { 1215 // lastRest = TransliteratorUtilities.fromXML.transliterate(rest); 1216 // translitCache.put(rest, lastRest); 1217 // if (!lastRest.equals(rest)) { 1218 // System.out.println(System.currentTimeMillis()+" initLStr: LastRest: '"+lastRest+"' Rest: '"+rest+"'"); 1219 // } 1220 // } else { 1221 // lastRest = translitCache.get(rest); 1222 // } 1223 lastRest = rest; 1224 String oldValue = (String) CldrUtility.get(currentData, lastLabel); 1225 if (oldValue != null) { 1226 lastRest = oldValue + DESCRIPTION_SEPARATOR + lastRest; 1227 } 1228 currentData.put(lastLabel, lastRest); 1229 } 1230 } 1231 } catch (Exception e) { 1232 throw (RuntimeException) new IllegalArgumentException( 1233 "Can't process file: data/" 1234 + registryName + ";\t at line " + lineNumber).initCause(e); 1235 } finally { 1236 if (!funnyTags.isEmpty()) { 1237 if (DEBUG) 1238 System.out.println("Funny tags: " + funnyTags); 1239 } 1240 } 1241 // copy raw 1242 Map<LstrType, Map<String, Map<LstrField, String>>> rawLstreg = new TreeMap<LstrType, Map<String, Map<LstrField, String>>>(); 1243 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry1 : result2.entrySet()) { 1244 LstrType key1 = entry1.getKey(); 1245 TreeMap<String, Map<LstrField, String>> raw1 = new TreeMap<String, Map<LstrField, String>>(); rawLstreg.put(key1, raw1)1246 rawLstreg.put(key1, raw1); 1247 for (Entry<String, Map<LstrField, String>> entry2 : entry1.getValue().entrySet()) { 1248 String key2 = entry2.getKey(); 1249 final Map<LstrField, String> value2 = entry2.getValue(); 1250 TreeMap<LstrField, String> raw2 = new TreeMap<LstrField, String>(); 1251 raw2.putAll(value2); raw1.put(key2, raw2)1252 raw1.put(key2, raw2); 1253 } 1254 } 1255 LSTREG_RAW = CldrUtility.protectCollection(rawLstreg); 1256 1257 // add extras 1258 for (int i = 0; i < extras.length; ++i) { 1259 Map<String, Map<LstrField, 
String>> subtagData = CldrUtility.get(result2, LstrType.valueOf(extras[i][0])); 1260 if (subtagData == null) { LstrType.valueOf(extras[i][0])1261 result2.put(LstrType.valueOf(extras[i][0]), subtagData = new TreeMap<String, Map<LstrField, String>>()); 1262 } 1263 Map<LstrField, String> labelData = new TreeMap<LstrField, String>(); 1264 for (int j = 2; j < extras[i].length; j += 2) { LstrField.from(extras[i][j])1265 labelData.put(LstrField.from(extras[i][j]), extras[i][j + 1]); 1266 } 1267 Map<LstrField, String> old = CldrUtility.get(subtagData, extras[i][1]); 1268 if (old != null) { 1269 if (!"Private use".equals(CldrUtility.get(old, LstrField.Description))) { 1270 throw new IllegalArgumentException("REPLACING data for " + extras[i][1] + "\t" + old + "\twith" 1271 + labelData); 1272 } 1273 } 1274 if (false) { 1275 System.out.println((old != null ? "REPLACING" + "\t" + old : "ADDING") + 1276 " data for " + extras[i][1] + "\twith" + labelData); 1277 } subtagData.put(extras[i][1], labelData)1278 subtagData.put(extras[i][1], labelData); 1279 } 1280 // build compatibility map 1281 Map<String, Map<String, Map<String, String>>> result = new LinkedHashMap<String, Map<String, Map<String, String>>>(); 1282 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : result2.entrySet()) { 1283 Map<String, Map<String, String>> copy2 = new LinkedHashMap<String, Map<String, String>>(); 1284 result.put(entry.getKey().toString(), copy2); 1285 for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) { 1286 Map<String, String> copy3 = new LinkedHashMap<String, String>(); entry2.getKey()1287 copy2.put(entry2.getKey(), copy3); 1288 for (Entry<LstrField, String> entry3 : entry2.getValue().entrySet()) { entry3.getValue()1289 copy3.put(entry3.getKey().toString(), entry3.getValue()); 1290 } 1291 } 1292 } 1293 LSTREG = CldrUtility.protectCollection(result); 1294 LSTREG_ENUM = CldrUtility.protectCollection(result2); 1295 } 1296 1297 private static <K, K2, V> 
Map<K2, V> putSubtagData(K lastTag, Map<K, Map<K2, V>> subtagData, Map<K2, V> currentData) { 1298 Map<K2, V> oldData = subtagData.get(lastTag); 1299 if (oldData != null) { 1300 if (oldData.get("CLDR") != null) { 1301 System.out.println("overriding: " + lastTag + ", " + oldData); 1302 } else { 1303 throw new IllegalArgumentException("Duplicate tag: " + lastTag); 1304 } 1305 } 1306 return subtagData.put(lastTag, currentData); 1307 } 1308 1309 static Counter<LstrType> languageCount = new Counter<LstrType>(); 1310 1311 public static Counter<LstrType> getLanguageCount() { 1312 return languageCount; 1313 } 1314 1315 ZoneParser zoneParser = new ZoneParser(); 1316 1317 // static public final Set<String> MODERN_SCRIPTS = Collections 1318 // .unmodifiableSet(new TreeSet( 1319 // // "Bali " + 1320 // // "Bugi " + 1321 // // "Copt " + 1322 // // "Hano " + 1323 // // "Osma " + 1324 // // "Qaai " + 1325 // // "Sylo " + 1326 // // "Syrc " + 1327 // // "Tagb " + 1328 // // "Tglg " + 1329 // Arrays 1330 // .asList("Hans Hant Jpan Hrkt Kore Arab Armn Bali Beng Bopo Cans Cham Cher Cyrl Deva Ethi Geor Grek Gujr Guru Hani Hang Hebr Hira Knda Kana Kali Khmr Laoo Latn Lepc Limb Mlym Mong Mymr Talu Nkoo Olck Orya Saur Sinh Tale Taml Telu Thaa Thai Tibt Tfng Vaii Yiii" 1331 // .split("\\s+")))); 1332 1333 // updated to http://www.unicode.org/reports/tr31/tr31-9.html#Specific_Character_Adjustments 1334 1335 /** 1336 * @deprecated 1337 */ 1338 public Map<String, List<ZoneLine>> getZone_rules() { 1339 return zoneParser.getZone_rules(); 1340 } 1341 1342 /** 1343 * @deprecated 1344 */ 1345 public Map<String, List<String>> getZoneData() { 1346 return zoneParser.getZoneData(); 1347 } 1348 1349 /** 1350 * @deprecated 1351 */ 1352 public Set<String> getCanonicalTimeZones() { 1353 return zoneParser.getZoneData().keySet(); 1354 } 1355 1356 /** 1357 * @deprecated 1358 */ 1359 public Map<String, Set<String>> getCountryToZoneSet() { 1360 return zoneParser.getCountryToZoneSet(); 1361 } 1362 1363 /** 1364 
* @deprecated 1365 */ 1366 public List<String> getDeprecatedZoneIDs() { 1367 return zoneParser.getDeprecatedZoneIDs(); 1368 } 1369 1370 /** 1371 * @deprecated 1372 */ 1373 public Comparator<String> getTZIDComparator() { 1374 return zoneParser.getTZIDComparator(); 1375 } 1376 1377 /** 1378 * @deprecated 1379 */ 1380 public Map<String, Set<String>> getZoneLinkNew_OldSet() { 1381 return zoneParser.getZoneLinkNew_OldSet(); 1382 } 1383 1384 /** 1385 * @deprecated 1386 */ 1387 public Map<String, String> getZoneLinkold_new() { 1388 return zoneParser.getZoneLinkold_new(); 1389 } 1390 1391 /** 1392 * @deprecated 1393 */ 1394 public Map getZoneRuleID_rules() { 1395 return zoneParser.getZoneRuleID_rules(); 1396 } 1397 1398 /** 1399 * @deprecated 1400 */ 1401 public Map<String, String> getZoneToCounty() { 1402 return zoneParser.getZoneToCounty(); 1403 } 1404 1405 /** 1406 * @deprecated 1407 */ 1408 public String getZoneVersion() { 1409 return zoneParser.getVersion(); 1410 } 1411 1412 public static String fixLanguageTag(String languageSubtag) { 1413 if (languageSubtag.equals("mo")) { // fix special cases 1414 return "ro"; 1415 } else if (languageSubtag.equals("no")) { 1416 return "nb"; 1417 } 1418 return languageSubtag; 1419 } 1420 1421 public boolean isModernLanguage(String languageCode) { 1422 if (getMoribundLanguages().contains(languageCode)) return false; 1423 Type type = Iso639Data.getType(languageCode); 1424 if (type == Type.Living) return true; 1425 if (languageCode.equals("eo")) return true; // exception for Esperanto 1426 // Scope scope = Iso639Data.getScope(languageCode); 1427 // if (scope == Scope.Collection) return false; 1428 return false; 1429 } 1430 1431 public static boolean isScriptModern(String script) { 1432 ScriptMetadata.Info info = ScriptMetadata.getInfo(script); 1433 if (info == null) { 1434 if (false) throw new IllegalArgumentException("No script metadata for: " + script); 1435 return false; 1436 } 1437 IdUsage idUsage = info.idUsage; 1438 return idUsage 
!= IdUsage.EXCLUSION && idUsage != IdUsage.UNKNOWN; 1439 } 1440 1441 static final Pattern whitespace = PatternCache.get("\\s+"); 1442 static Set<String> filteredCurrencies = null; 1443 1444 public Set<String> getSurveyToolDisplayCodes(String type) { 1445 return getGoodAvailableCodes(type); 1446 } 1447 1448 static UnicodeSet COUNTRY = new UnicodeSet("[a-zA-Z]").freeze(); 1449 1450 /** 1451 * Quick check for whether valid country. Not complete: should use Validity 1452 * @param territory 1453 * @return 1454 */ 1455 public static boolean isCountry(String territory) { 1456 switch (territory) { 1457 case "ZZ": 1458 case "QO": 1459 case "EU": 1460 case "UN": 1461 case "EZ": 1462 return false; 1463 default: 1464 return territory.length() == 2 && COUNTRY.containsAll(territory); 1465 } 1466 } 1467 1468 public boolean isLstregPrivateUse(String type, String code) { 1469 Map<String, String> lStregData = getLStreg().get(type).get(code); 1470 return lStregData.get("Description").equalsIgnoreCase("private use"); 1471 } 1472 1473 public boolean isLstregDeprecated(String type, String code) { 1474 Map<String, String> lStregData = getLStreg().get(type).get(code); 1475 return lStregData.get("Deprecated") != null; 1476 } 1477 } 1478