• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2011, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import java.io.BufferedReader;
12 import java.io.IOException;
13 import java.util.ArrayList;
14 import java.util.Arrays;
15 import java.util.Collections;
16 import java.util.Comparator;
17 import java.util.EnumMap;
18 import java.util.EnumSet;
19 import java.util.HashMap;
20 import java.util.HashSet;
21 import java.util.Iterator;
22 import java.util.LinkedHashMap;
23 import java.util.LinkedHashSet;
24 import java.util.List;
25 import java.util.Locale;
26 import java.util.Map;
27 import java.util.Map.Entry;
28 import java.util.Set;
29 import java.util.TreeMap;
30 import java.util.TreeSet;
31 import java.util.regex.Pattern;
32 
33 import org.unicode.cldr.draft.ScriptMetadata;
34 import org.unicode.cldr.draft.ScriptMetadata.IdUsage;
35 import org.unicode.cldr.util.Iso639Data.Type;
36 import org.unicode.cldr.util.ZoneParser.ZoneLine;
37 
38 import com.ibm.icu.impl.Relation;
39 import com.ibm.icu.lang.UCharacter;
40 import com.ibm.icu.text.UnicodeSet;
41 import com.ibm.icu.util.ICUUncheckedIOException;
42 import com.ibm.icu.util.Output;
43 
44 /**
45  * Provides access to various codes used by CLDR: RFC 3066, ISO 4217, Olson
46  * tzids
47  */
48 public class StandardCodes {
49 
50     public enum CodeType {
51         language, script, territory, extlang, legacy, redundant, variant, currency, tzid;
from(String name)52         public static CodeType from(String name) {
53             if ("region".equals(name)) {
54                 return territory;
55             }
56             return CodeType.valueOf(name);
57         }
58     }
59 
60     private static final Set<CodeType> TypeSet = Collections.unmodifiableSet(EnumSet.allOf(CodeType.class));
61 
62     private static final Set<String> TypeStringSet;
63     static {
64         LinkedHashSet<String> foo = new LinkedHashSet<>();
65         for (CodeType x : CodeType.values()) {
x.toString()66             foo.add(x.toString());
67         }
68         TypeStringSet = Collections.unmodifiableSet(foo);
69     }
70 
71     public static final String DESCRIPTION_SEPARATOR = "\u25AA";
72 
73     public static final String NO_COUNTRY = "001";
74 
75     private EnumMap<CodeType, Map<String, List<String>>> type_code_data = new EnumMap<>(
76         CodeType.class);
77 
78     private EnumMap<CodeType, Map<String, List<String>>> type_name_codes = new EnumMap<>(
79         CodeType.class);
80 
81     private EnumMap<CodeType, Map<String, String>> type_code_preferred = new EnumMap<>(
82         CodeType.class);
83 
84     private Map<String, Set<String>> country_modernCurrency = new TreeMap<>();
85 
86     private Map<CodeType, Set<String>> goodCodes = new TreeMap<>();
87 
88     private static final boolean DEBUG = false;
89 
90     private static final class StandardCodesHelper {
91         static final StandardCodes SINGLETON = new StandardCodes();
92     }
93     /**
94      * Get the singleton copy of the standard codes.
95      */
make()96     static public synchronized StandardCodes make() {
97         return StandardCodesHelper.SINGLETON;
98     }
99 
100     /**
101      * The data is the name in the case of RFC3066 codes, and the country code in
102      * the case of TZIDs and ISO currency codes. If the country code is missing,
103      * uses ZZ.
104      */
getData(String type, String code)105     public String getData(String type, String code) {
106         Map<String, List<String>> code_data = getCodeData(type);
107         if (code_data == null)
108             return null;
109         List<String> list = code_data.get(code);
110         if (list == null)
111             return null;
112         return list.get(0);
113     }
114 
115     /**
116      * @return the full data for the type and code For the data in lstreg, it is
117      *         description | date | canonical_value | recommended_prefix #
118      *         comments
119      */
getFullData(String type, String code)120     public List<String> getFullData(String type, String code) {
121         Map<String, List<String>> code_data = getCodeData(type);
122         if (code_data == null)
123             return null;
124         return code_data.get(code);
125     }
126 
127     /**
128      * @return the full data for the type and code For the data in lstreg, it is
129      *         description | date | canonical_value | recommended_prefix #
130      *         comments
131      */
getFullData(CodeType type, String code)132     public List<String> getFullData(CodeType type, String code) {
133         Map<String, List<String>> code_data = type_code_data.get(type);
134         if (code_data == null)
135             return null;
136         return code_data.get(code);
137     }
138 
getCodeData(String type)139     private Map<String, List<String>> getCodeData(String type) {
140         return getCodeData(CodeType.from(type));
141     }
142 
getCodeData(CodeType type)143     private Map<String, List<String>> getCodeData(CodeType type) {
144         return type_code_data.get(type);
145     }
146 
147     /**
148      * Get at the language registry values, as a Map from label to value.
149      *
150      * @param type
151      * @param code
152      * @return
153      */
getLangData(String type, String code)154     public Map<String, String> getLangData(String type, String code) {
155         try {
156             if (type.equals("territory"))
157                 type = "region";
158             else if (type.equals("variant")) code = code.toLowerCase(Locale.ENGLISH);
159             return (Map) ((Map) getLStreg().get(type)).get(code);
160         } catch (RuntimeException e) {
161             return null;
162         }
163     }
164 
165     /**
166      * Return a replacement code, if available. If not, return null.
167      *
168      */
getReplacement(String type, String code)169     public String getReplacement(String type, String code) {
170         if (type.equals("currency"))
171             return null; // no replacement codes for currencies
172         List<String> data = getFullData(type, code);
173         if (data == null)
174             return null;
175         // if available, the replacement is a non-empty value other than --, in
176         // position 2.
177         if (data.size() < 3)
178             return null;
179         String replacement = data.get(2);
180         if (!replacement.equals("") && !replacement.equals("--"))
181             return replacement;
182         return null;
183     }
184 
185     /**
186      * Return the list of codes that have the same data. For example, returns all
187      * currency codes for a country. If there is a preferred one, it is first.
188      *
189      * @param type
190      * @param data
191      * @return
192      */
193     @Deprecated
getCodes(String type, String data)194     public List<String> getCodes(String type, String data) {
195         return getCodes(CodeType.from(type), data);
196     }
197 
198     /**
199      * Return the list of codes that have the same data. For example, returns all
200      * currency codes for a country. If there is a preferred one, it is first.
201      */
getCodes(CodeType type, String data)202     public List<String> getCodes(CodeType type, String data) {
203         Map<String, List<String>> data_codes = type_name_codes.get(type);
204         if (data_codes == null)
205             return null;
206         return Collections.unmodifiableList(data_codes.get(data));
207     }
208 
209     /**
210      * Where there is a preferred code, return it.
211      */
212     @Deprecated
getPreferred(String type, String code)213     public String getPreferred(String type, String code) {
214         return getPreferred(CodeType.from(type), code);
215     }
216 
217     /**
218      * Where there is a preferred code, return it.
219      */
220 
getPreferred(CodeType type, String code)221     public String getPreferred(CodeType type, String code) {
222         Map<String, String> code_preferred = type_code_preferred.get(type);
223         if (code_preferred == null)
224             return code;
225         String newCode = code_preferred.get(code);
226         if (newCode == null)
227             return code;
228         return newCode;
229     }
230 
231     /**
232      * Get all the available types
233      */
getAvailableTypes()234     public Set<String> getAvailableTypes() {
235         return TypeStringSet;
236     }
237 
238     /**
239      * Get all the available types
240      */
getAvailableTypesEnum()241     public Set<CodeType> getAvailableTypesEnum() {
242         return TypeSet;
243     }
244 
245     /**
246      * Get all the available codes for a given type
247      *
248      * @param type
249      * @return
250      */
getAvailableCodes(String type)251     public Set<String> getAvailableCodes(String type) {
252         return getAvailableCodes(CodeType.from(type));
253     }
254 
255     /**
256      * Get all the available codes for a given type
257      *
258      * @param type
259      * @return
260      */
getAvailableCodes(CodeType type)261     public Set<String> getAvailableCodes(CodeType type) {
262         Map<String, List<String>> code_name = type_code_data.get(type);
263         return Collections.unmodifiableSet(code_name.keySet());
264     }
265 
getGoodAvailableCodes(String stringType)266     public Set<String> getGoodAvailableCodes(String stringType) {
267         return getGoodAvailableCodes(CodeType.from(stringType));
268     }
269 
270     /**
271      * Get all the available "real" codes for a given type, excluding private use,
272      * but including some deprecated codes. Use SupplementalDataInfo getLocaleAliases to
273      * exclude others.
274      *
275      * @param type
276      * @return
277      */
getGoodAvailableCodes(CodeType type)278     public Set<String> getGoodAvailableCodes(CodeType type) {
279         Set<String> result = goodCodes.get(type);
280         if (result == null) {
281             synchronized (goodCodes) {
282                 Map<String, List<String>> code_name = getCodeData(type);
283                 SupplementalDataInfo sd = SupplementalDataInfo.getInstance();
284                 if (code_name == null)
285                     return null;
286                 result = new TreeSet<>(code_name.keySet());
287                 switch (type) {
288                 case currency:
289                     break; // nothing special
290                 case language:
291                     return sd.getCLDRLanguageCodes();
292                 case script:
293                     return sd.getCLDRScriptCodes();
294                 case tzid:
295                     break; // nothing special
296                 default:
297                     for (Iterator<String> it = result.iterator(); it.hasNext();) {
298                         String code = it.next();
299                         if (code.equals("root") || code.equals("QO"))
300                             continue;
301                         List<String> data = getFullData(type, code);
302                         if (data.size() < 3) {
303                             if (DEBUG)
304                                 System.out.println(code + "\t" + data);
305                         }
306                         if ("PRIVATE USE".equalsIgnoreCase(data.get(0))
307                             || (!data.get(2).equals("") && !data.get(2).equals("--"))) {
308                             // System.out.println("Removing: " + code);
309                             it.remove();
310                         }
311                     }
312                 }
313                 result = Collections.unmodifiableSet(result);
314                 goodCodes.put(type, result);
315             }
316         }
317         return result;
318     }
319 
320     private static Set<String> GOOD_COUNTRIES;
321 
getGoodCountries()322     public Set<String> getGoodCountries() {
323         synchronized (goodCodes) {
324             if (GOOD_COUNTRIES == null) {
325                 Set<String> temp = new LinkedHashSet<>();
326                 for (String s : getGoodAvailableCodes(CodeType.territory)) {
327                     if (isCountry(s)) {
328                         temp.add(s);
329                     }
330                 }
331                 GOOD_COUNTRIES = Collections.unmodifiableSet(temp);
332             }
333         }
334         return GOOD_COUNTRIES;
335     }
336 
337     /**
338      * Gets the modern currency.
339      */
getMainCurrencies(String countryCode)340     public Set<String> getMainCurrencies(String countryCode) {
341         return country_modernCurrency.get(countryCode);
342     }
343 
344     private Map<Organization, Map<String, Level>> platform_locale_level = null;
345     private Map<Organization, Relation<Level, String>> platform_level_locale = null;
346     private Map<String, Map<String, String>> platform_locale_levelString = null;
347 
348 //    /**
349 //     * Get rid of this
350 //     *
351 //     * @param type
352 //     * @return
353 //     * @throws IOException
354 //     * @deprecated
355 //     */
356 //    public String getEffectiveLocaleType(String type) throws IOException {
357 //        if ((type != null) && (getLocaleCoverageOrganizations().contains(Organization.valueOf(type)))) {
358 //            return type;
359 //        } else {
360 //            return null; // the default.. for now..
361 //        }
362 //    }
363 
364     static Comparator caseless = new Comparator() {
365 
366         @Override
367         public int compare(Object arg0, Object arg1) {
368             String s1 = (String) arg0;
369             String s2 = (String) arg1;
370             return s1.compareToIgnoreCase(s2);
371         }
372 
373     };
374 
375     /**
376      * Used for Locales.txt to mean "all"
377      */
378     public static final String ALL_LOCALES = "*";
379 
380     /**
381      * Returns locales according to status. It returns a Map of Maps, key 1 is
382      * either IBM or Java (perhaps more later), key 2 is the Level.
383      *
384      * @deprecated
385      */
386     @Deprecated
getLocaleTypes()387     public Map<Organization, Map<String, Level>> getLocaleTypes() {
388         synchronized (StandardCodes.class) {
389             if (platform_locale_level == null) {
390                 loadPlatformLocaleStatus();
391             }
392         }
393         return platform_locale_level;
394     }
395 
396     /**
397      * Return map of locales to levels
398      * @param org
399      * @return
400      */
getLocaleToLevel(Organization org)401     public Map<String, Level> getLocaleToLevel(Organization org) {
402         return getLocaleTypes().get(org);
403     }
404 
getLocaleCoverageLevel(String organization, String desiredLocale)405     public Level getLocaleCoverageLevel(String organization, String desiredLocale) {
406         return getLocaleCoverageLevel(Organization.fromString(organization), desiredLocale);
407     }
408 
getLocaleCoverageLevel(Organization organization, String desiredLocale)409     public Level getLocaleCoverageLevel(Organization organization, String desiredLocale) {
410         return getLocaleCoverageLevel(organization, desiredLocale, new Output<LocaleCoverageType>());
411     }
412 
413     public enum LocaleCoverageType {
414         explicit, parent, star, undetermined
415     }
416 
417     /**
418      * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing.
419      * A locale of "*" in the data means "everything else".
420      */
getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType)421     public Level getLocaleCoverageLevel(Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType) {
422         synchronized (StandardCodes.class) {
423             if (platform_locale_level == null) {
424                 loadPlatformLocaleStatus();
425             }
426         }
427         coverageType.value = LocaleCoverageType.undetermined;
428         if (organization == null) {
429             return Level.UNDETERMINED;
430         }
431         Map<String, Level> locale_status = platform_locale_level.get(organization);
432         if (locale_status == null) {
433             return Level.UNDETERMINED;
434         }
435         // see if there is a parent
436         String originalLocale = desiredLocale;
437         while (desiredLocale != null) {
438             Level status = locale_status.get(desiredLocale);
439             if (status != null && status != Level.UNDETERMINED) {
440                 coverageType.value = originalLocale == desiredLocale ? LocaleCoverageType.explicit : LocaleCoverageType.parent;
441                 return status;
442             }
443             desiredLocale = LocaleIDParser.getParent(desiredLocale);
444         }
445         Level status = locale_status.get(ALL_LOCALES);
446         if (status != null && status != Level.UNDETERMINED) {
447             coverageType.value = LocaleCoverageType.star;
448             return status;
449         }
450         return Level.UNDETERMINED;
451     }
452 
453     /**
454      * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if information is missing.
455      */
getDefaultLocaleCoverageLevel(Organization organization)456     public Level getDefaultLocaleCoverageLevel(Organization organization) {
457         return getLocaleCoverageLevel(organization, ALL_LOCALES);
458     }
459 
getLocaleCoverageOrganizations()460     public Set<Organization> getLocaleCoverageOrganizations() {
461         synchronized (StandardCodes.class) {
462             if (platform_locale_level == null) {
463                 loadPlatformLocaleStatus();
464             }
465         }
466         return platform_locale_level.keySet();
467     }
468 
getLocaleCoverageOrganizationStrings()469     public Set<String> getLocaleCoverageOrganizationStrings() {
470         synchronized (StandardCodes.class) {
471             if (platform_locale_level == null) {
472                 loadPlatformLocaleStatus();
473             }
474         }
475         return platform_locale_levelString.keySet();
476     }
477 
getLocaleCoverageLocales(String organization)478     public Set<String> getLocaleCoverageLocales(String organization) {
479         return getLocaleCoverageLocales(Organization.fromString(organization));
480     }
481 
getLocaleCoverageLocales(Organization organization)482     public Set<String> getLocaleCoverageLocales(Organization organization) {
483         synchronized (StandardCodes.class) {
484             if (platform_locale_level == null) {
485                 loadPlatformLocaleStatus();
486             }
487         }
488         return platform_locale_level.get(organization).keySet();
489     }
490 
getLevelsToLocalesFor(Organization organization)491     public Relation<Level, String> getLevelsToLocalesFor(Organization organization) {
492         synchronized (StandardCodes.class) {
493             if (platform_level_locale == null) {
494                 loadPlatformLocaleStatus();
495             }
496         }
497         return platform_level_locale.get(organization);
498     }
499 
getLocaleCoverageLocales(Organization organization, Set<Level> choice)500     public Set<String> getLocaleCoverageLocales(Organization organization, Set<Level> choice) {
501         Set<String> result = new LinkedHashSet<>();
502         for (String locale : getLocaleCoverageLocales(organization)) {
503             if (choice.contains(getLocaleCoverageLevel(organization, locale))) {
504                 result.add(locale);
505             }
506         }
507         return result;
508     }
509 
loadPlatformLocaleStatus()510     private void loadPlatformLocaleStatus() {
511         LocaleIDParser parser = new LocaleIDParser();
512         platform_locale_level = new EnumMap<>(Organization.class);
513         SupplementalDataInfo sd = SupplementalDataInfo.getInstance();
514         Set<String> defaultContentLocales = sd.getDefaultContentLocales();
515         String line;
516         try {
517             BufferedReader lstreg = CldrUtility.getUTF8Data("Locales.txt");
518             while (true) {
519                 line = lstreg.readLine();
520                 if (line == null)
521                     break;
522                 int commentPos = line.indexOf('#');
523                 if (commentPos >= 0) {
524                     line = line.substring(0, commentPos);
525                 }
526                 line = line.trim();
527                 if (line.length() == 0)
528                     continue;
529                 List<String> stuff = CldrUtility.splitList(line, ';', true);
530                 Organization organization;
531 
532                 // verify that the organization is valid
533                 try {
534                     organization = Organization.fromString(stuff.get(0));
535                 } catch (Exception e) {
536                     throw new IllegalArgumentException("Invalid organization in Locales.txt: " + line);
537                 }
538 
539                 // verify that the locale is valid BCP47
540                 String locale = stuff.get(1);
541                 if (!locale.equals(ALL_LOCALES)) {
542                     parser.set(locale);
543                     String valid = validate(parser);
544                     if (valid.length() != 0) {
545                         throw new IllegalArgumentException("Invalid locale in Locales.txt: " + line);
546                     }
547                     locale = parser.toString(); // normalize
548 
549                     // verify that the locale is not a default content locale
550                     if (defaultContentLocales.contains(locale)) {
551                         throw new IllegalArgumentException("Cannot have default content locale in Locales.txt: " + line);
552                     }
553                 }
554 
555                 Level status = Level.get(stuff.get(2));
556                 if (status == Level.UNDETERMINED) {
557                     System.out.println("Warning: Level unknown on: " + line);
558                 }
559                 Map<String, Level> locale_status = platform_locale_level.get(organization);
560                 if (locale_status == null) {
561                     platform_locale_level.put(organization, locale_status = new TreeMap<>());
562                 }
563                 locale_status.put(locale, status);
564                 if (!locale.equals(ALL_LOCALES)) {
565                     String scriptLoc = parser.getLanguageScript();
566                     if (locale_status.get(scriptLoc) == null)
567                         locale_status.put(scriptLoc, status);
568                     String lang = parser.getLanguage();
569                     if (locale_status.get(lang) == null)
570                         locale_status.put(lang, status);
571                 }
572             }
573         } catch (IOException e) {
574             throw new ICUUncheckedIOException("Internal Error", e);
575         }
576 
577         // now reset the parent to be the max of the children
578         for (Organization platform : platform_locale_level.keySet()) {
579             Map<String, Level> locale_level = platform_locale_level.get(platform);
580             for (String locale : locale_level.keySet()) {
581                 parser.set(locale);
582                 Level childLevel = locale_level.get(locale);
583 
584                 String language = parser.getLanguage();
585                 if (!language.equals(locale)) {
586                     Level languageLevel = locale_level.get(language);
587                     if (languageLevel == null || languageLevel.compareTo(childLevel) < 0) {
588                         locale_level.put(language, childLevel);
589                     }
590                 }
591                 String oldLanguage = language;
592                 language = parser.getLanguageScript();
593                 if (!language.equals(oldLanguage)) {
594                     Level languageLevel = locale_level.get(language);
595                     if (languageLevel == null || languageLevel.compareTo(childLevel) < 0) {
596                         locale_level.put(language, childLevel);
597                     }
598                 }
599             }
600         }
601         // backwards compat hack
602         platform_locale_levelString = new TreeMap<>();
603         platform_level_locale = new EnumMap<>(Organization.class);
604         for (Organization platform : platform_locale_level.keySet()) {
605             Map<String, String> locale_levelString = new TreeMap<>();
606             platform_locale_levelString.put(platform.toString(), locale_levelString);
607             Map<String, Level> locale_level = platform_locale_level.get(platform);
608             for (String locale : locale_level.keySet()) {
609                 locale_levelString.put(locale, locale_level.get(locale).toString());
610             }
611             Relation level_locale = Relation.of(new EnumMap(Level.class), HashSet.class);
612             level_locale.addAllInverted(locale_level).freeze();
613             platform_level_locale.put(platform, level_locale);
614         }
615         CldrUtility.protectCollection(platform_level_locale);
616         platform_locale_level = CldrUtility.protectCollection(platform_locale_level);
617         platform_locale_levelString = CldrUtility.protectCollection(platform_locale_levelString);
618     }
619 
validate(LocaleIDParser parser)620     private String validate(LocaleIDParser parser) {
621         String message = "";
622         String lang = parser.getLanguage();
623         if (lang.length() == 0) {
624             message += ", Missing language";
625         } else if (!getAvailableCodes("language").contains(lang)) {
626             message += ", Invalid language code: " + lang;
627         }
628         String script = parser.getScript();
629         if (script.length() != 0 && !getAvailableCodes("script").contains(script)) {
630             message += ", Invalid script code: " + script;
631         }
632         String territory = parser.getRegion();
633         if (territory.length() != 0 && !getAvailableCodes("territory").contains(territory)) {
634             message += ", Invalid territory code: " + lang;
635         }
636         return message.length() == 0 ? message : message.substring(2);
637     }
638 
639     /**
640      * Ascertain that the given locale in in the given group specified by the
641      * organization
642      *
643      * @param locale
644      * @param group
645      * @param org
646      * @return boolean
647      */
isLocaleInGroup(String locale, String group, Organization org)648     public boolean isLocaleInGroup(String locale, String group, Organization org) {
649         return group.equals(getGroup(locale, org));
650     }
651 
isLocaleInGroup(String locale, String group, String org)652     public boolean isLocaleInGroup(String locale, String group, String org) {
653         return isLocaleInGroup(locale, group, Organization.fromString(org));
654     }
655 
getGroup(String locale, String org)656     public String getGroup(String locale, String org) {
657         return getGroup(locale, Organization.fromString(org));
658     }
659 
660     /**
661      * Gets the coverage group given a locale and org
662      *
663      * @param locale
664      * @param org
665      * @return group if availble, null if not
666      */
getGroup(String locale, Organization org)667     private String getGroup(String locale, Organization org) {
668         Level l = getLocaleCoverageLevel(org, locale);
669         if (l.equals(Level.UNDETERMINED)) {
670             return null;
671         } else {
672             return l.toString();
673         }
674     }
675 
676     // ========== PRIVATES ==========
677 
StandardCodes()678     private StandardCodes() {
679         String[] files = { "ISO4217.txt" }; // , "TZID.txt"
680         type_code_preferred.put(CodeType.tzid, new TreeMap<String, String>());
681         add(CodeType.language, "root", "Root");
682         String originalLine = null;
683         for (int fileIndex = 0; fileIndex < files.length; ++fileIndex) {
684             try {
685                 BufferedReader lstreg = CldrUtility.getUTF8Data(files[fileIndex]);
686                 while (true) {
687                     String line = originalLine = lstreg.readLine();
688                     if (line == null)
689                         break;
690                     if (line.startsWith("\uFEFF")) {
691                         line = line.substring(1);
692                     }
693                     line = line.trim();
694                     int commentPos = line.indexOf('#');
695                     String comment = "";
696                     if (commentPos >= 0) {
697                         comment = line.substring(commentPos + 1).trim();
698                         line = line.substring(0, commentPos);
699                     }
700                     if (line.length() == 0)
701                         continue;
702                     List<String> pieces = CldrUtility.splitList(line, '|', true,
703                         new ArrayList<String>());
704                     CodeType type = CodeType.from(pieces.get(0));
705                     pieces.remove(0);
706 
707                     String code = pieces.get(0);
708                     pieces.remove(0);
709                     if (type.equals("date")) {
710                         continue;
711                     }
712 
713                     String oldName = pieces.get(0);
714                     int pos = oldName.indexOf(';');
715                     if (pos >= 0) {
716                         oldName = oldName.substring(0, pos).trim();
717                         pieces.set(0, oldName);
718                     }
719 
720                     List<String> data = pieces;
721                     if (comment.indexOf("deprecated") >= 0) {
722                         // System.out.println(originalLine);
723                         if (data.get(2).toString().length() == 0) {
724                             data.set(2, "--");
725                         }
726                     }
727                     if (oldName.equalsIgnoreCase("PRIVATE USE")) {
728                         int separatorPos = code.indexOf("..");
729                         if (separatorPos < 0) {
730                             add(type, code, data);
731                         } else {
732                             String current = code.substring(0, separatorPos);
733                             String end = code.substring(separatorPos + 2);
734                             // System.out.println(">>" + code + "\t" + current + "\t" + end);
735                             for (; current.compareTo(end) <= 0; current = nextAlpha(current)) {
736                                 // System.out.println(">" + current);
737                                 add(type, current, data);
738                             }
739                         }
740                         continue;
741                     }
742                     if (!type.equals("tzid")) {
743                         add(type, code, data);
744                         if (type.equals("currency")) {
745                             // currency | TPE | Timor Escudo | TP | EAST TIMOR | O
746                             if (data.get(3).equals("C")) {
747                                 String country = data.get(1);
748                                 Set<String> codes = country_modernCurrency.get(country);
749                                 if (codes == null) {
750                                     country_modernCurrency.put(country, codes = new TreeSet<>());
751                                 }
752                                 codes.add(code);
753                             }
754                         }
755                         continue;
756                     }
757                     // type = tzid
758                     // List codes = (List) Utility.splitList(code, ',', true, new
759                     // ArrayList());
760                     String preferred = null;
761                     for (int i = 0; i < pieces.size(); ++i) {
762                         code = pieces.get(i);
763                         add(type, code, data);
764                         if (preferred == null)
765                             preferred = code;
766                         else {
767                             Map<String, String> code_preferred = type_code_preferred.get(type);
768                             code_preferred.put(code, preferred);
769                         }
770                     }
771                 }
772                 lstreg.close();
773             } catch (Exception e) {
774                 System.err.println("WARNING: " + files[fileIndex]
775                     + " may be a corrupted UTF-8 file. Please check.");
776                 throw (IllegalArgumentException) new IllegalArgumentException(
777                     "Can't read " + files[fileIndex] + "\t" + originalLine)
778                         .initCause(e);
779             }
780             country_modernCurrency = CldrUtility.protectCollection(country_modernCurrency);
781         }
782 
783         // data is: description | date | canonical_value | recommended_prefix #
784         // comments
785         // HACK, just rework
786 
787         Map<String, Map<String, Map<String, String>>> languageRegistry = getLStreg();
788         // languageRegistry = CldrUtility.protectCollection(languageRegistry);
789 
790         for (String type : languageRegistry.keySet()) {
791             CodeType type2 = CodeType.from(type);
792             Map<String, Map<String, String>> m = languageRegistry.get(type);
793             for (String code : m.keySet()) {
794                 Map<String, String> mm = m.get(code);
795                 List<String> data = new ArrayList<>(0);
796                 data.add(mm.get("Description"));
797                 data.add(mm.get("Added"));
798                 String pref = mm.get("Preferred-Value");
799                 if (pref == null) {
800                     pref = mm.get("Deprecated");
801                     if (pref == null)
802                         pref = "";
803                     else
804                         pref = "deprecated";
805                 }
806                 data.add(pref);
807                 if (type.equals("variant")) {
808                     code = code.toUpperCase();
809                 }
810                 // data.add(mm.get("Recommended_Prefix"));
811                 // {"region", "BQ", "Description", "British Antarctic Territory",
812                 // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"},
813                 add(type2, code, data);
814             }
815         }
816 
817         Map<String, List<String>> m = getZoneData();
818         for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) {
819             String code = it.next();
820             add(CodeType.tzid, code, m.get(code).toString());
821         }
822     }
823 
824     /**
825      * @param current
826      * @return
827      */
nextAlpha(String current)828     private static String nextAlpha(String current) {
829         // Don't care that this is inefficient
830         int value = 0;
831         for (int i = 0; i < current.length(); ++i) {
832             char c = current.charAt(i);
833             c -= c < 'a' ? 'A' : 'a';
834             value = value * 26 + c;
835         }
836         value += 1;
837         String result = "";
838         for (int i = 0; i < current.length(); ++i) {
839             result = (char) ((value % 26) + 'A') + result;
840             value = value / 26;
841         }
842         if (UCharacter.toLowerCase(current).equals(current)) {
843             result = UCharacter.toLowerCase(result);
844         } else if (UCharacter.toUpperCase(current).equals(current)) {
845             // do nothing
846         } else {
847             result = UCharacter.toTitleCase(result, null);
848         }
849         return result;
850     }
851 
852     /**
853      * @param string
854      * @param string2
855      * @param string3
856      */
857     private void add(CodeType type, String string2, String string3) {
858         List<String> l = new ArrayList<>();
859         l.add(string3);
860         add(type, string2, l);
861     }
862 
863     private void add(CodeType type, String code, List<String> otherData) {
864         // hack
865         if (type == CodeType.script) {
866             if (code.equals("Qaai")) {
867                 otherData = new ArrayList<>(otherData);
868                 otherData.set(0, "Inherited");
869             } else if (code.equals("Zyyy")) {
870                 otherData = new ArrayList<>(otherData);
871                 otherData.set(0, "Common");
872             }
873         }
874 
875         // assume name is the first item
876 
877         String name = otherData.get(0);
878 
879         // add to main list
880         Map<String, List<String>> code_data = getCodeData(type);
881         if (code_data == null) {
882             code_data = new TreeMap<>();
883             type_code_data.put(type, code_data);
884         }
885         List<String> lastData = code_data.get(code);
886         if (lastData != null) {
887             lastData.addAll(otherData);
888         } else {
889             code_data.put(code, otherData);
890         }
891 
892         // now add mapping from name to codes
893         Map<String, List<String>> name_codes = type_name_codes.get(type);
894         if (name_codes == null) {
895             name_codes = new TreeMap<>();
896             type_name_codes.put(type, name_codes);
897         }
898         List<String> codes = name_codes.get(name);
899         if (codes == null) {
900             codes = new ArrayList<>();
901             name_codes.put(name, codes);
902         }
903         codes.add(code);
904     }
905 
906     private List<String> DELETED3166 = Collections.unmodifiableList(Arrays
907         .asList(new String[] { "BQ", "BU", "CT", "DD", "DY", "FQ", "FX", "HV",
908             "JT", "MI", "NH", "NQ", "NT", "PC", "PU", "PZ", "RH", "SU", "TP",
909             "VD", "WK", "YD", "YU", "ZR" }));
910 
911     public List<String> getOld3166() {
912         return DELETED3166;
913     }
914 
915     private Map<String, List<String>> WorldBankInfo;
916 
917     public Map<String, List<String>> getWorldBankInfo() {
918         if (WorldBankInfo == null) {
919             List<String> temp = fillFromCommaFile("WorldBankInfo.txt", false);
920             WorldBankInfo = new HashMap<>();
921             for (String line : temp) {
922                 List<String> row = CldrUtility.splitList(line, ';', true);
923                 String key = row.get(0);
924                 row.remove(0);
925                 WorldBankInfo.put(key, row);
926             }
927             WorldBankInfo = CldrUtility.protectCollection(WorldBankInfo);
928         }
929         return WorldBankInfo;
930     }
931 
932     Set<String> moribundLanguages;
933 
934     public Set<String> getMoribundLanguages() {
935         if (moribundLanguages == null) {
936             List<String> temp = fillFromCommaFile("moribund_languages.txt", true);
937             moribundLanguages = new TreeSet<>();
938             moribundLanguages.addAll(temp);
939             moribundLanguages = CldrUtility.protectCollection(moribundLanguages);
940         }
941         return moribundLanguages;
942     }
943 
944     // produces a list of the 'clean' lines
945     private List<String> fillFromCommaFile(String filename, boolean trim) {
946         try {
947             List<String> result = new ArrayList<>();
948             String line;
949             BufferedReader lstreg = CldrUtility.getUTF8Data(filename);
950             while (true) {
951                 line = lstreg.readLine();
952                 if (line == null)
953                     break;
954                 int commentPos = line.indexOf('#');
955                 if (commentPos >= 0) {
956                     line = line.substring(0, commentPos);
957                 }
958                 if (trim) {
959                     line = line.trim();
960                 }
961                 if (line.length() == 0)
962                     continue;
963                 result.add(line);
964             }
965             return result;
966         } catch (Exception e) {
967             throw (RuntimeException) new IllegalArgumentException(
968                 "Can't process file: data/" + filename).initCause(e);
969         }
970     }
971 
972     // return a complex map. language -> arn -> {"Comments" -> "x",
973     // "Description->y,...}
974     static String[][] extras = {
975         { "language", "root", "Description", "Root", "CLDR", "True" },
976         // { "language", "cch", "Description", "Atsam", "CLDR", "True" },
977         // { "language", "kaj", "Description", "Jju", "CLDR", "True" },
978         // { "language", "kcg", "Description", "Tyap", "CLDR", "True" },
979         // { "language", "kfo", "Description", "Koro", "CLDR", "True" },
980         // { "language", "mfe", "Description", "Morisyen", "CLDR", "True" },
981         // { "region", "172", "Description", "Commonwealth of Independent States", "CLDR", "True" },
982         // { "region", "062", "Description", "South-Central Asia", "CLDR", "True" },
983         // { "region", "003", "Description", "North America", "CLDR", "True" },
984         //        { "variant", "POLYTONI", "Description", "Polytonic Greek", "CLDR", "True", "Preferred-Value", "POLYTON" },
985         { "variant", "REVISED", "Description", "Revised Orthography", "CLDR", "True" },
986         { "variant", "SAAHO", "Description", "Dialect", "CLDR", "True" },
987         { "variant", "POSIX", "Description", "Computer-Style", "CLDR", "True" },
988         // {"region", "172", "Description", "Commonwealth of Independent States",
989         // "CLDR", "True"},
990         // { "region", "", "Description", "European Union", "CLDR", "True" },
991         { "region", "ZZ", "Description", "Unknown or Invalid Region", "CLDR", "True" },
992         { "region", "QO", "Description", "Outlying Oceania", "CLDR", "True" },
993         { "region", "XK", "Description", "Kosovo", "CLDR", "True" },
994         { "script", "Qaai", "Description", "Inherited", "CLDR", "True" },
995         // {"region", "003", "Description", "North America", "CLDR", "True"},
996         // {"region", "062", "Description", "South-central Asia", "CLDR", "True"},
997         // {"region", "200", "Description", "Czechoslovakia", "CLDR", "True"},
998         // {"region", "830", "Description", "Channel Islands", "CLDR", "True"},
999         // {"region", "833", "Description", "Isle of Man", "CLDR", "True"},
1000 
1001         // {"region", "NT", "Description", "Neutral Zone (formerly between Saudi
1002         // Arabia & Iraq)", "CLDR", "True", "Deprecated", "True"},
1003         // {"region", "SU", "Description", "Union of Soviet Socialist Republics",
1004         // "CLDR", "True", "Deprecated", "True"},
1005         // {"region", "BQ", "Description", "British Antarctic Territory",
1006         // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"},
1007         // {"region", "CT", "Description", "Canton and Enderbury Islands",
1008         // "Preferred-Value", "KI", "CLDR", "True", "Deprecated", "True"},
1009         // {"region", "FQ", "Description", "French Southern and Antarctic Territories
1010         // (now split between AQ and TF)", "CLDR", "True", "Deprecated", "True"},
1011         // {"region", "JT", "Description", "Johnston Island", "Preferred-Value", "UM",
1012         // "CLDR", "True", "Deprecated", "True"},
1013         // {"region", "MI", "Description", "Midway Islands", "Preferred-Value", "UM",
1014         // "CLDR", "True", "Deprecated", "True"},
1015         // {"region", "NQ", "Description", "Dronning Maud Land", "Preferred-Value",
1016         // "AQ", "CLDR", "True", "Deprecated", "True"},
1017         // {"region", "PC", "Description", "Pacific Islands Trust Territory (divided
1018         // into FM, MH, MP, and PW)", "Preferred-Value", "AQ", "CLDR", "True",
1019         // "Deprecated", "True"},
1020         // {"region", "PU", "Description", "U.S. Miscellaneous Pacific Islands",
1021         // "Preferred-Value", "UM", "CLDR", "True", "Deprecated", "True"},
1022         // {"region", "PZ", "Description", "Panama Canal Zone", "Preferred-Value",
1023         // "PA", "CLDR", "True", "Deprecated", "True"},
1024         // {"region", "VD", "Description", "North Vietnam", "Preferred-Value", "VN",
1025         // "CLDR", "True", "Deprecated", "True"},
1026         // {"region", "WK", "Description", "Wake Island", "Preferred-Value", "UM",
1027         // "CLDR", "True", "Deprecated", "True"},
1028     };
1029 
    /**
     * Name of the registry data file read by {@code initLstr()}, obtained from
     * {@code CldrUtility.getProperty} with key "registry".
     * NOTE(review): "language-subtag-registry" is presumably the default used when
     * the property is not set; confirm against CldrUtility.getProperty.
     */
    static final String registryName = CldrUtility.getProperty("registry", "language-subtag-registry");
1031 
1032     public enum LstrType {
1033         language("und", "zxx", "mul", "mis", "root"),
1034         script("Zzzz", "Zsym", "Zxxx", "Zmth"),
1035         region("ZZ"),
1036         variant(),
1037         extlang(true, false),
1038         legacy(true, false),
1039         redundant(true, false),
1040         /** specialized codes for validity; TODO: rename LstrType **/
1041         currency(false, true, "XXX"),
1042         subdivision(false, true),
1043         unit(false, true),
1044         usage(false, true),
1045         zone(false, true);
1046 
1047         public final Set<String> specials;
1048         public final String unknown;
1049         public final boolean isLstr;
1050         public final boolean isUnicode;
1051 
1052         private LstrType(String... unknownValue) {
1053             this(true, true, unknownValue);
1054         }
1055 
1056         private LstrType(boolean lstr, boolean unicode, String... unknownValue) {
1057             unknown = unknownValue.length == 0 ? null : unknownValue[0];
1058             LinkedHashSet<String> set = new LinkedHashSet<>(Arrays.asList(unknownValue));
1059             if (unknown != null) {
1060                 set.remove(unknown);
1061             }
1062             specials = Collections.unmodifiableSet(set);
1063             isLstr = lstr;
1064             isUnicode = unicode;
1065         }
1066 
1067         //
1068         static final Pattern WELLFORMED = Pattern.compile("([0-9]{3}|[a-zA-Z]{2})[a-zA-Z0-9]{1,4}");
1069 
1070         boolean isWellFormed(String candidate) {
1071             switch (this) {
1072             case subdivision:
1073                 return WELLFORMED.matcher(candidate).matches();
1074             default:
1075                 throw new UnsupportedOperationException();
1076             }
1077         }
1078 
1079         /**
1080          * Generate compatibility string, returning 'territory' instead of 'region', etc.
1081          */
1082         public String toCompatString() {
1083             switch (this) {
1084             case region: return "territory";
1085             case legacy: return "language";
1086             case redundant: return "language";
1087             default: return toString();
1088             }
1089         }
1090 
1091         /**
1092          * Create LstrType from string, allowing the compat string 'territory'.
1093          */
1094         public static LstrType fromString(String rawType) {
1095             try {
1096                 return valueOf(rawType);
1097             } catch (IllegalArgumentException e) {
1098                 if ("territory".equals(rawType)) {
1099                     return region;
1100                 }
1101                 throw e;
1102             }
1103         }
1104     }
1105 
1106     public enum LstrField {
1107         Type, Subtag, Description, Added, Scope, Tag, Suppress_Script, Macrolanguage, Deprecated, Preferred_Value, Comments, Prefix, CLDR;
1108         public static LstrField from(String s) {
1109             return LstrField.valueOf(s.trim().replace("-", "_"));
1110         }
1111     }
1112 
    // Caches filled by initLstr(); all three are null until it has run.
    // String-keyed view (type -> subtag -> field -> value) of the registry plus the CLDR extras.
    static Map<String, Map<String, Map<String, String>>> LSTREG;
    // Enum-keyed registry data plus the CLDR extras; assigned last by initLstr().
    static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_ENUM;
    // Enum-keyed copy of the registry data taken before the CLDR extras are added.
    static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_RAW;
1116 
1117     /**
1118      * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br>
1119      * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated by
1120      * DESCRIPTION_SEPARATOR.
1121      *
1122      * @return
1123      */
1124     public static Map<String, Map<String, Map<String, String>>> getLStreg() {
1125         if (LSTREG == null) {
1126             initLstr();
1127         }
1128         return LSTREG;
1129     }
1130 
1131     /**
1132      * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...<br>
1133      * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated by
1134      * DESCRIPTION_SEPARATOR.
1135      *
1136      * @return
1137      */
1138     public static Map<LstrType, Map<String, Map<LstrField, String>>> getEnumLstreg() {
1139         if (LSTREG_ENUM == null) {
1140             initLstr();
1141         }
1142         return LSTREG_ENUM;
1143     }
1144 
1145     public static Map<LstrType, Map<String, Map<LstrField, String>>> getLstregEnumRaw() {
1146         if (LSTREG_ENUM == null) {
1147             initLstr();
1148         }
1149         return LSTREG_RAW;
1150     }
1151 
1152     private static void initLstr() {
1153         Map<LstrType, Map<String, Map<LstrField, String>>> result2 = new TreeMap<>();
1154 
1155         int lineNumber = 1;
1156 
1157         Set<String> funnyTags = new TreeSet<>();
1158         String line;
1159         try {
1160             BufferedReader lstreg = CldrUtility.getUTF8Data(registryName);
1161             LstrType lastType = null;
1162             String lastTag = null;
1163             Map<String, Map<LstrField, String>> subtagData = null;
1164             Map<LstrField, String> currentData = null;
1165             LstrField lastLabel = null;
1166             String lastRest = null;
1167             boolean inRealContent = false;
1168 //            Map<String, String> translitCache = new HashMap<String, String>();
1169             for (;; ++lineNumber) {
1170                 line = lstreg.readLine();
1171                 if (line == null)
1172                     break;
1173                 if (line.length() == 0)
1174                     continue; // skip blanks
1175                 if (line.startsWith("File-Date: ")) {
1176                     if (DEBUG) System.out.println("Language Subtag Registry: " + line);
1177                     inRealContent = true;
1178                     continue;
1179                 }
1180                 if (!inRealContent) {
1181                     // skip until we get to real content
1182                     continue;
1183                 }
1184                 // skip cruft
1185                 if (line.startsWith("Internet-Draft")) {
1186                     continue;
1187                 }
1188                 if (line.startsWith("Ewell")) {
1189                     continue;
1190                 }
1191                 if (line.startsWith("\f")) {
1192                     continue;
1193                 }
1194                 if (line.startsWith("4.  Security Considerations")) {
1195                     break;
1196                 }
1197 
1198                 if (line.startsWith("%%"))
1199                     continue; // skip separators (ok, since data starts with Type:
1200                 if (line.startsWith(" ")) {
1201                     currentData.put(lastLabel, lastRest + " " + line.trim());
1202                     continue;
1203                 }
1204 
1205                 /*
1206                  * Type: language Subtag: aa Description: Afar Added: 2005-10-16
1207                  * Suppress-Script: Latn
1208                  */
1209                 int pos2 = line.indexOf(':');
1210                 LstrField label = LstrField.from(line.substring(0, pos2));
1211                 String rest = line.substring(pos2 + 1).trim();
1212                 if (label == LstrField.Type) {
1213                     lastType = rest.equals("grandfathered") ?
1214                         LstrType.legacy : LstrType.fromString(rest);
1215                     subtagData = CldrUtility.get(result2, lastType);
1216                     if (subtagData == null) {
1217                         result2.put(lastType, subtagData = new TreeMap<>());
1218                     }
1219                 } else if (label == LstrField.Subtag
1220                     || label == LstrField.Tag) {
1221                     lastTag = rest;
1222                     String endTag = null;
1223                     // Subtag: qaa..qtz
1224                     int pos = lastTag.indexOf("..");
1225                     if (pos >= 0) {
1226                         endTag = lastTag.substring(pos + 2);
1227                         lastTag = lastTag.substring(0, pos);
1228                     }
1229                     currentData = new TreeMap<>();
1230                     if (endTag == null) {
1231                         putSubtagData(lastTag, subtagData, currentData);
1232                         languageCount.add(lastType, 1);
1233                         // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag);
1234                     } else {
1235                         for (; lastTag.compareTo(endTag) <= 0; lastTag = nextAlpha(lastTag)) {
1236                             // System.out.println(">" + current);
1237                             putSubtagData(lastTag, subtagData, currentData);
1238                             languageCount.add(lastType, 1);
1239                             // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + "\t" + lastTag);
1240                         }
1241 
1242                     }
1243                     // label.equalsIgnoreCase("Added") || label.equalsIgnoreCase("Suppress-Script")) {
1244                     // skip
1245                     // } else if (pieces.length < 2) {
1246                     // System.out.println("Odd Line: " + lastType + "\t" + lastTag + "\t" + line);
1247                 } else {
1248                     lastLabel = label;
1249                     // The following code was removed because in the standard tests (TestAll) both lastRest and rest were always equal.
1250                     //                    if(!translitCache.containsKey(rest)) {
1251                     //                        lastRest = TransliteratorUtilities.fromXML.transliterate(rest);
1252                     //                        translitCache.put(rest, lastRest);
1253                     //                        if (!lastRest.equals(rest)) {
1254                     //                            System.out.println(System.currentTimeMillis()+" initLStr: LastRest: '"+lastRest+"' Rest: '"+rest+"'");
1255                     //                        }
1256                     //                    } else {
1257                     //                        lastRest = translitCache.get(rest);
1258                     //                    }
1259                     lastRest = rest;
1260                     String oldValue = CldrUtility.get(currentData, lastLabel);
1261                     if (oldValue != null) {
1262                         lastRest = oldValue + DESCRIPTION_SEPARATOR + lastRest;
1263                     }
1264                     currentData.put(lastLabel, lastRest);
1265                 }
1266             }
1267         } catch (Exception e) {
1268             throw (RuntimeException) new IllegalArgumentException(
1269                 "Can't process file: data/"
1270                     + registryName + ";\t at line " + lineNumber).initCause(e);
1271         } finally {
1272             if (!funnyTags.isEmpty()) {
1273                 if (DEBUG)
1274                     System.out.println("Funny tags: " + funnyTags);
1275             }
1276         }
1277         // copy raw
1278         Map<LstrType, Map<String, Map<LstrField, String>>> rawLstreg = new TreeMap<>();
1279         for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry1 : result2.entrySet()) {
1280             LstrType key1 = entry1.getKey();
1281             TreeMap<String, Map<LstrField, String>> raw1 = new TreeMap<>();
rawLstreg.put(key1, raw1)1282             rawLstreg.put(key1, raw1);
1283             for (Entry<String, Map<LstrField, String>> entry2 : entry1.getValue().entrySet()) {
1284                 String key2 = entry2.getKey();
1285                 final Map<LstrField, String> value2 = entry2.getValue();
1286                 TreeMap<LstrField, String> raw2 = new TreeMap<>();
1287                 raw2.putAll(value2);
raw1.put(key2, raw2)1288                 raw1.put(key2, raw2);
1289             }
1290         }
1291         LSTREG_RAW = CldrUtility.protectCollection(rawLstreg);
1292 
1293         // add extras
1294         for (int i = 0; i < extras.length; ++i) {
1295             Map<String, Map<LstrField, String>> subtagData = CldrUtility.get(result2, LstrType.fromString(extras[i][0]));
1296             if (subtagData == null) {
LstrType.fromString(extras[i][0])1297                 result2.put(LstrType.fromString(extras[i][0]), subtagData = new TreeMap<>());
1298             }
1299             Map<LstrField, String> labelData = new TreeMap<>();
1300             for (int j = 2; j < extras[i].length; j += 2) {
LstrField.from(extras[i][j])1301                 labelData.put(LstrField.from(extras[i][j]), extras[i][j + 1]);
1302             }
1303             Map<LstrField, String> old = CldrUtility.get(subtagData, extras[i][1]);
1304             if (old != null) {
1305                 if (!"Private use".equals(CldrUtility.get(old, LstrField.Description))) {
1306                     throw new IllegalArgumentException("REPLACING data for " + extras[i][1] + "\t" + old + "\twith"
1307                         + labelData);
1308                 }
1309             }
1310             if (false) {
1311                 System.out.println((old != null ? "REPLACING" + "\t" + old : "ADDING") +
1312                     " data for " + extras[i][1] + "\twith" + labelData);
1313             }
subtagData.put(extras[i][1], labelData)1314             subtagData.put(extras[i][1], labelData);
1315         }
1316         // build compatibility map
1317         Map<String, Map<String, Map<String, String>>> result = new LinkedHashMap<>();
1318         for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : result2.entrySet()) {
1319             Map<String, Map<String, String>> copy2 = new LinkedHashMap<>();
1320             result.put(entry.getKey().toString(), copy2);
1321             for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) {
1322                 Map<String, String> copy3 = new LinkedHashMap<>();
entry2.getKey()1323                 copy2.put(entry2.getKey(), copy3);
1324                 for (Entry<LstrField, String> entry3 : entry2.getValue().entrySet()) {
entry3.getValue()1325                     copy3.put(entry3.getKey().toString(), entry3.getValue());
1326                 }
1327             }
1328         }
1329         LSTREG = CldrUtility.protectCollection(result);
1330         LSTREG_ENUM = CldrUtility.protectCollection(result2);
1331     }
1332 
1333     private static <K, K2, V> Map<K2, V> putSubtagData(K lastTag, Map<K, Map<K2, V>> subtagData, Map<K2, V> currentData) {
1334         Map<K2, V> oldData = subtagData.get(lastTag);
1335         if (oldData != null) {
1336             if (oldData.get("CLDR") != null) {
1337                 System.out.println("overriding: " + lastTag + ", " + oldData);
1338             } else {
1339                 throw new IllegalArgumentException("Duplicate tag: " + lastTag);
1340             }
1341         }
1342         return subtagData.put(lastTag, currentData);
1343     }
1344 
    /**
     * Per-type tally updated by {@code initLstr()}: one is added for the record's
     * type for every subtag it stores, including each subtag of an expanded range.
     */
    static Counter<LstrType> languageCount = new Counter<>();

    /**
     * Returns the shared counter itself, not a copy.
     * NOTE(review): nothing here triggers loading, so the counter presumably stays
     * empty until one of the registry getters has run {@code initLstr()}; confirm.
     *
     * @return the static {@code languageCount} counter
     */
    public static Counter<LstrType> getLanguageCount() {
        return languageCount;
    }
1350 
    /** Per-instance parser that the deprecated zone accessors of this class delegate to. */
    ZoneParser zoneParser = new ZoneParser();
1352 
1353     // static public final Set<String> MODERN_SCRIPTS = Collections
1354     // .unmodifiableSet(new TreeSet(
1355     // // "Bali " +
1356     // // "Bugi " +
1357     // // "Copt " +
1358     // // "Hano " +
1359     // // "Osma " +
1360     // // "Qaai " +
1361     // // "Sylo " +
1362     // // "Syrc " +
1363     // // "Tagb " +
1364     // // "Tglg " +
1365     // Arrays
1366     // .asList("Hans Hant Jpan Hrkt Kore Arab Armn Bali Beng Bopo Cans Cham Cher Cyrl Deva Ethi Geor Grek Gujr Guru Hani Hang Hebr Hira Knda Kana Kali Khmr Laoo Latn Lepc Limb Mlym Mong Mymr Talu Nkoo Olck Orya Saur Sinh Tale Taml Telu Thaa Thai Tibt Tfng Vaii Yiii"
1367     // .split("\\s+"))));
1368 
1369     // updated to http://www.unicode.org/reports/tr31/tr31-9.html#Specific_Character_Adjustments
1370 
    /**
     * Delegates to {@code zoneParser.getZone_rules()}.
     *
     * @return whatever the underlying {@link ZoneParser} returns
     * @deprecated no replacement is documented in this class.
     *             NOTE(review): presumably callers should use ZoneParser directly; confirm.
     */
    @Deprecated
    public Map<String, List<ZoneLine>> getZone_rules() {
        return zoneParser.getZone_rules();
    }
1378 
    /**
     * Delegates to {@code zoneParser.getZoneData()}.
     *
     * @return whatever the underlying {@link ZoneParser} returns
     * @deprecated no replacement is documented in this class.
     *             NOTE(review): presumably callers should use ZoneParser directly; confirm.
     */
    @Deprecated
    public Map<String, List<String>> getZoneData() {
        return zoneParser.getZoneData();
    }
1386 
    /**
     * Returns the key set of {@code zoneParser.getZoneData()}.
     *
     * @return the keys of the zone data map of the underlying {@link ZoneParser}
     * @deprecated no replacement is documented in this class.
     *             NOTE(review): presumably callers should use ZoneParser directly; confirm.
     */
    @Deprecated
    public Set<String> getCanonicalTimeZones() {
        return zoneParser.getZoneData().keySet();
    }
1394 
    /**
     * Delegates to {@code zoneParser.getCountryToZoneSet()}.
     *
     * @return whatever the underlying {@link ZoneParser} returns
     * @deprecated no replacement is documented in this class.
     *             NOTE(review): presumably callers should use ZoneParser directly; confirm.
     */
    @Deprecated
    public Map<String, Set<String>> getCountryToZoneSet() {
        return zoneParser.getCountryToZoneSet();
    }
1402 
    /**
     * Delegates to {@code zoneParser.getDeprecatedZoneIDs()}.
     *
     * @return whatever the underlying {@link ZoneParser} returns
     * @deprecated no replacement is documented in this class.
     *             NOTE(review): presumably callers should use ZoneParser directly; confirm.
     */
    @Deprecated
    public List<String> getDeprecatedZoneIDs() {
        return zoneParser.getDeprecatedZoneIDs();
    }
1410 
    /**
     * Delegates to {@code zoneParser.getTZIDComparator()}.
     *
     * @return whatever the underlying {@link ZoneParser} returns
     * @deprecated no replacement is documented in this class.
     *             NOTE(review): presumably callers should use ZoneParser directly; confirm.
     */
    @Deprecated
    public Comparator<String> getTZIDComparator() {
        return zoneParser.getTZIDComparator();
    }
1418 
    /**
     * Delegates to {@code zoneParser.getZoneLinkNew_OldSet()}.
     *
     * @return whatever the underlying {@link ZoneParser} returns
     * @deprecated no replacement is documented in this class.
     *             NOTE(review): presumably callers should use ZoneParser directly; confirm.
     */
    @Deprecated
    public Map<String, Set<String>> getZoneLinkNew_OldSet() {
        return zoneParser.getZoneLinkNew_OldSet();
    }
1426 
    /**
     * Delegates to {@code zoneParser.getZoneLinkold_new()}.
     *
     * @return whatever the underlying {@link ZoneParser} returns
     * @deprecated no replacement is documented in this class.
     *             NOTE(review): presumably callers should use ZoneParser directly; confirm.
     */
    @Deprecated
    public Map<String, String> getZoneLinkold_new() {
        return zoneParser.getZoneLinkold_new();
    }
1434 
    /**
     * Delegates to {@code zoneParser.getZoneRuleID_rules()}.
     * NOTE(review): the return type is a raw {@code Map}; the key and value types
     * are not visible from this class and should be taken from ZoneParser.
     *
     * @return whatever the underlying {@link ZoneParser} returns
     * @deprecated no replacement is documented in this class.
     *             NOTE(review): presumably callers should use ZoneParser directly; confirm.
     */
    @Deprecated
    public Map getZoneRuleID_rules() {
        return zoneParser.getZoneRuleID_rules();
    }
1442 
    /**
     * Delegates to {@code zoneParser.getZoneToCounty()}.
     * NOTE(review): "County" in the name looks like a misspelling of "Country";
     * the name mirrors the ZoneParser method and is part of the public API.
     *
     * @return whatever the underlying {@link ZoneParser} returns
     * @deprecated no replacement is documented in this class.
     *             NOTE(review): presumably callers should use ZoneParser directly; confirm.
     */
    @Deprecated
    public Map<String, String> getZoneToCounty() {
        return zoneParser.getZoneToCounty();
    }
1450 
1451     /**
1452      * @deprecated
1453      */
1454     @Deprecated
1455     public String getZoneVersion() {
1456         return zoneParser.getVersion();
1457     }
1458 
1459     public static String fixLanguageTag(String languageSubtag) {
1460         if (languageSubtag.equals("mo")) { // fix special cases
1461             return "ro";
1462         }
1463         return languageSubtag;
1464     }
1465 
1466     public boolean isModernLanguage(String languageCode) {
1467         if (getMoribundLanguages().contains(languageCode)) return false;
1468         Type type = Iso639Data.getType(languageCode);
1469         if (type == Type.Living) return true;
1470         if (languageCode.equals("eo")) return true; // exception for Esperanto
1471         // Scope scope = Iso639Data.getScope(languageCode);
1472         // if (scope == Scope.Collection) return false;
1473         return false;
1474     }
1475 
1476     public static boolean isScriptModern(String script) {
1477         ScriptMetadata.Info info = ScriptMetadata.getInfo(script);
1478         if (info == null) {
1479             if (false) throw new IllegalArgumentException("No script metadata for: " + script);
1480             return false;
1481         }
1482         IdUsage idUsage = info.idUsage;
1483         return idUsage != IdUsage.EXCLUSION && idUsage != IdUsage.UNKNOWN;
1484     }
1485 
    /** Matches a run of one or more whitespace characters; obtained through {@code PatternCache}. */
    static final Pattern whitespace = PatternCache.get("\\s+");
    /**
     * Starts out {@code null}. Not assigned anywhere in the nearby code;
     * presumably filled in lazily elsewhere in this class -- TODO confirm.
     */
    static Set<String> filteredCurrencies = null;
1488 
1489     public Set<String> getSurveyToolDisplayCodes(String type) {
1490         return getGoodAvailableCodes(type);
1491     }
1492 
    /**
     * Frozen set of the ASCII letters {@code a-z} and {@code A-Z}. Used by
     * {@code isCountry} to check that every character of a code is a letter.
     */
    static UnicodeSet COUNTRY = new UnicodeSet("[a-zA-Z]").freeze();
1494 
1495     /**
1496      * Quick check for whether valid country. Not complete: should use Validity
1497      * @param territory
1498      * @return
1499      */
1500     public static boolean isCountry(String territory) {
1501         switch (territory) {
1502         case "ZZ":
1503         case "QO":
1504         case "EU":
1505         case "UN":
1506         case "EZ":
1507             return false;
1508         default:
1509             return territory.length() == 2 && COUNTRY.containsAll(territory);
1510         }
1511     }
1512 
1513     public boolean isLstregPrivateUse(String type, String code) {
1514         Map<String, String> lStregData = getLStreg().get(type).get(code);
1515         return lStregData.get("Description").equalsIgnoreCase("private use");
1516     }
1517 
1518     public boolean isLstregDeprecated(String type, String code) {
1519         Map<String, String> lStregData = getLStreg().get(type).get(code);
1520         return lStregData.get("Deprecated") != null;
1521     }
1522 }
1523