• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.util.Arrays;
4 import java.util.Calendar;
5 import java.util.HashSet;
6 import java.util.LinkedHashSet;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Map.Entry;
10 import java.util.Set;
11 import java.util.TreeSet;
12 import java.util.regex.Matcher;
13 import java.util.regex.Pattern;
14 
15 import org.unicode.cldr.util.CLDRConfig;
16 import org.unicode.cldr.util.PatternCache;
17 import org.unicode.cldr.util.StandardCodes;
18 
19 import com.ibm.icu.dev.test.TestFmwk;
20 import com.ibm.icu.impl.Relation;
21 import com.ibm.icu.impl.Row.R2;
22 import com.ibm.icu.lang.UProperty;
23 import com.ibm.icu.lang.UScript;
24 import com.ibm.icu.text.UnicodeSet;
25 
26 public class TestCanonicalIds extends TestFmwk {
27 
28     // TODO consider whether we can pull the $variable stuff from other
29     // sources..
30 
31     static final Pattern WHITESPACE_PATTERN = PatternCache.get("\\s+");
32 
33     static CLDRConfig testInfo = CLDRConfig.getInstance();
34     static Map<String, Map<String, R2<List<String>, String>>> aliasInfo = testInfo
35         .getSupplementalDataInfo().getLocaleAliasInfo();
36 
main(String[] args)37     public static void main(String[] args) {
38         new TestCanonicalIds().run(args);
39     }
40 
TestTimezones()41     public void TestTimezones() {
42         Set<String> bcp47Canonical = new LinkedHashSet<String>();
43         Relation<R2<String, String>, String> data = testInfo
44             .getSupplementalDataInfo().getBcp47Aliases();
45         Map<R2<String, String>, String> deprecatedData = testInfo
46             .getSupplementalDataInfo().getBcp47Deprecated();
47 
48         // the first item in each set of aliases is the primary.
49         for (Entry<R2<String, String>, Set<String>> entry : data.keyValuesSet()) {
50             final R2<String, String> keyType = entry.getKey();
51             if ("tz".equals(keyType.get0())) {
52                 if (keyType.get1().isEmpty()) {
53                     continue;
54                 }
55                 String deprecated = deprecatedData.get(keyType);
56                 if ("true".equals(deprecated)) {
57                     continue;
58                 }
59                 Set<String> aliases = entry.getValue();
60                 String firstAlias = aliases.iterator().next();
61                 bcp47Canonical.add(firstAlias);
62             }
63         }
64 
65         // check that the metadata is up to date
66         // Not necessary any more, since the bcp47 data is used directly.
67 
68 //        Map<String, R2<String, String>> validityInfo = testInfo
69 //            .getSupplementalDataInfo().getValidityInfo();
70 //        String timezoneItemString = validityInfo.get("$tzid").get1();
71 //        HashSet<String> variable = new LinkedHashSet<String>(
72 //            Arrays.asList(WHITESPACE_PATTERN.split(timezoneItemString
73 //                .trim())));
74 //        if (!variable.equals(bcp47Canonical)) {
75 //            TreeSet<String> bcp47Only = new TreeSet<String>(bcp47Canonical);
76 //            bcp47Only.removeAll(variable);
77 //            TreeSet<String> variableOnly = new TreeSet<String>(variable);
78 //            variableOnly.removeAll(bcp47Canonical);
79 //            errln("Timezones: bcp47≠validity; bcp47:\t" + bcp47Only
80 //                + ";\tvalidity:\t" + variableOnly);
81 //        }
82     }
83 
84     enum Type {
85         language, script, territory, zone
86     }
87 
88 //    public void TestForDeprecatedVariables() {
89 //        Map<String, Map<String, R2<List<String>, String>>> aliasInfo = testInfo
90 //            .getSupplementalDataInfo().getLocaleAliasInfo();
91 //        // language, script, territory, variant, zone
92 //        Map<String, R2<String, String>> validityInfo = testInfo
93 //            .getSupplementalDataInfo().getValidityInfo();
94 //        for (Entry<String, R2<String, String>> entry : validityInfo.entrySet()) {
95 //            String key = entry.getKey();
96 //            if (key.equals("$language")) {
97 //                checkItems(aliasInfo, entry, key, Type.language);
98 //            } else if (key.equals("$script")) {
99 //                checkItems(aliasInfo, entry, key, Type.script);
100 //            } else if (key.equals("$territory")) {
101 //                checkItems(aliasInfo, entry, key, Type.territory);
102 //            } else if (key.equals("$tzid")) {
103 //                checkItems(aliasInfo, entry, key, Type.zone);
104 //            }
105 //        }
106 //    }
107 
checkItems( Map<String, Map<String, R2<List<String>, String>>> aliasInfo, Entry<String, R2<String, String>> entry, String key, final Type type)108     private void checkItems(
109         Map<String, Map<String, R2<List<String>, String>>> aliasInfo,
110         Entry<String, R2<String, String>> entry, String key, final Type type) {
111         Map<String, R2<List<String>, String>> badMap = aliasInfo.get(type
112             .toString());
113         final String valueString = entry.getValue().get1();
114         HashSet<String> values = new LinkedHashSet<String>(
115             Arrays.asList(WHITESPACE_PATTERN.split(valueString.trim())));
116         Set<String> emptyScripts = new TreeSet<String>();
117         UnicodeSet remainingCharacters = new UnicodeSet(0, 0x10FFFF);
118         UnicodeSet s = new UnicodeSet();
119         for (String value : values) {
120             R2<List<String>, String> replacement = badMap.get(value);
121             if (replacement != null && replacement.get1().equals("deprecated")
122                 && !isOk(type, value)) {
123                 errln("Deprecated value in " + key + ":\t" + value
124                     + " should be " + badMap.get(value).get0());
125             }
126             if (type == Type.script) {
127                 int scriptEnum = UScript.getCodeFromName(value);
128                 s.applyIntPropertyValue(UProperty.SCRIPT, scriptEnum);
129                 if (s.size() == 0) {
130                     emptyScripts.add(value);
131                 } else {
132                     remainingCharacters.removeAll(s);
133                 }
134             }
135         }
136         if (type == Type.script) {
137             final List<String> specialValues = Arrays.asList("Zmth", "Zsym",
138                 "Zxxx");
139             emptyScripts.removeAll(specialValues);
140             // Empty scripts can still be valid in CLDR, so this test is bogus
141             // if (!emptyScripts.isEmpty()) {
142             // errln("Remove empty scripts from $script!: " + emptyScripts);
143             // }
144             Set<String> missingScripts = new TreeSet<String>(specialValues);
145             missingScripts.removeAll(values);
146             while (remainingCharacters.size() != 0) {
147                 String first = remainingCharacters.iterator().next();
148                 int scriptEnum = UScript.getScript(first.codePointAt(0));
149                 missingScripts.add(UScript.getShortName(scriptEnum));
150                 s.applyIntPropertyValue(UProperty.SCRIPT, scriptEnum);
151                 remainingCharacters.removeAll(s);
152             }
153             if (!missingScripts.isEmpty()) {
154                 errln("Add missing scripts to $script!: " + emptyScripts);
155             }
156         }
157     }
158 
159     static final long CURRENT_YEAR = Calendar.getInstance().get(Calendar.YEAR);
160 
isOk(Type type, String value)161     private boolean isOk(Type type, String value) {
162         if (type == Type.territory) {
163             if (value.equals("QU")) {
164                 return false;
165             }
166             Map<String, String> regionInfo = StandardCodes.getLStreg()
167                 .get("region").get(value);
168             if (regionInfo == null) {
169                 errln("Region info null for " + value);
170                 return false;
171             }
172             String deprecated = regionInfo.get("Deprecated");
173             if (deprecated == null) {
174                 errln("No deprecated info for " + value);
175                 return false;
176             }
177             Matcher m = PatternCache.get("(\\d{4})-(\\d{2})-(\\d{2})").matcher(
178                 deprecated);
179             if (!m.matches()) {
180                 errln("Bad deprecated date for " + value + ", " + deprecated);
181                 return false;
182             }
183             long deprecationYear = Integer.parseInt(m.group(1));
184             if (CURRENT_YEAR - deprecationYear <= 5) {
185                 logln("Region " + value
186                     + " is deprecated but less than 5 years...");
187                 return true;
188             }
189         } else if (type == Type.language) {
190             Map<String, String> languageInfo = StandardCodes.getLStreg()
191                 .get("language").get(value);
192             if (languageInfo == null) {
193                 errln("Language info null for " + value);
194                 return false;
195             }
196             String deprecated = languageInfo.get("Deprecated");
197             if (deprecated == null) {
198                 errln("No deprecated info for " + value);
199                 return false;
200             }
201             Matcher m = PatternCache.get("(\\d{4})-(\\d{2})-(\\d{2})").matcher(
202                 deprecated);
203             if (!m.matches()) {
204                 errln("Bad deprecated date for " + value + ", " + deprecated);
205                 return false;
206             }
207             long deprecationYear = Integer.parseInt(m.group(1));
208             if (CURRENT_YEAR - deprecationYear <= 5) {
209                 logln("Language " + value
210                     + " is deprecated but less than 5 years...");
211                 return true;
212             }
213         }
214         return false;
215     }
216 }
217