1 package org.unicode.cldr.unittest; 2 3 import java.util.Arrays; 4 import java.util.Calendar; 5 import java.util.HashSet; 6 import java.util.LinkedHashSet; 7 import java.util.List; 8 import java.util.Map; 9 import java.util.Map.Entry; 10 import java.util.Set; 11 import java.util.TreeSet; 12 import java.util.regex.Matcher; 13 import java.util.regex.Pattern; 14 15 import org.unicode.cldr.util.CLDRConfig; 16 import org.unicode.cldr.util.PatternCache; 17 import org.unicode.cldr.util.StandardCodes; 18 19 import com.ibm.icu.dev.test.TestFmwk; 20 import com.ibm.icu.impl.Relation; 21 import com.ibm.icu.impl.Row.R2; 22 import com.ibm.icu.lang.UProperty; 23 import com.ibm.icu.lang.UScript; 24 import com.ibm.icu.text.UnicodeSet; 25 26 public class TestCanonicalIds extends TestFmwk { 27 28 // TODO consider whether we can pull the $variable stuff from other 29 // sources.. 30 31 static final Pattern WHITESPACE_PATTERN = PatternCache.get("\\s+"); 32 33 static CLDRConfig testInfo = CLDRConfig.getInstance(); 34 static Map<String, Map<String, R2<List<String>, String>>> aliasInfo = testInfo 35 .getSupplementalDataInfo().getLocaleAliasInfo(); 36 main(String[] args)37 public static void main(String[] args) { 38 new TestCanonicalIds().run(args); 39 } 40 TestTimezones()41 public void TestTimezones() { 42 Set<String> bcp47Canonical = new LinkedHashSet<String>(); 43 Relation<R2<String, String>, String> data = testInfo 44 .getSupplementalDataInfo().getBcp47Aliases(); 45 Map<R2<String, String>, String> deprecatedData = testInfo 46 .getSupplementalDataInfo().getBcp47Deprecated(); 47 48 // the first item in each set of aliases is the primary. 49 for (Entry<R2<String, String>, Set<String>> entry : data.keyValuesSet()) { 50 final R2<String, String> keyType = entry.getKey(); 51 if ("tz".equals(keyType.get0())) { 52 if (keyType.get1().isEmpty()) { 53 continue; 54 } 55 String deprecated = deprecatedData.get(keyType); 56 if ("true".equals(deprecated)) { 57 continue; 58 } 59 Set<String> aliases = entry.getValue(); 60 String firstAlias = aliases.iterator().next(); 61 bcp47Canonical.add(firstAlias); 62 } 63 } 64 65 // check that the metadata is up to date 66 // Not necessary any more, since the bcp47 data is used directly. 67 68 // Map<String, R2<String, String>> validityInfo = testInfo 69 // .getSupplementalDataInfo().getValidityInfo(); 70 // String timezoneItemString = validityInfo.get("$tzid").get1(); 71 // HashSet<String> variable = new LinkedHashSet<String>( 72 // Arrays.asList(WHITESPACE_PATTERN.split(timezoneItemString 73 // .trim()))); 74 // if (!variable.equals(bcp47Canonical)) { 75 // TreeSet<String> bcp47Only = new TreeSet<String>(bcp47Canonical); 76 // bcp47Only.removeAll(variable); 77 // TreeSet<String> variableOnly = new TreeSet<String>(variable); 78 // variableOnly.removeAll(bcp47Canonical); 79 // errln("Timezones: bcp47≠validity; bcp47:\t" + bcp47Only 80 // + ";\tvalidity:\t" + variableOnly); 81 // } 82 } 83 84 enum Type { 85 language, script, territory, zone 86 } 87 88 // public void TestForDeprecatedVariables() { 89 // Map<String, Map<String, R2<List<String>, String>>> aliasInfo = testInfo 90 // .getSupplementalDataInfo().getLocaleAliasInfo(); 91 // // language, script, territory, variant, zone 92 // Map<String, R2<String, String>> validityInfo = testInfo 93 // .getSupplementalDataInfo().getValidityInfo(); 94 // for (Entry<String, R2<String, String>> entry : validityInfo.entrySet()) { 95 // String key = entry.getKey(); 96 // if (key.equals("$language")) { 97 // checkItems(aliasInfo, entry, key, Type.language); 98 // } else if (key.equals("$script")) { 99 // checkItems(aliasInfo, entry, key, Type.script); 100 // } else if (key.equals("$territory")) { 101 // checkItems(aliasInfo, entry, key, Type.territory); 102 // } else if (key.equals("$tzid")) { 103 // checkItems(aliasInfo, entry, key, Type.zone); 104 // } 105 // } 106 // } 107 checkItems( Map<String, Map<String, R2<List<String>, String>>> aliasInfo, Entry<String, R2<String, String>> entry, String key, final Type type)108 private void checkItems( 109 Map<String, Map<String, R2<List<String>, String>>> aliasInfo, 110 Entry<String, R2<String, String>> entry, String key, final Type type) { 111 Map<String, R2<List<String>, String>> badMap = aliasInfo.get(type 112 .toString()); 113 final String valueString = entry.getValue().get1(); 114 HashSet<String> values = new LinkedHashSet<String>( 115 Arrays.asList(WHITESPACE_PATTERN.split(valueString.trim()))); 116 Set<String> emptyScripts = new TreeSet<String>(); 117 UnicodeSet remainingCharacters = new UnicodeSet(0, 0x10FFFF); 118 UnicodeSet s = new UnicodeSet(); 119 for (String value : values) { 120 R2<List<String>, String> replacement = badMap.get(value); 121 if (replacement != null && replacement.get1().equals("deprecated") 122 && !isOk(type, value)) { 123 errln("Deprecated value in " + key + ":\t" + value 124 + " should be " + badMap.get(value).get0()); 125 } 126 if (type == Type.script) { 127 int scriptEnum = UScript.getCodeFromName(value); 128 s.applyIntPropertyValue(UProperty.SCRIPT, scriptEnum); 129 if (s.size() == 0) { 130 emptyScripts.add(value); 131 } else { 132 remainingCharacters.removeAll(s); 133 } 134 } 135 } 136 if (type == Type.script) { 137 final List<String> specialValues = Arrays.asList("Zmth", "Zsym", 138 "Zxxx"); 139 emptyScripts.removeAll(specialValues); 140 // Empty scripts can still be valid in CLDR, so this test is bogus 141 // if (!emptyScripts.isEmpty()) { 142 // errln("Remove empty scripts from $script!: " + emptyScripts); 143 // } 144 Set<String> missingScripts = new TreeSet<String>(specialValues); 145 missingScripts.removeAll(values); 146 while (remainingCharacters.size() != 0) { 147 String first = remainingCharacters.iterator().next(); 148 int scriptEnum = UScript.getScript(first.codePointAt(0)); 149 missingScripts.add(UScript.getShortName(scriptEnum)); 150 s.applyIntPropertyValue(UProperty.SCRIPT, scriptEnum); 151 remainingCharacters.removeAll(s); 152 } 153 if (!missingScripts.isEmpty()) { 154 errln("Add missing scripts to $script!: " + emptyScripts); 155 } 156 } 157 } 158 159 static final long CURRENT_YEAR = Calendar.getInstance().get(Calendar.YEAR); 160 isOk(Type type, String value)161 private boolean isOk(Type type, String value) { 162 if (type == Type.territory) { 163 if (value.equals("QU")) { 164 return false; 165 } 166 Map<String, String> regionInfo = StandardCodes.getLStreg() 167 .get("region").get(value); 168 if (regionInfo == null) { 169 errln("Region info null for " + value); 170 return false; 171 } 172 String deprecated = regionInfo.get("Deprecated"); 173 if (deprecated == null) { 174 errln("No deprecated info for " + value); 175 return false; 176 } 177 Matcher m = PatternCache.get("(\\d{4})-(\\d{2})-(\\d{2})").matcher( 178 deprecated); 179 if (!m.matches()) { 180 errln("Bad deprecated date for " + value + ", " + deprecated); 181 return false; 182 } 183 long deprecationYear = Integer.parseInt(m.group(1)); 184 if (CURRENT_YEAR - deprecationYear <= 5) { 185 logln("Region " + value 186 + " is deprecated but less than 5 years..."); 187 return true; 188 } 189 } else if (type == Type.language) { 190 Map<String, String> languageInfo = StandardCodes.getLStreg() 191 .get("language").get(value); 192 if (languageInfo == null) { 193 errln("Language info null for " + value); 194 return false; 195 } 196 String deprecated = languageInfo.get("Deprecated"); 197 if (deprecated == null) { 198 errln("No deprecated info for " + value); 199 return false; 200 } 201 Matcher m = PatternCache.get("(\\d{4})-(\\d{2})-(\\d{2})").matcher( 202 deprecated); 203 if (!m.matches()) { 204 errln("Bad deprecated date for " + value + ", " + deprecated); 205 return false; 206 } 207 long deprecationYear = Integer.parseInt(m.group(1)); 208 if (CURRENT_YEAR - deprecationYear <= 5) { 209 logln("Language " + value 210 + " is deprecated but less than 5 years..."); 211 return true; 212 } 213 } 214 return false; 215 } 216 } 217