1 package org.unicode.cldr.unittest; 2 3 import java.util.ArrayList; 4 import java.util.Arrays; 5 import java.util.Collection; 6 import java.util.Collections; 7 import java.util.Date; 8 import java.util.EnumMap; 9 import java.util.EnumSet; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.Iterator; 13 import java.util.LinkedHashMap; 14 import java.util.LinkedHashSet; 15 import java.util.List; 16 import java.util.Locale; 17 import java.util.Map; 18 import java.util.Map.Entry; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.TreeSet; 22 import java.util.regex.Matcher; 23 import java.util.regex.Pattern; 24 25 import org.unicode.cldr.draft.ScriptMetadata; 26 import org.unicode.cldr.test.CoverageLevel2; 27 import org.unicode.cldr.tool.LikelySubtags; 28 import org.unicode.cldr.tool.PluralMinimalPairs; 29 import org.unicode.cldr.tool.PluralRulesFactory; 30 import org.unicode.cldr.util.Builder; 31 import org.unicode.cldr.util.CLDRConfig; 32 import org.unicode.cldr.util.CLDRFile; 33 import org.unicode.cldr.util.CLDRFile.WinningChoice; 34 import org.unicode.cldr.util.CLDRLocale; 35 import org.unicode.cldr.util.CldrUtility; 36 import org.unicode.cldr.util.Iso639Data; 37 import org.unicode.cldr.util.Iso639Data.Scope; 38 import org.unicode.cldr.util.IsoCurrencyParser; 39 import org.unicode.cldr.util.LanguageTagCanonicalizer; 40 import org.unicode.cldr.util.LanguageTagParser; 41 import org.unicode.cldr.util.Level; 42 import org.unicode.cldr.util.Organization; 43 import org.unicode.cldr.util.Pair; 44 import org.unicode.cldr.util.PluralRanges; 45 import org.unicode.cldr.util.PreferredAndAllowedHour; 46 import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle; 47 import org.unicode.cldr.util.StandardCodes; 48 import org.unicode.cldr.util.StandardCodes.CodeType; 49 import org.unicode.cldr.util.StandardCodes.LstrType; 50 import org.unicode.cldr.util.SupplementalDataInfo; 51 import 
org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 52 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type; 53 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; 54 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 55 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; 56 import org.unicode.cldr.util.SupplementalDataInfo.DateRange; 57 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange; 58 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 59 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 60 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 61 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 62 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 63 import org.unicode.cldr.util.SupplementalDataInfo.SampleList; 64 import org.unicode.cldr.util.Validity; 65 import org.unicode.cldr.util.Validity.Status; 66 67 import com.google.common.collect.ImmutableSet; 68 import com.google.common.collect.Multimap; 69 import com.google.common.collect.TreeMultimap; 70 import com.ibm.icu.dev.util.CollectionUtilities; 71 import com.ibm.icu.impl.Relation; 72 import com.ibm.icu.impl.Row; 73 import com.ibm.icu.impl.Row.R2; 74 import com.ibm.icu.impl.Row.R3; 75 import com.ibm.icu.impl.Utility; 76 import com.ibm.icu.lang.UCharacter; 77 import com.ibm.icu.lang.UCharacterEnums; 78 import com.ibm.icu.lang.UScript; 79 import com.ibm.icu.text.PluralRules; 80 import com.ibm.icu.text.PluralRules.FixedDecimal; 81 import com.ibm.icu.text.PluralRules.FixedDecimalRange; 82 import com.ibm.icu.text.PluralRules.FixedDecimalSamples; 83 import com.ibm.icu.text.PluralRules.SampleType; 84 import com.ibm.icu.text.StringTransform; 85 import com.ibm.icu.text.UnicodeSet; 86 import com.ibm.icu.util.Output; 87 import com.ibm.icu.util.TimeZone; 88 import com.ibm.icu.util.ULocale; 89 90 public class TestSupplementalInfo extends TestFmwkPlus { 91 
static CLDRConfig testInfo = CLDRConfig.getInstance(); 92 93 private static final StandardCodes STANDARD_CODES = testInfo 94 .getStandardCodes(); 95 96 private static final SupplementalDataInfo SUPPLEMENTAL = testInfo 97 .getSupplementalDataInfo(); 98 main(String[] args)99 public static void main(String[] args) { 100 new TestSupplementalInfo().run(args); 101 } 102 TestPluralSampleOrder()103 public void TestPluralSampleOrder() { 104 HashSet<PluralInfo> seen = new HashSet<PluralInfo>(); 105 for (String locale : SUPPLEMENTAL.getPluralLocales()) { 106 if (locale.equals("root")) { 107 continue; 108 } 109 PluralInfo pi = SUPPLEMENTAL.getPlurals(locale); 110 if (seen.contains(pi)) { 111 continue; 112 } 113 seen.add(pi); 114 for (SampleType s : SampleType.values()) { 115 for (Count c : pi.getCounts(s)) { 116 FixedDecimalSamples sSamples = pi.getPluralRules() 117 .getDecimalSamples(c.toString(), s); 118 if (sSamples == null) { 119 errln(locale + " no sample for " + c); 120 continue; 121 } 122 if (s == SampleType.DECIMAL) { 123 continue; // skip 124 } 125 FixedDecimalRange lastSample = null; 126 for (FixedDecimalRange sample : sSamples.samples) { 127 if (lastSample != null) { 128 if (lastSample.start.compareTo(sample.start) > 0) { 129 errln(locale + ":" + c + ": out of order with " 130 + lastSample + " > " + sample); 131 } else if (false) { 132 logln(locale + ":" + c + ": in order with " 133 + lastSample + " < " + sample); 134 } 135 } 136 lastSample = sample; 137 } 138 } 139 } 140 } 141 } 142 TestPluralRanges()143 public void TestPluralRanges() { 144 PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 145 Set<String> localesToTest = new TreeSet<String>( 146 SUPPLEMENTAL.getPluralRangesLocales()); 147 for (String locale : StandardCodes.make().getLocaleCoverageLocales( 148 "google")) { // superset 149 if (locale.equals("*") || locale.contains("_")) { 150 continue; 151 } 152 localesToTest.add(locale); 153 } 154 Set<String> modernLocales = 
testInfo.getStandardCodes() 155 .getLocaleCoverageLocales(Organization.cldr, 156 EnumSet.of(Level.MODERN)); 157 158 Output<FixedDecimal> maxSample = new Output<FixedDecimal>(); 159 Output<FixedDecimal> minSample = new Output<FixedDecimal>(); 160 161 for (String locale : localesToTest) { 162 final String templateLine = "Template for " + ULocale.getDisplayName(locale, "en") + " (" + locale + ") translators to fix:"; 163 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 164 Set<Count> counts = pluralInfo.getCounts(); 165 166 final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(new ULocale(locale).toString()); 167 168 // check that there are no null values 169 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 170 if (pluralRanges == null) { 171 if (!modernLocales.contains(locale)) { 172 logln("Missing plural ranges for " + locale); 173 } else { 174 errOrLog(CoverageIssue.error, locale + "\tMissing plural ranges", "Cldrbug:7839", "Missing plural data for modern locales"); 175 StringBuilder failureCases = new StringBuilder(templateLine); 176 for (Count start : counts) { 177 for (Count end : counts) { 178 pluralInfo.rangeExists(start, end, minSample, maxSample); 179 final String rangeLine = getRangeLine(start, end, null, maxSample, minSample, samplePatterns); 180 failureCases.append("\n" + locale + "\t" + rangeLine); 181 } 182 } 183 errOrLog(CoverageIssue.warn, failureCases.toString()); 184 } 185 continue; 186 } 187 EnumSet<Count> found = EnumSet.noneOf(Count.class); 188 for (Count count : Count.values()) { 189 if (pluralRanges.isExplicitlySet(count) 190 && !counts.contains(count)) { 191 assertTrue( 192 locale 193 + "\t pluralRanges categories must be valid for locale:\t" 194 + count + " must be in " + counts, 195 !pluralRanges.isExplicitlySet(count)); 196 } 197 for (Count end : Count.values()) { 198 Count result = pluralRanges.getExplicit(count, end); 199 if (result != null) { 200 found.add(result); 201 } 202 } 203 } 204 205 // 
check empty range results 206 if (found.isEmpty()) { 207 errOrLog(CoverageIssue.error, "Empty range results for " + locale, "Cldrbug:7839", "Missing plural data for modern locales"); 208 } else { 209 if (samplePatterns == null) { 210 errOrLog(CoverageIssue.error, locale + "\tMissing sample patterns", "Cldrbug:7839", "Missing plural data for modern locales"); 211 } else { 212 for (Count result : found) { 213 String samplePattern = samplePatterns.get( 214 PluralRules.PluralType.CARDINAL, result); 215 if (samplePattern != null && !samplePattern.contains("{0}")) { 216 errln("Plural Ranges cannot have results that don't use {0} in samples: " 217 + locale 218 + ", " 219 + result 220 + "\t«" 221 + samplePattern + "»"); 222 } 223 } 224 } 225 if (isVerbose()) { 226 logln("Range results for " + locale + ":\t" + found); 227 } 228 } 229 230 // check for missing values 231 boolean failure = false; 232 StringBuilder failureCases = new StringBuilder(templateLine); 233 for (Count start : counts) { 234 for (Count end : counts) { 235 boolean needsValue = pluralInfo.rangeExists(start, end, 236 minSample, maxSample); 237 Count explicitValue = pluralRanges.getExplicit(start, end); 238 final String rangeLine = getRangeLine(start, end, explicitValue, maxSample, minSample, samplePatterns); 239 failureCases.append("\n" + locale + "\t" + rangeLine); 240 if (needsValue && explicitValue == null) { 241 errOrLog(CoverageIssue.error, locale + "\tNo explicit value for range: " 242 + rangeLine, 243 "Cldrbug:7839", "Missing plural data for modern locales"); 244 failure = true; 245 failureCases.append("\tError — need explicit result"); 246 } else if (!needsValue && explicitValue != null) { 247 errOrLog(CoverageIssue.error, locale + "\tDoesn't need explicit value, but has one: " 248 + PluralRanges.showRange(start, end, explicitValue), 249 "Cldrbug:7839", "Missing plural data for modern locales"); 250 failureCases.append("\tUnnecessary"); 251 failure = true; 252 } else { 253 
failureCases.append("\tOK"); 254 } 255 } 256 } 257 if (failure) { 258 errOrLog(CoverageIssue.warn, failureCases.toString()); 259 } 260 } 261 } 262 getRangeLine(Count start, Count end, Count result, Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample, PluralMinimalPairs samplePatterns)263 private String getRangeLine(Count start, Count end, Count result, 264 Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample, 265 PluralMinimalPairs samplePatterns) { 266 final String range = minSample + "–" + maxSample; 267 String example = range; 268 if (samplePatterns != null) { 269 example = ""; 270 if (result != null) { 271 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result); 272 example += "«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»"; 273 } else { 274 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) { 275 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c); 276 example += c + ":«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»" + "?\tOR "; 277 } 278 example += " …"; 279 } 280 } 281 return start + "\t" + end + "\t" + (result == null ? "?" : result.toString()) + "\t" + example; 282 } 283 getRangeLine(Count count, PluralRules pluralRules, String pattern)284 private String getRangeLine(Count count, PluralRules pluralRules, String pattern) { 285 String sample = "?"; 286 FixedDecimalSamples exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER); 287 if (exampleList == null) { 288 exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL); 289 } 290 FixedDecimal sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList); 291 sample = sampleDecimal.toString(); 292 293 String example = pattern == null ? "NO-SAMPLE!" 
: "«" + pattern.replace("{0}", sample) + "»"; 294 return count + "\t" + example; 295 } 296 TestPluralSamples()297 public void TestPluralSamples() { 298 String[][] test = { { "en", "ordinal", "1", "one" }, 299 { "en", "ordinal", "2", "two" }, 300 { "en", "ordinal", "3", "few" }, 301 { "en", "ordinal", "4", "other" }, 302 { "sl", "cardinal", "2", "two" }, }; 303 for (String[] row : test) { 304 checkPluralSamples(row); 305 } 306 } 307 TestPluralSamples2()308 public void TestPluralSamples2() { 309 PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 310 for (String locale : prf.getLocales()) { 311 if (locale.equals("und")) { 312 continue; 313 } 314 if (locale.equals("pl")) { 315 int debug = 0; 316 } 317 final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale); 318 for (PluralRules.PluralType type : PluralRules.PluralType.values()) { 319 PluralInfo rules = SUPPLEMENTAL.getPlurals( 320 SupplementalDataInfo.PluralType.fromStandardType(type), 321 locale.toString()); 322 if (rules.getCounts().size() == 1) { 323 continue; // don't require rules for unary cases 324 } 325 Multimap<String, Count> sampleToCount = TreeMultimap.create(); 326 327 for (Count count : rules.getCounts()) { 328 String sample = samplePatterns.get(type, count); 329 if (sample == null) { 330 errOrLog(CoverageIssue.error, locale + "\t" + type + " \tmissing samples for " + count, "cldrbug:7075", 331 "Missing ordinal minimal pairs"); 332 } else { 333 sampleToCount.put(sample, count); 334 PluralRules pRules = rules.getPluralRules(); 335 double unique = pRules.getUniqueKeywordValue(count 336 .toString()); 337 if (unique == PluralRules.NO_UNIQUE_VALUE 338 && !sample.contains("{0}")) { 339 errln("Missing {0} in sample: " + locale + ", " + type + ", " + count + " «" + sample + "»"); 340 } 341 } 342 } 343 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) { 344 if (entry.getValue().size() > 1) { 345 errln("Colliding minimal pair samples: " + 
locale + ", " + type + ", " + entry.getValue() + " «" + entry.getKey() + "»"); 346 } 347 } 348 } 349 } 350 } 351 TestCldrScriptCodes()352 public void TestCldrScriptCodes() { 353 Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes(); 354 355 Set<String> unicodeScripts = ScriptMetadata.getScripts(); 356 assertRelation("getCLDRScriptCodes contains Unicode Scripts", true, codes, CONTAINS_ALL, unicodeScripts); 357 358 ImmutableSet<String> allSpecials = ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz"); 359 assertRelation("getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials); 360 361 ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore"); 362 assertRelation("getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos); 363 364 Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script); 365 for (Entry<Status, Set<String>> e : scripts.entrySet()) { 366 switch (e.getKey()) { 367 case regular: 368 case special: 369 case unknown: 370 assertRelation("getCLDRScriptCodes contains " + e.getKey(), true, codes, CONTAINS_ALL, e.getValue()); 371 break; 372 default: 373 break; // do nothin 374 } 375 } 376 377 ImmutableSet<String> variants = ImmutableSet.of("Aran", "Cyrs", "Geok", "Latf", "Latg", "Syre", "Syrj", "Syrn"); 378 assertRelation("getCLDRScriptCodes contains variants", false, codes, CONTAINS_SOME, variants); 379 } 380 checkPluralSamples(String... row)381 public void checkPluralSamples(String... 
row) { 382 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals( 383 PluralType.valueOf(row[1]), row[0]); 384 Count count = pluralInfo.getCount(new FixedDecimal(row[2])); 385 assertEquals(CollectionUtilities.join(row, ", "), 386 Count.valueOf(row[3]), count); 387 } 388 TestPluralLocales()389 public void TestPluralLocales() { 390 // get the unique rules 391 for (PluralType type : PluralType.values()) { 392 Relation<PluralInfo, String> pluralsToLocale = Relation.of( 393 new HashMap<PluralInfo, Set<String>>(), TreeSet.class); 394 for (String locale : new TreeSet<String>( 395 SUPPLEMENTAL.getPluralLocales(type))) { 396 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale); 397 pluralsToLocale.put(pluralInfo, locale); 398 } 399 400 String[][] equivalents = { { "mo", "ro" }, { "tl", "fil" }, 401 { "he", "iw" }, { "in", "id" }, { "jw", "jv" }, 402 { "ji", "yi" }, { "sh", "sr" }, }; 403 for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale 404 .keyValuesSet()) { 405 PluralInfo pluralInfo2 = pluralInfoEntry.getKey(); 406 Set<String> locales = pluralInfoEntry.getValue(); 407 // check that equivalent locales are either both in or both out 408 for (String[] row : equivalents) { 409 assertEquals( 410 type + " must be equivalent: " + Arrays.asList(row), 411 locales.contains(row[0]), locales.contains(row[1])); 412 } 413 // check that no rules contain 'within' 414 for (Count count : pluralInfo2.getCounts()) { 415 String rule = pluralInfo2.getRule(count); 416 if (rule == null) { 417 continue; 418 } 419 assertFalse( 420 "Rule '" + rule + "' for " + Arrays.asList(locales) 421 + " doesn't contain 'within'", 422 rule.contains("within")); 423 } 424 } 425 } 426 } 427 TestDigitPluralCases()428 public void TestDigitPluralCases() { 429 String[][] tests = { 430 { "en", "one", "1", "1" }, 431 { "en", "one", "2", "" }, 432 { "en", "one", "3", "" }, 433 { "en", "one", "4", "" }, 434 { "en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, …" }, 435 { "en", "other", "2", "10-99, 10.0, 
10.1, 10.2, …" }, 436 { "en", "other", "3", "100-999, 100.0, 100.1, 100.2, …" }, 437 { "en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, …" }, 438 { "hr", "one", "1", "1, 0.1, 2.10, 1.1, …" }, 439 { "hr", "one", "2", 440 "21, 31, 41, 51, 61, 71, …, 10.1, 12.10, 11.1, …" }, 441 { "hr", "one", "3", 442 "101, 121, 131, 141, 151, 161, …, 100.1, 102.10, 101.1, …" }, 443 { "hr", "one", "4", 444 "1001, 1021, 1031, 1041, 1051, 1061, …, 1000.1, 1002.10, 1001.1, …" }, 445 { "hr", "few", "1", "2-4, 0.2, 0.3, 0.4, …" }, 446 { "hr", "few", "2", 447 "22-24, 32-34, 42-44, …, 10.2, 10.3, 10.4, …" }, 448 { "hr", "few", "3", 449 "102-104, 122-124, 132-134, …, 100.2, 100.3, 100.4, …" }, 450 { "hr", "few", "4", 451 "1002-1004, 1022-1024, 1032-1034, …, 1000.2, 1000.3, 1000.4, …" }, 452 { "hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, …" }, 453 { "hr", "other", "2", 454 "10-20, 25-30, 35-40, …, 10.0, 10.5, 10.6, …" }, 455 { "hr", "other", "3", 456 "100, 105-120, 125-130, 135-140, …, 100.0, 100.5, 100.6, …" }, 457 { "hr", "other", "4", 458 "1000, 1005-1020, 1025-1030, 1035-1040, …, 1000.0, 1000.5, 1000.6, …" }, }; 459 for (String[] row : tests) { 460 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]); 461 SampleList uset = plurals.getSamples9999(Count.valueOf(row[1]), 462 Integer.parseInt(row[2])); 463 assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3], 464 uset.toString()); 465 } 466 } 467 TestDigitPluralCompleteness()468 public void TestDigitPluralCompleteness() { 469 String[][] exceptionStrings = { 470 // defaults 471 { "*", "zero", "0,00,000,0000" }, { "*", "one", "0" }, 472 { "*", "two", "0,00,000,0000" }, 473 { "*", "few", "0,00,000,0000" }, 474 { "*", "many", "0,00,000,0000" }, 475 { "*", "other", "0,00,000,0000" }, 476 // others 477 { "mo", "other", "00,000,0000" }, // 478 { "ro", "other", "00,000,0000" }, // 479 { "cs", "few", "0" }, // j in 2..4 480 { "sk", "few", "0" }, // j in 2..4 481 { "da", "one", "0" }, // j is 1 or t is not 0 and n within 0..2 482 { "is", 
"one", "0,00,000,0000" }, // j is 1 or f is 1 483 { "sv", "one", "0" }, // j is 1 484 { "he", "two", "0" }, // j is 2 485 { "ru", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 486 // is not 11 487 { "uk", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 488 // is not 11 489 { "bs", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 490 // is not 11 or f mod 10 is 491 // 1 and f mod 100 is not 11 492 { "hr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 493 // is not 11 or f mod 10 is 494 // 1 and f mod 100 is not 11 495 { "sh", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 496 // is not 11 or f mod 10 is 497 // 1 and f mod 100 is not 11 498 { "sr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 499 // is not 11 or f mod 10 is 500 // 1 and f mod 100 is not 11 501 { "mk", "one", "0,00,000,0000" }, // j mod 10 is 1 or f mod 10 502 // is 1 503 { "sl", "one", "0,000,0000" }, // j mod 100 is 1 504 { "sl", "two", "0,000,0000" }, // j mod 100 is 2 505 { "he", "many", "00,000,0000" }, // j not in 0..10 and j mod 10 506 // is 0 507 { "tzm", "one", "0,00" }, // n in 0..1 or n in 11..99 508 { "gd", "one", "0,00" }, // n in 1,11 509 { "gd", "two", "0,00" }, // n in 2,12 510 { "shi", "few", "0,00" }, // n in 2..10 511 { "gd", "few", "0,00" }, // n in 3..10,13..19 512 { "ga", "few", "0" }, // n in 3..6 513 { "ga", "many", "0,00" }, // n in 7..10 514 { "ar", "zero", "0" }, // n is 0 515 { "cy", "zero", "0" }, // n is 0 516 { "ksh", "zero", "0" }, // n is 0 517 { "lag", "zero", "0" }, // n is 0 518 { "pt", "one", "0" }, // i = 1 and v = 0 or i = 0 and t = 1 519 { "pt_PT", "one", "0" }, // n = 1 and v = 0 520 { "ar", "two", "0" }, // n is 2 521 { "cy", "two", "0" }, // n is 2 522 { "ga", "two", "0" }, // n is 2 523 { "iu", "two", "0" }, // n is 2 524 { "kw", "two", "0" }, // n is 2 525 { "naq", "two", "0" }, // n is 2 526 { "se", "two", "0" }, // n is 2 527 { "sma", "two", "0" }, // n is 2 528 { "smi", "two", "0" }, // n is 2 
529 { "smj", "two", "0" }, // n is 2 530 { "smn", "two", "0" }, // n is 2 531 { "sms", "two", "0" }, // n is 2 532 { "cy", "few", "0" }, // n is 3 533 { "cy", "many", "0" }, // n is 6 534 { "br", "many", "" }, // n is not 0 and n mod 1000000 is 0 535 { "gv", "one", "0,00,000,0000" }, // n mod 10 is 1 536 { "be", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 537 // is not 11 538 { "lv", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 539 // is not 11 or v is 2 and f 540 // mod 10 is 1 and f mod 100 541 // is not 11 or v is not 2 542 // and f mod 10 is 1 543 { "br", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 544 // not in 11,71,91 545 { "lt", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 546 // not in 11..19 547 { "fil", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v = 548 // 0 and i % 10 != 4,6,9 or 549 // v != 0 and f % 10 != 550 // 4,6,9 551 { "tl", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v = 552 // 0 and i % 10 != 4,6,9 or 553 // v != 0 and f % 10 != 554 // 4,6,9 555 { "dsb", "one", "0,00,000,0000" }, // v = 0 and i % 100 = 1 or f 556 // % 100 = 1 557 }; 558 // parse out the exceptions 559 Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<PluralInfo, Relation<Count, Integer>>(); 560 Relation<Count, Integer> fallback = Relation.of( 561 new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class); 562 for (String[] row : exceptionStrings) { 563 Relation<Count, Integer> countToDigits; 564 if (row[0].equals("*")) { 565 countToDigits = fallback; 566 } else { 567 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]); 568 countToDigits = exceptions.get(plurals); 569 if (countToDigits == null) { 570 exceptions.put( 571 plurals, 572 countToDigits = Relation.of( 573 new EnumMap<Count, Set<Integer>>( 574 Count.class), 575 TreeSet.class)); 576 } 577 } 578 Count c = Count.valueOf(row[1]); 579 for (String digit : row[2].split(",")) { 580 // "99" is special, just to have the result be non-empty 581 
countToDigits.put(c, digit.length()); 582 } 583 } 584 Set<PluralInfo> seen = new HashSet<PluralInfo>(); 585 Set<String> sorted = new TreeSet<String>( 586 SUPPLEMENTAL.getPluralLocales(PluralType.cardinal)); 587 Relation<String, String> ruleToExceptions = Relation.of( 588 new TreeMap<String, Set<String>>(), TreeSet.class); 589 590 for (String locale : sorted) { 591 PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale); 592 if (seen.contains(plurals)) { // skip identicals 593 continue; 594 } 595 Relation<Count, Integer> countToDigits = exceptions.get(plurals); 596 if (countToDigits == null) { 597 countToDigits = fallback; 598 } 599 for (Count c : plurals.getCounts()) { 600 List<String> compose = new ArrayList<String>(); 601 boolean needLine = false; 602 Set<Integer> digitSet = countToDigits.get(c); 603 if (digitSet == null) { 604 digitSet = fallback.get(c); 605 } 606 for (int digits = 1; digits < 5; ++digits) { 607 boolean expected = digitSet.contains(digits); 608 boolean hasSamples = plurals.hasSamples(c, digits); 609 if (hasSamples) { 610 compose.add(Utility.repeat("0", digits)); 611 } 612 if (!assertEquals(locale + ", " + digits + ", " + c, 613 expected, hasSamples)) { 614 needLine = true; 615 } 616 } 617 if (needLine) { 618 String countRules = plurals.getPluralRules().getRules( 619 c.toString()); 620 ruleToExceptions.put(countRules == null ? 
"" : countRules, 621 "{\"" + locale + "\", \"" + c + "\", \"" 622 + CollectionUtilities.join(compose, ",") 623 + "\"},"); 624 } 625 } 626 } 627 if (!ruleToExceptions.isEmpty()) { 628 System.out 629 .println("To fix the above, review the following, then replace in TestDigitPluralCompleteness"); 630 for (Entry<String, String> entry : ruleToExceptions.entrySet()) { 631 System.out.println(entry.getValue() + "\t// " + entry.getKey()); 632 } 633 } 634 } 635 TestLikelyCode()636 public void TestLikelyCode() { 637 Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags(); 638 String[][] tests = { { "it_AQ", "it_Latn_AQ" }, 639 { "it_Arab", "it_Arab_IT" }, { "az_Cyrl", "az_Cyrl_AZ" }, }; 640 for (String[] pair : tests) { 641 String newMax = LikelySubtags.maximize(pair[0], likely); 642 assertEquals("Likely", pair[1], newMax); 643 } 644 645 } 646 TestLikelySubtagCompleteness()647 public void TestLikelySubtagCompleteness() { 648 Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags(); 649 650 for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) { 651 if (!likely.containsKey(language)) { 652 logln("WARNING: No likely subtag for CLDR language code (" 653 + language + ")"); 654 } 655 } 656 for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) { 657 if (!likely.containsKey("und_" + script) 658 && !script.equals("Latn") 659 && !script.equals("Zinh") 660 && !script.equals("Zyyy") 661 && ScriptMetadata.getInfo(script) != null 662 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION 663 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) { 664 errln("No likely subtag for CLDR script code (und_" + script 665 + ")"); 666 } 667 } 668 669 } 670 TestEquivalentLocales()671 public void TestEquivalentLocales() { 672 Set<Set<String>> seen = new HashSet<Set<String>>(); 673 Set<String> toTest = new TreeSet<String>(testInfo.getCldrFactory() 674 .getAvailable()); 675 toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet()); 676 
toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values()); 677 toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales()); 678 LanguageTagParser ltp = new LanguageTagParser(); 679 main: for (String locale : toTest) { 680 if (locale.startsWith("und") || locale.equals("root")) { 681 continue; 682 } 683 Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale); 684 if (seen.contains(s)) { 685 continue; 686 } 687 // System.out.println(s + " => " + VettingViewer.gatherCodes(s)); 688 689 List<String> ss = new ArrayList<String>(s); 690 String last = ss.get(ss.size() - 1); 691 ltp.set(last); 692 if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) { 693 continue; // skip variants for now. 694 } 695 String language = ltp.getLanguage(); 696 String script = ltp.getScript(); 697 String region = ltp.getRegion(); 698 if (!script.isEmpty() && !region.isEmpty()) { 699 String noScript = ltp.setScript("").toString(); 700 String noRegion = ltp.setScript(script).setRegion("") 701 .toString(); 702 switch (s.size()) { 703 case 1: // ok if already maximized and strange script/country, 704 // eg it_Arab_JA 705 continue main; 706 case 2: // ok if adds default country/script, eg {en_Cyrl, 707 // en_Cyrl_US} or {en_GB, en_Latn_GB} 708 String first = ss.get(0); 709 if (first.equals(noScript) || first.equals(noRegion)) { 710 continue main; 711 } 712 break; 713 case 3: // ok if different script in different country, eg 714 // {az_IR, az_Arab, az_Arab_IR} 715 if (noScript.equals(ss.get(0)) 716 && noRegion.equals(ss.get(1))) { 717 continue main; 718 } 719 break; 720 case 4: // ok if all combinations, eg {en, en_US, en_Latn, 721 // en_Latn_US} 722 if (language.equals(ss.get(0)) 723 && noScript.equals(ss.get(1)) 724 && noRegion.equals(ss.get(2))) { 725 continue main; 726 } 727 break; 728 } 729 } 730 errln("Strange size or composition:\t" + s + " \t" 731 + showLocaleParts(s)); 732 seen.add(s); 733 } 734 } 735 showLocaleParts(Set<String> s)736 private String showLocaleParts(Set<String> s) 
{ 737 LanguageTagParser ltp = new LanguageTagParser(); 738 Set<String> b = new LinkedHashSet<String>(); 739 for (String ss : s) { 740 ltp.set(ss); 741 addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b); 742 addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b); 743 addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b); 744 } 745 return CollectionUtilities.join(b, "; "); 746 } 747 addName(int languageName, String code, Set<String> b)748 private void addName(int languageName, String code, Set<String> b) { 749 if (code.isEmpty()) { 750 return; 751 } 752 String name = testInfo.getEnglish().getName(languageName, code); 753 if (!code.equals(name)) { 754 b.add(code + "=" + name); 755 } 756 } 757 TestDefaultScriptCompleteness()758 public void TestDefaultScriptCompleteness() { 759 Relation<String, String> scriptToBase = Relation.of( 760 new LinkedHashMap<String, Set<String>>(), TreeSet.class); 761 main: for (String locale : testInfo.getCldrFactory() 762 .getAvailableLanguages()) { 763 if (!locale.contains("_") && !"root".equals(locale)) { 764 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale); 765 if (defaultScript != null) { 766 continue; 767 } 768 CLDRFile cldrFile = testInfo.getCLDRFile(locale, 769 false); 770 UnicodeSet set = cldrFile.getExemplarSet("", 771 WinningChoice.NORMAL); 772 for (String s : set) { 773 int script = UScript.getScript(s.codePointAt(0)); 774 if (script != UScript.UNKNOWN && script != UScript.COMMON 775 && script != UScript.INHERITED) { 776 scriptToBase.put(UScript.getShortName(script), locale); 777 continue main; 778 } 779 } 780 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale); 781 } 782 } 783 if (scriptToBase.size() != 0) { 784 for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) { 785 errln("Default Scripts missing:\t" + entry.getKey() + "\t" 786 + entry.getValue()); 787 } 788 } 789 } 790 TestTimeData()791 public void TestTimeData() { 792 Map<String, PreferredAndAllowedHour> timeData = SUPPLEMENTAL 793 
.getTimeData(); 794 Set<String> regionsSoFar = new HashSet<String>(); 795 Set<String> current24only = new HashSet<String>(); 796 Set<String> current12preferred = new HashSet<String>(); 797 798 boolean haveWorld = false; 799 800 ImmutableSet<HourStyle> oldSchool = ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k)); 801 802 for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) { 803 String region = e.getKey(); 804 if (region.equals("001")) { 805 haveWorld = true; 806 } 807 regionsSoFar.add(region); 808 PreferredAndAllowedHour preferredAndAllowedHour = e.getValue(); 809 assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred); 810 811 // find first h or H 812 HourStyle found = null; 813 814 for (HourStyle item : preferredAndAllowedHour.allowed) { 815 if (oldSchool.contains(item)) { 816 found = item; 817 if (item != preferredAndAllowedHour.preferred) { 818 String message = "Inconsistent values for " + region + ": preferred=" + preferredAndAllowedHour.preferred 819 + " but that isn't the first " + oldSchool + " in allowed: " + preferredAndAllowedHour.allowed; 820 if (!logKnownIssue("cldrbug:11448", message)) { 821 errln(message); 822 } 823 } 824 break; 825 } 826 } 827 if (found == null) { 828 errln(region + ": preferred " + preferredAndAllowedHour.preferred 829 + " not in " + preferredAndAllowedHour.allowed); 830 } 831 // final HourStyle firstAllowed = preferredAndAllowedHour.allowed.iterator().next(); 832 // if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.h 833 // || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.hb 834 // || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed == HourStyle.H) { 835 // errln(region + ": allowed " + preferredAndAllowedHour.allowed 836 // + " starts with preferred " + preferredAndAllowedHour.preferred); 837 // } else if (isVerbose()) { 838 // logln(region + ": allowed " + 
preferredAndAllowedHour.allowed 839 // + " starts with preferred " + preferredAndAllowedHour.preferred); 840 // } 841 // for (HourStyle c : preferredAndAllowedHour.allowed) { 842 // if (!PreferredAndAllowedHour.HOURS.contains(c)) { 843 // errln(region + ": illegal character in " + 844 // preferredAndAllowedHour.allowed + ". It contains " + c 845 // + " which is not in " + PreferredAndAllowedHour.HOURS); 846 // } 847 // } 848 if (!preferredAndAllowedHour.allowed.contains(HourStyle.h) 849 && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) { 850 current24only.add(region); 851 } 852 if (preferredAndAllowedHour.preferred == HourStyle.h) { 853 current12preferred.add(region); 854 } 855 } 856 Set<String> missing = new TreeSet<String>( 857 STANDARD_CODES.getGoodAvailableCodes(CodeType.territory)); 858 missing.removeAll(regionsSoFar); 859 for (Iterator<String> it = missing.iterator(); it.hasNext();) { 860 if (!StandardCodes.isCountry(it.next())) { 861 it.remove(); 862 } 863 } 864 865 // if we don't have 001, then we can't miss any regions 866 if (!missing.isEmpty()) { 867 if (haveWorld) { 868 logln("Implicit regions: " + missing); 869 } else { 870 errln("Missing regions: " + missing); 871 } 872 } 873 874 // The feedback gathered from our translators is that the following use 875 // 24 hour time ONLY: 876 Set<String> only24lang = new TreeSet<String>( 877 Arrays.asList(("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, " 878 + "fr, gl, he, is, id, it, nb, pt, ro, ru, sr, sk, sl, sv, tr, hy") 879 .split(",\\s*"))); 880 // With the new preferences, this is changed 881 Set<String> only24region = new TreeSet<String>(); 882 Set<String> either24or12region = new TreeSet<String>(); 883 884 // get all countries where official or de-facto official 885 // add them two one of two lists, based on the above list of languages 886 for (String language : SUPPLEMENTAL 887 .getLanguagesForTerritoriesPopulationData()) { 888 boolean a24lang = only24lang.contains(language); 889 for (String 
region : SUPPLEMENTAL 890 .getTerritoriesForPopulationData(language)) { 891 PopulationData pop = SUPPLEMENTAL 892 .getLanguageAndTerritoryPopulationData(language, region); 893 if (pop.getOfficialStatus().compareTo( 894 OfficialStatus.de_facto_official) < 0) { 895 continue; 896 } 897 if (a24lang) { 898 only24region.add(region); 899 } else { 900 either24or12region.add(region); 901 } 902 } 903 } 904 // if we have a case like CA, where en uses 12/24 but fr uses 24, remove 905 // it for safety 906 only24region.removeAll(either24or12region); 907 // There are always exceptions... Remove VA (Vatican), since it allows 12/24 908 // but the de facto langauge is Italian. 909 only24region.remove("VA"); 910 // also remove all the regions where 'h' is preferred 911 only24region.removeAll(current12preferred); 912 // now verify 913 if (!current24only.containsAll(only24region)) { 914 Set<String> missing24only = new TreeSet<String>(only24region); 915 missing24only.removeAll(current24only); 916 917 errln("24-hour-only doesn't include needed items:\n" 918 + " add " 919 + CldrUtility.join(missing24only, " ") 920 + "\n\t\t" 921 + CldrUtility.join(missing24only, "\n\t\t", 922 new NameCodeTransform(testInfo.getEnglish(), 923 CLDRFile.TERRITORY_NAME))); 924 } 925 } 926 927 public static class NameCodeTransform implements StringTransform { 928 private final CLDRFile file; 929 private final int codeType; 930 NameCodeTransform(CLDRFile file, int code)931 public NameCodeTransform(CLDRFile file, int code) { 932 this.file = file; 933 this.codeType = code; 934 } 935 936 @Override transform(String code)937 public String transform(String code) { 938 return file.getName(codeType, code) + " [" + code + "]"; 939 } 940 } 941 TestAliases()942 public void TestAliases() { 943 testInfo.getStandardCodes(); 944 Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes 945 .getLStreg(); 946 Map<String, Map<String, R2<List<String>, String>>> aliases = SUPPLEMENTAL 947 .getLocaleAliasInfo(); 948 
949 for (Entry<String, Map<String, R2<List<String>, String>>> typeMap : aliases 950 .entrySet()) { 951 String type = typeMap.getKey(); 952 Map<String, R2<List<String>, String>> codeReplacement = typeMap 953 .getValue(); 954 955 Map<String, Map<String, String>> bcp47DataTypeData = bcp47Data 956 .get(type.equals("territory") ? "region" : type); 957 if (bcp47DataTypeData == null) { 958 logln("skipping BCP47 test for " + type); 959 } else { 960 for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData 961 .entrySet()) { 962 String code = codeData.getKey(); 963 if (codeReplacement.containsKey(code) 964 || codeReplacement.containsKey(code 965 .toUpperCase(Locale.ENGLISH))) { 966 continue; 967 // TODO, check the value 968 } 969 Map<String, String> data = codeData.getValue(); 970 if (data.containsKey("Deprecated") 971 && SUPPLEMENTAL.getCLDRLanguageCodes().contains( 972 code)) { 973 errln("supplementalMetadata.xml: alias is missing <languageAlias type=\"" 974 + code + "\" ... /> " + "\t" + data); 975 } 976 } 977 } 978 979 Set<R3<String, List<String>, List<String>>> failures = new TreeSet<R3<String, List<String>, List<String>>>(); 980 Set<String> nullReplacements = new TreeSet<String>(); 981 for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement 982 .entrySet()) { 983 String code = codeRep.getKey(); 984 List<String> replacements = codeRep.getValue().get0(); 985 if (replacements == null) { 986 nullReplacements.add(code); 987 continue; 988 } 989 Set<String> fixedReplacements = new LinkedHashSet<String>(); 990 for (String replacement : replacements) { 991 R2<List<String>, String> newReplacement = codeReplacement 992 .get(replacement); 993 if (newReplacement != null) { 994 List<String> list = newReplacement.get0(); 995 if (list != null) { 996 fixedReplacements.addAll(list); 997 } 998 } else { 999 fixedReplacements.add(replacement); 1000 } 1001 } 1002 List<String> fixedList = new ArrayList<String>( 1003 fixedReplacements); 1004 if 
(!replacements.equals(fixedList)) { 1005 R3<String, List<String>, List<String>> row = Row.of(code, 1006 replacements, fixedList); 1007 System.out.println(row.toString()); 1008 failures.add(row); 1009 } 1010 } 1011 1012 if (failures.size() != 0) { 1013 for (R3<String, List<String>, List<String>> item : failures) { 1014 String code = item.get0(); 1015 List<String> oldReplacement = item.get1(); 1016 List<String> newReplacement = item.get2(); 1017 1018 errln(code + "\t=>\t" + oldReplacement + "\tshould be:\n\t" 1019 + "<" + type + "Alias type=\"" + code 1020 + "\" replacement=\"" 1021 + CollectionUtilities.join(newReplacement, " ") 1022 + "\" reason=\"XXX\"/> <!-- YYY -->\n"); 1023 } 1024 } 1025 if (nullReplacements.size() != 0) { 1026 logln("No Replacements\t" + type + "\t" + nullReplacements); 1027 } 1028 } 1029 } 1030 1031 static final List<String> oldRegions = Arrays 1032 .asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU" 1033 .split(", ")); 1034 TestTerritoryContainment()1035 public void TestTerritoryContainment() { 1036 Relation<String, String> map = SUPPLEMENTAL 1037 .getTerritoryToContained(ContainmentStyle.all); 1038 Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore(); 1039 Set<String> mapItems = new LinkedHashSet<String>(); 1040 // get all the items 1041 for (String item : map.keySet()) { 1042 mapItems.add(item); 1043 mapItems.addAll(map.getAll(item)); 1044 } 1045 Map<String, Map<String, String>> bcp47RegionData = StandardCodes 1046 .getLStreg().get("region"); 1047 1048 // verify that all regions are covered 1049 Set<String> bcp47Regions = new LinkedHashSet<String>( 1050 bcp47RegionData.keySet()); 1051 bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the 1052 // unknown region... 
1053 for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext();) { 1054 String region = it.next(); 1055 Map<String, String> data = bcp47RegionData.get(region); 1056 if (data.containsKey("Deprecated")) { 1057 logln("Removing deprecated " + region); 1058 it.remove(); 1059 } 1060 if ("Private use".equals(data.get("Description"))) { 1061 it.remove(); 1062 } 1063 } 1064 1065 if (!mapItems.equals(bcp47Regions)) { 1066 mapItems.removeAll(oldRegions); 1067 errlnDiff("containment items not in bcp47 regions: ", mapItems, 1068 bcp47Regions); 1069 errlnDiff("bcp47 regions not in containment items: ", bcp47Regions, 1070 mapItems); 1071 } 1072 1073 // verify that everything in the containment core can be reached 1074 // downwards from 001. 1075 1076 Map<String, Integer> from001 = getRecursiveContainment("001", map, 1077 new LinkedHashMap<String, Integer>(), 1); 1078 from001.put("001", 0); 1079 Set<String> keySet = from001.keySet(); 1080 for (String region : keySet) { 1081 logln(Utility.repeat("\t", from001.get(region)) + "\t" + region 1082 + "\t" + getRegionName(region)); 1083 } 1084 1085 // Populate mapItems with the core containment 1086 mapItems.clear(); 1087 for (String item : mapCore.keySet()) { 1088 mapItems.add(item); 1089 mapItems.addAll(mapCore.getAll(item)); 1090 } 1091 1092 if (!mapItems.equals(keySet)) { 1093 errlnDiff( 1094 "containment core items that can't be reached from 001: ", 1095 mapItems, keySet); 1096 } 1097 } 1098 errlnDiff(String title, Set<String> mapItems, Set<String> keySet)1099 private void errlnDiff(String title, Set<String> mapItems, 1100 Set<String> keySet) { 1101 Set<String> diff = new LinkedHashSet<String>(mapItems); 1102 diff.removeAll(keySet); 1103 if (diff.size() != 0) { 1104 errln(title + diff); 1105 } 1106 } 1107 getRegionName(String region)1108 private String getRegionName(String region) { 1109 return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region); 1110 } 1111 getRecursiveContainment(String region, Relation<String, 
String> map, Map<String, Integer> result, int depth)1112 private Map<String, Integer> getRecursiveContainment(String region, 1113 Relation<String, String> map, Map<String, Integer> result, int depth) { 1114 Set<String> contained = map.getAll(region); 1115 if (contained == null) { 1116 return result; 1117 } 1118 for (String item : contained) { 1119 if (result.containsKey(item)) { 1120 logln("Duplicate containment " + item + "\t" 1121 + getRegionName(item)); 1122 continue; 1123 } 1124 result.put(item, depth); 1125 getRecursiveContainment(item, map, result, depth + 1); 1126 } 1127 return result; 1128 } 1129 TestMacrolanguages()1130 public void TestMacrolanguages() { 1131 Set<String> languageCodes = STANDARD_CODES 1132 .getAvailableCodes("language"); 1133 Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement = SUPPLEMENTAL 1134 .getLocaleAliasInfo(); 1135 Map<String, R2<List<String>, String>> tagToReplacement = typeToTagToReplacement 1136 .get("language"); 1137 1138 Relation<String, String> replacementToReplaced = Relation.of( 1139 new TreeMap<String, Set<String>>(), TreeSet.class); 1140 for (String language : tagToReplacement.keySet()) { 1141 List<String> replacements = tagToReplacement.get(language).get0(); 1142 if (replacements != null) { 1143 replacementToReplaced.putAll(replacements, language); 1144 } 1145 } 1146 replacementToReplaced.freeze(); 1147 1148 Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes 1149 .getLStreg(); 1150 Map<String, Map<String, String>> lstregLanguageInfo = lstreg 1151 .get("language"); 1152 1153 Relation<Scope, String> scopeToCodes = Relation.of( 1154 new TreeMap<Scope, Set<String>>(), TreeSet.class); 1155 // the invariant is that every macrolanguage has exactly 1 encompassed 1156 // language that maps to it 1157 1158 main: for (String language : Builder.with(new TreeSet<String>()) 1159 .addAll(languageCodes).addAll(Iso639Data.getAvailable()).get()) { 1160 if (language.equals("no") || 
language.equals("sh")) 1161 continue; // special cases 1162 Scope languageScope = getScope(language, lstregLanguageInfo); 1163 if (languageScope == Scope.Macrolanguage) { 1164 if (Iso639Data.getHeirarchy(language) != null) { 1165 continue main; // is real family 1166 } 1167 Set<String> replacements = replacementToReplaced 1168 .getAll(language); 1169 if (replacements == null || replacements.size() == 0) { 1170 scopeToCodes.put(languageScope, language); 1171 } else { 1172 // it still might be bad, if we don't have a mapping to a 1173 // regular language 1174 for (String replacement : replacements) { 1175 Scope replacementScope = getScope(replacement, 1176 lstregLanguageInfo); 1177 if (replacementScope == Scope.Individual) { 1178 continue main; 1179 } 1180 } 1181 scopeToCodes.put(languageScope, language); 1182 } 1183 } 1184 } 1185 // now show the items we found 1186 for (Scope scope : scopeToCodes.keySet()) { 1187 for (String language : scopeToCodes.getAll(scope)) { 1188 String name = testInfo.getEnglish().getName(language); 1189 if (name == null || name.equals(language)) { 1190 Set<String> set = Iso639Data.getNames(language); 1191 if (set != null) { 1192 name = set.iterator().next(); 1193 } else { 1194 Map<String, String> languageInfo = lstregLanguageInfo 1195 .get(language); 1196 if (languageInfo != null) { 1197 name = languageInfo.get("Description"); 1198 } 1199 } 1200 } 1201 errln(scope + "\t" + language + "\t" + name + "\t" 1202 + Iso639Data.getType(language)); 1203 } 1204 } 1205 } 1206 getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo)1207 private Scope getScope(String language, 1208 Map<String, Map<String, String>> lstregLanguageInfo) { 1209 Scope languageScope = Iso639Data.getScope(language); 1210 Map<String, String> languageInfo = lstregLanguageInfo.get(language); 1211 if (languageInfo == null) { 1212 // System.out.println("Couldn't get lstreg info for " + language); 1213 } else { 1214 String lstregScope = 
languageInfo.get("Scope"); 1215 if (lstregScope != null) { 1216 Scope scope2 = Scope.fromString(lstregScope); 1217 if (languageScope != scope2) { 1218 // System.out.println("Mismatch in scope between LSTR and ISO 639:\t" 1219 // + scope2 + "\t" + 1220 // languageScope); 1221 languageScope = scope2; 1222 } 1223 } 1224 } 1225 return languageScope; 1226 } 1227 1228 static final boolean LOCALES_FIXED = true; 1229 TestPopulation()1230 public void TestPopulation() { 1231 Set<String> languages = SUPPLEMENTAL 1232 .getLanguagesForTerritoriesPopulationData(); 1233 Relation<String, String> baseToLanguages = Relation.of( 1234 new TreeMap<String, Set<String>>(), TreeSet.class); 1235 LanguageTagParser ltp = new LanguageTagParser(); 1236 LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(false); 1237 1238 for (String language : languages) { 1239 if (LOCALES_FIXED) { 1240 String canonicalForm = ltc.transform(language); 1241 if (!assertEquals("Canonical form", canonicalForm, language)) { 1242 int debug = 0; 1243 } 1244 } 1245 1246 String base = ltp.set(language).getLanguage(); 1247 String script = ltp.getScript(); 1248 baseToLanguages.put(base, language); 1249 1250 // add basic data, basically just for wo! 
1251 // if there are primary scripts, they must include script (if not 1252 // empty) 1253 Set<String> primaryScripts = Collections.emptySet(); 1254 Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL 1255 .getBasicLanguageDataMap(base); 1256 if (basicData != null) { 1257 BasicLanguageData s = basicData 1258 .get(BasicLanguageData.Type.primary); 1259 if (s != null) { 1260 primaryScripts = s.getScripts(); 1261 } 1262 } 1263 1264 // do some consistency tests; if there is a script, it must be in 1265 // primaryScripts 1266 if (!script.isEmpty() && !primaryScripts.contains(script)) { 1267 errln(base + ": Script found in territory data (" + script 1268 + ") is not in primary scripts :\t" + primaryScripts); 1269 } 1270 1271 // if there are multiple primary scripts, they will be in 1272 // baseToLanguages 1273 if (primaryScripts.size() > 1) { 1274 for (String script2 : primaryScripts) { 1275 baseToLanguages.put(base, base + "_" + script2); 1276 } 1277 } 1278 } 1279 1280 if (!LOCALES_FIXED) { 1281 // the invariants are that if we have a base, we must not have a script. 
1282 // and if we don't have a base, we must have two items 1283 for (String base : baseToLanguages.keySet()) { 1284 Set<String> languagesForBase = baseToLanguages.getAll(base); 1285 if (languagesForBase.contains(base)) { 1286 if (languagesForBase.size() > 1) { 1287 errln("Cannot have base alone with other scripts:\t" 1288 + languagesForBase); 1289 } 1290 } else { 1291 if (languagesForBase.size() == 1) { 1292 errln("Cannot have only one script for language:\t" 1293 + languagesForBase); 1294 } 1295 } 1296 } 1297 } 1298 } 1299 TestCompleteness()1300 public void TestCompleteness() { 1301 if (SUPPLEMENTAL.getSkippedElements().size() > 0) { 1302 logln("SupplementalDataInfo API doesn't support: " 1303 + SUPPLEMENTAL.getSkippedElements().toString()); 1304 } 1305 } 1306 1307 // these are settings for exceptional cases we want to allow 1308 private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW = new TreeSet<String>( 1309 Arrays.asList("ILS", "NZD", "PGK", "TWD")); 1310 1311 // ok since there is no problem with confusion 1312 private static final Set<String> OK_TO_NOT_HAVE_OLD = new TreeSet<String>( 1313 Arrays.asList("ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM", 1314 "FRF", "GRD", "IEP", "ITL", "LUF", "MTL", "MTP", "NLG", 1315 "PTE", "YUM", "ARA", "BAD", "BGL", "BOP", "BRC", "BRN", 1316 "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE", "HRD", 1317 "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI", 1318 "PES", "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD", 1319 "YUN", "ZRZ", "GWE")); 1320 1321 private static final Date LIMIT_FOR_NEW_CURRENCY = new Date( 1322 new Date().getYear() - 5, 1, 1); 1323 private static final Date NOW = new Date(); 1324 private Matcher oldMatcher = Pattern.compile( 1325 "\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE) 1326 .matcher(""); 1327 private Matcher newMatcher = Pattern.compile("\\bnew\\b", 1328 Pattern.CASE_INSENSITIVE).matcher(""); 1329 1330 /** 1331 * Test that access to currency info in supplemental data is 
ok. At this 1332 * point just a simple test. 1333 * 1334 * @param args 1335 */ TestCurrency()1336 public void TestCurrency() { 1337 IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance(); 1338 Set<String> currencyCodes = STANDARD_CODES 1339 .getGoodAvailableCodes("currency"); 1340 Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes = Relation 1341 .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), 1342 TreeSet.class); 1343 Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes = Relation 1344 .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), 1345 TreeSet.class); 1346 Set<String> territoriesWithoutModernCurrencies = new TreeSet<String>( 1347 STANDARD_CODES.getGoodAvailableCodes("territory")); 1348 Map<String, Date> currencyFirstValid = new TreeMap<String, Date>(); 1349 Map<String, Date> currencyLastValid = new TreeMap<String, Date>(); 1350 territoriesWithoutModernCurrencies.remove("ZZ"); 1351 1352 for (String territory : STANDARD_CODES 1353 .getGoodAvailableCodes("territory")) { 1354 /* "EU" behaves like a country for purposes of this test */ 1355 if ((SUPPLEMENTAL.getContained(territory) != null) 1356 && !territory.equals("EU")) { 1357 territoriesWithoutModernCurrencies.remove(territory); 1358 continue; 1359 } 1360 Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL 1361 .getCurrencyDateInfo(territory); 1362 if (currencyInfo == null) { 1363 continue; // error, but will pick up below. 
1364 } 1365 for (CurrencyDateInfo dateInfo : currencyInfo) { 1366 final String currency = dateInfo.getCurrency(); 1367 final Date start = dateInfo.getStart(); 1368 final Date end = dateInfo.getEnd(); 1369 if (dateInfo.getErrors().length() != 0) { 1370 logln("parsing " + territory + "\t" + dateInfo.toString() 1371 + "\t" + dateInfo.getErrors()); 1372 } 1373 Date firstValue = currencyFirstValid.get(currency); 1374 if (firstValue == null || firstValue.compareTo(start) < 0) { 1375 currencyFirstValid.put(currency, start); 1376 } 1377 Date lastValue = currencyLastValid.get(currency); 1378 if (lastValue == null || lastValue.compareTo(end) > 0) { 1379 currencyLastValid.put(currency, end); 1380 } 1381 if (start.compareTo(NOW) < 0 && end.compareTo(NOW) >= 0) { // Non-tender 1382 // is 1383 // OK... 1384 modernCurrencyCodes.put(currency, 1385 new Pair<String, CurrencyDateInfo>(territory, 1386 dateInfo)); 1387 territoriesWithoutModernCurrencies.remove(territory); 1388 } else { 1389 nonModernCurrencyCodes.put(currency, 1390 new Pair<String, CurrencyDateInfo>(territory, 1391 dateInfo)); 1392 } 1393 logln(territory 1394 + "\t" 1395 + dateInfo.toString() 1396 + "\t" 1397 + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME, 1398 currency)); 1399 } 1400 } 1401 // fix up 1402 nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet()); 1403 Relation<String, String> isoCurrenciesToCountries = Relation.of( 1404 new TreeMap<String, Set<String>>(), TreeSet.class) 1405 .addAllInverted(isoCodes.getCountryToCodes()); 1406 // now print error messages 1407 logln("Modern Codes: " + modernCurrencyCodes.size() + "\t" 1408 + modernCurrencyCodes); 1409 Set<String> missing = new TreeSet<String>( 1410 isoCurrenciesToCountries.keySet()); 1411 missing.removeAll(modernCurrencyCodes.keySet()); 1412 if (missing.size() != 0) { 1413 errln("Missing codes compared to ISO: " + missing.toString()); 1414 } 1415 1416 for (String currency : modernCurrencyCodes.keySet()) { 1417 Set<Pair<String, 
CurrencyDateInfo>> data = modernCurrencyCodes 1418 .getAll(currency); 1419 final String name = testInfo.getEnglish().getName( 1420 CLDRFile.CURRENCY_NAME, currency); 1421 1422 Set<String> isoCountries = isoCurrenciesToCountries 1423 .getAll(currency); 1424 if (isoCountries == null) { 1425 isoCountries = new TreeSet<String>(); 1426 } 1427 1428 TreeSet<String> cldrCountries = new TreeSet<String>(); 1429 for (Pair<String, CurrencyDateInfo> x : data) { 1430 cldrCountries.add(x.getFirst()); 1431 } 1432 if (!isoCountries.equals(cldrCountries)) { 1433 if (!logKnownIssue("cldrbug:10765", "Missing codes compared to ISO: " + missing.toString())) { 1434 1435 errln("Mismatch between ISO and Cldr modern currencies for " 1436 + currency + "\tISO:" + isoCountries + "\tCLDR:" 1437 + cldrCountries); 1438 showCountries("iso-cldr", isoCountries, cldrCountries, missing); 1439 showCountries("cldr-iso", cldrCountries, isoCountries, missing); 1440 } 1441 } 1442 1443 if (oldMatcher.reset(name).find()) { 1444 errln("Has 'old' in name but still used " + "\t" + currency 1445 + "\t" + name + "\t" + data); 1446 } 1447 if (newMatcher.reset(name).find() 1448 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) { 1449 // find the first use. 
If older than 5 years, flag as error 1450 if (currencyFirstValid.get(currency).compareTo( 1451 LIMIT_FOR_NEW_CURRENCY) < 0) { 1452 errln("Has 'new' in name but used since " 1453 + CurrencyDateInfo.formatDate(currencyFirstValid 1454 .get(currency)) 1455 + "\t" + currency + "\t" 1456 + name + "\t" + data); 1457 } else { 1458 logln("Has 'new' in name but used since " 1459 + CurrencyDateInfo.formatDate(currencyFirstValid 1460 .get(currency)) 1461 + "\t" + currency + "\t" 1462 + name + "\t" + data); 1463 } 1464 } 1465 } 1466 logln("Non-Modern Codes (with dates): " + nonModernCurrencyCodes.size() 1467 + "\t" + nonModernCurrencyCodes); 1468 for (String currency : nonModernCurrencyCodes.keySet()) { 1469 final String name = testInfo.getEnglish().getName( 1470 CLDRFile.CURRENCY_NAME, currency); 1471 if (newMatcher.reset(name).find() 1472 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) { 1473 logln("Has 'new' in name but NOT used since " 1474 + CurrencyDateInfo.formatDate(currencyLastValid 1475 .get(currency)) 1476 + "\t" + currency + "\t" + name 1477 + "\t" + nonModernCurrencyCodes.getAll(currency)); 1478 } else if (!oldMatcher.reset(name).find() 1479 && !OK_TO_NOT_HAVE_OLD.contains(currency)) { 1480 logln("Doesn't have 'old' or date range in name but NOT used since " 1481 + CurrencyDateInfo.formatDate(currencyLastValid 1482 .get(currency)) 1483 + "\t" 1484 + currency 1485 + "\t" 1486 + name 1487 + "\t" + nonModernCurrencyCodes.getAll(currency)); 1488 for (Pair<String, CurrencyDateInfo> pair : nonModernCurrencyCodes 1489 .getAll(currency)) { 1490 final String territory = pair.getFirst(); 1491 Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL 1492 .getCurrencyDateInfo(territory); 1493 for (CurrencyDateInfo dateInfo : currencyInfo) { 1494 if (dateInfo.getEnd().compareTo(NOW) < 0) { 1495 continue; 1496 } 1497 logln("\tCurrencies used instead: " 1498 + territory 1499 + "\t" 1500 + dateInfo 1501 + "\t" 1502 + testInfo.getEnglish().getName( 1503 CLDRFile.CURRENCY_NAME, 1504 
dateInfo.getCurrency())); 1505 1506 } 1507 } 1508 1509 } 1510 } 1511 Set<String> remainder = new TreeSet<String>(); 1512 remainder.addAll(currencyCodes); 1513 remainder.removeAll(nonModernCurrencyCodes.keySet()); 1514 // TODO make this an error, except for allowed exceptions. 1515 logln("Currencies without Territories: " + remainder); 1516 if (territoriesWithoutModernCurrencies.size() != 0) { 1517 errln("Modern territory missing currency: " 1518 + territoriesWithoutModernCurrencies); 1519 } 1520 } 1521 showCountries(final String title, Set<String> isoCountries, Set<String> cldrCountries, Set<String> missing)1522 private void showCountries(final String title, Set<String> isoCountries, 1523 Set<String> cldrCountries, Set<String> missing) { 1524 missing.clear(); 1525 missing.addAll(isoCountries); 1526 missing.removeAll(cldrCountries); 1527 for (String country : missing) { 1528 logln("\t\tExtra in " + title + "\t" + country + " - " 1529 + getRegionName(country)); 1530 } 1531 } 1532 TestCurrencyDecimalPlaces()1533 public void TestCurrencyDecimalPlaces() { 1534 IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance(); 1535 Relation<String, IsoCurrencyParser.Data> codeList = isoCodes 1536 .getCodeList(); 1537 Set<String> currencyCodes = STANDARD_CODES 1538 .getGoodAvailableCodes("currency"); 1539 for (String cc : currencyCodes) { 1540 Set<IsoCurrencyParser.Data> d = codeList.get(cc); 1541 if (d != null) { 1542 for (IsoCurrencyParser.Data x : d) { 1543 CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc); 1544 if (cni.digits != x.getMinorUnit()) { 1545 logln("Mismatch between ISO/CLDR for decimal places for currency => " + cc + 1546 ". 
ISO = " + x.getMinorUnit() + " CLDR = " + cni.digits); 1547 } 1548 } 1549 } 1550 } 1551 } 1552 1553 /** 1554 * Verify that we have a default script for every CLDR base language 1555 */ TestDefaultScripts()1556 public void TestDefaultScripts() { 1557 SupplementalDataInfo supp = SUPPLEMENTAL; 1558 Map<String, String> likelyData = supp.getLikelySubtags(); 1559 Map<String, String> baseToDefaultContentScript = new HashMap<String, String>(); 1560 for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) { 1561 String script = locale.getScript(); 1562 if (!script.isEmpty() && locale.getCountry().isEmpty()) { 1563 baseToDefaultContentScript.put(locale.getLanguage(), script); 1564 } 1565 } 1566 for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) { 1567 if ("root".equals(locale)) { 1568 continue; 1569 } 1570 CLDRLocale loc = CLDRLocale.getInstance(locale); 1571 String baseLanguage = loc.getLanguage(); 1572 String defaultScript = supp.getDefaultScript(baseLanguage); 1573 1574 String defaultContentScript = baseToDefaultContentScript 1575 .get(baseLanguage); 1576 if (defaultContentScript != null) { 1577 assertEquals(loc + " defaultContentScript = default", 1578 defaultScript, defaultContentScript); 1579 } 1580 String likely = likelyData.get(baseLanguage); 1581 String likelyScript = likely == null ? 
null : CLDRLocale 1582 .getInstance(likely).getScript(); 1583 Map<Type, BasicLanguageData> scriptInfo = supp 1584 .getBasicLanguageDataMap(baseLanguage); 1585 if (scriptInfo == null) { 1586 errln(loc + ": has no BasicLanguageData"); 1587 } else { 1588 BasicLanguageData data = scriptInfo.get(Type.primary); 1589 if (data == null) { 1590 data = scriptInfo.get(Type.secondary); 1591 } 1592 if (data == null) { 1593 errln(loc + ": has no scripts in BasicLanguageData"); 1594 } else if (!data.getScripts().contains(defaultScript)) { 1595 errln(loc + ": " + defaultScript 1596 + " not in BasicLanguageData " + data.getScripts()); 1597 } 1598 } 1599 1600 assertEquals(loc + " likely = default", defaultScript, likelyScript); 1601 1602 assertNotNull(loc + ": needs default script", defaultScript); 1603 1604 if (!loc.getScript().isEmpty()) { 1605 if (!loc.getScript().equals(defaultScript)) { 1606 assertNotEquals(locale 1607 + ": only include script if not default", 1608 loc.getScript(), defaultScript); 1609 } 1610 } 1611 1612 } 1613 } 1614 1615 enum CoverageIssue { 1616 log, warn, error 1617 } 1618 TestPluralCompleteness()1619 public void TestPluralCompleteness() { 1620 // Set<String> cardinalLocales = new 1621 // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal)); 1622 // Set<String> ordinalLocales = new 1623 // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal)); 1624 // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals = 1625 // PluralRulesFactory.getLocaleToSamplePatterns(); 1626 // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales(); 1627 // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale, 1628 // type).keySet()); 1629 // Map<ULocale, PluralRules> overrideCardinals = 1630 // PluralRulesFactory.getPluralOverrides(); 1631 // Set<ULocale> overrideCardinalLocales = new 1632 // HashSet<ULocale>(overrideCardinals.keySet()); 1633 1634 Set<String> testLocales = STANDARD_CODES.getLocaleCoverageLocales( 1635 
Organization.google, EnumSet.of(Level.MODERN)); 1636 Set<String> allLocales = testInfo.getCldrFactory().getAvailable(); 1637 LanguageTagParser ltp = new LanguageTagParser(); 1638 for (String locale : allLocales) { 1639 // the only known case where plural rules depend on region or script 1640 // is pt_PT 1641 if (locale.equals("root")) { 1642 continue; 1643 } 1644 ltp.set(locale); 1645 if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) { 1646 continue; 1647 } 1648 CoverageIssue needsCoverage = testLocales.contains(locale) 1649 ? CoverageIssue.error 1650 : CoverageIssue.log; 1651 CoverageIssue needsCoverage2 = needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage; 1652 1653 // if (logKnownIssue("Cldrbug:8809", "Missing plural rules/samples be and ga locales")) { 1654 // if (locale.equals("be") || locale.equals("ga")) { 1655 // needsCoverage = CoverageIssue.warn; 1656 // } 1657 // } 1658 PluralRulesFactory prf = PluralRulesFactory 1659 .getInstance(CLDRConfig.getInstance() 1660 .getSupplementalDataInfo()); 1661 1662 for (PluralType type : PluralType.values()) { 1663 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale, 1664 false); 1665 if (pluralInfo == null) { 1666 errOrLog(needsCoverage, locale + "\t" + type + " \tmissing plural rules", "Cldrbug:7839", "Missing plural data for modern locales"); 1667 continue; 1668 } 1669 Set<Count> counts = pluralInfo.getCounts(); 1670 // if (counts.size() == 1) { 1671 // continue; // skip checking samples 1672 // } 1673 HashSet<String> samples = new HashSet<String>(); 1674 EnumSet<Count> countsWithNoSamples = EnumSet 1675 .noneOf(Count.class); 1676 Relation<String, Count> samplesToCounts = Relation.of( 1677 new HashMap(), LinkedHashSet.class); 1678 Set<Count> countsFound = prf.getSampleCounts(locale, 1679 type.standardType); 1680 StringBuilder failureCases = new StringBuilder(); 1681 for (Count count : counts) { 1682 String pattern = prf.getSamplePattern(locale, type.standardType, count); 
// NOTE(review): the statements down to the first method-closing brace are the tail of a test
// method whose beginning lies outside this excerpt; they are reproduced unchanged.
                    final String rangeLine = getRangeLine(count, pluralInfo.getPluralRules(), pattern);
                    failureCases.append('\n').append(locale).append('\t').append(type).append('\t').append(rangeLine);
                    if (countsFound == null || !countsFound.contains(count)) {
                        countsWithNoSamples.add(count);
                    } else {
                        samplesToCounts.put(pattern, count);
                        logln(locale + "\t" + type + "\t" + count + "\t"
                            + pattern);
                    }
                }
                if (!countsWithNoSamples.isEmpty()) {
                    errOrLog(needsCoverage, locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples,
                        "cldrbug:7075", "Missing ordinal minimal pairs");
                    errOrLog(needsCoverage2, failureCases.toString());
                }
                for (Entry<String, Set<Count>> entry : samplesToCounts
                    .keyValuesSet()) {
                    if (entry.getValue().size() != 1) {
                        errOrLog(needsCoverage, locale + "\t" + type + "\t duplicate samples: " + entry.getValue()
                            + " => «" + entry.getKey() + "»", "cldrbug:7119", "Some duplicate minimal pairs");
                        errOrLog(needsCoverage2, failureCases.toString());
                    }
                }
            }
        }
    }

    /**
     * Reports {@code message} at the severity selected by {@code causeError}.
     *
     * <ul>
     * <li>{@code error} without a ticket: reported through {@code errln}.</li>
     * <li>{@code error} with a ticket: registered through {@code logKnownIssue} and then
     * reported through {@code warnln} (deliberate fall-through into the {@code warn} case).</li>
     * <li>{@code warn}: reported through {@code warnln}.</li>
     * <li>{@code log}: reported through {@code logln}.</li>
     * </ul>
     *
     * @param causeError severity selector; must not be null (the switch would throw)
     * @param message text to report
     * @param logTicket known-issue ticket id, or null if the problem is not a known issue
     * @param logComment comment passed to {@code logKnownIssue} together with the ticket
     */
    public void errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment) {
        switch (causeError) {
        case error:
            if (logTicket == null) {
                errln(message);
                break;
            }
            // Known issue: record the ticket, then downgrade the report to a warning.
            logKnownIssue(logTicket, logComment);
            // fall through
        case warn:
            warnln(message);
            break;
        case log:
            logln(message);
            break;
        }
    }

    /**
     * Convenience overload of {@link #errOrLog(CoverageIssue, String, String, String)} that
     * passes no known-issue ticket and no comment.
     *
     * @param causeError severity selector
     * @param message text to report
     */
    public void errOrLog(CoverageIssue causeError, String message) {
        errOrLog(causeError, message, null, null);
    }

    /**
     * Checks that the digit string of every numeric numbering system consists of consecutive
     * code points (each digit is exactly one more than the one before it). Systems named in
     * the exception list are skipped.
     */
    public void TestNumberingSystemDigits() {

        // Don't worry about digits from supplemental planes yet ( ICU can't
        // handle them anyways )
        // hanidec is the only known non codepoint order numbering system
        // TODO: Fix so that it works properly on non-BMP digit strings.
        List<String> knownExceptionList = Arrays.asList("brah", "cakm", "hanidec", "osma", "shrd",
            "sora", "takr");
        for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
            if (knownExceptionList.contains(ns)) {
                continue;
            }
            String digits = SUPPLEMENTAL.getDigits(ns);
            int previousChar = 0;

            // Walk the string by code point rather than by char.
            int i = 0;
            while (i < digits.length()) {
                int ch = digits.codePointAt(i);
                if (i > 0 && ch != previousChar + 1) {
                    errln("Digits for numbering system "
                        + ns
                        + " are not in code point order. Previous char = U+"
                        + Utility.hex(previousChar, 4)
                        + " Current char = U+" + Utility.hex(ch, 4));
                    break; // one report per numbering system
                }
                previousChar = ch;
                i += Character.charCount(ch);
            }
        }
    }

    /**
     * Checks that every code point whose general category is DECIMAL_DIGIT_NUMBER occurs in
     * the digit string of at least one numeric numbering system; each code point that does
     * not is reported as an error.
     */
    public void TestNumberingSystemDigitCompleteness() {
        // A sorted set keeps the ascending reporting order of the scan below while making each
        // removal cheap (removing from a list is a linear search per digit).
        Set<Integer> unicodeDigits = new TreeSet<>();
        for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) {
            if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) {
                unicodeDigits.add(Integer.valueOf(cp));
            }
        }

        // Strike out every digit that some numbering system accounts for.
        for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
            String digits = SUPPLEMENTAL.getDigits(ns);
            int i = 0;
            while (i < digits.length()) {
                int ch = digits.codePointAt(i);
                unicodeDigits.remove(Integer.valueOf(ch));
                i += Character.charCount(ch);
            }
        }

        // Whatever is left is not covered by any numbering system.
        for (int cp : unicodeDigits) {
            errln("Unicode digit: " + UCharacter.getName(cp) + " is not in any numbering system. Script = "
                + UScript.getShortName(UScript.getScript(cp)));
        }
    }

    /**
     * For every canonical time zone ID that is not in region "001" and not in the list of
     * zones known to lack a metazone, checks that metazone ranges exist and compares the
     * earliest bounded start and the latest bounded end of those ranges against fixed goal
     * dates.
     */
    public void TestMetazones() {
        // The deprecated Date(year, month, day) constructor takes the year as an offset from
        // 1900 and a zero-based month: these are 1970-01-01 and 2200-01-02, local time.
        Date goalMin = new Date(70, 0, 1);
        Date goalMax = new Date(300, 0, 2);
        ImmutableSet<String> knownTZWithoutMetazone = ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov");
        for (String timezoneRaw : TimeZone.getAvailableIDs()) {
            String timezone = TimeZone.getCanonicalID(timezoneRaw);
            String region = TimeZone.getRegion(timezone);
            // Only canonical IDs are tested; aliases and region "001" zones are skipped.
            if (!timezone.equals(timezoneRaw) || "001".equals(region)) {
                continue;
            }
            if (knownTZWithoutMetazone.contains(timezone)) {
                continue;
            }
            final Set<MetaZoneRange> ranges = SUPPLEMENTAL
                .getMetaZoneRanges(timezone);

            if (assertNotNull("metazones for " + timezone, ranges)) {
                // Unbounded ends (START_OF_TIME / END_OF_TIME) are ignored, so min/max keep
                // their initial extreme values when every range is open on that side.
                long min = Long.MAX_VALUE;
                long max = Long.MIN_VALUE;
                for (MetaZoneRange range : ranges) {
                    if (range.dateRange.from != DateRange.START_OF_TIME) {
                        min = Math.min(min, range.dateRange.from);
                    }
                    if (range.dateRange.to != DateRange.END_OF_TIME) {
                        max = Math.max(max, range.dateRange.to);
                    }
                }
                assertRelation(timezone + " has metazone before 1970?", true,
                    goalMin, LEQ, new Date(min));
                assertRelation(timezone
                    + " has metazone until way in the future?", true,
                    goalMax, GEQ, new Date(max));
            }
        }
    }

    /**
     * Checks that the population recorded for yue_Hans in CN is smaller than the population
     * recorded for Chinese ("zh" or "zh_Hans", depending on {@code LOCALES_FIXED}) in CN.
     */
    public void Test9924() {
        PopulationData zhCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(LOCALES_FIXED ? "zh" : "zh_Hans", "CN");
        PopulationData yueCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN");
        // Report missing data as a test failure instead of dying with a NullPointerException.
        // Both checks always run so that each missing record is reported.
        boolean haveZh = assertNotNull("population data for zh in CN", zhCNData);
        boolean haveYue = assertNotNull("population data for yue_Hans in CN", yueCNData);
        if (haveZh && haveYue) {
            // NOTE(review): the message mentions "yue*10" but the comparison applies no factor
            // of 10 - confirm which of the two is intended.
            assertTrue("yue*10 < zh", yueCNData.getPopulation() < zhCNData.getPopulation());
        }
    }

    /**
     * Cross-checks the language codes offered by the Survey Tool against the languages of the
     * available locales: Survey Tool codes without a locale are warned about or logged
     * (split by coverage level), and locale languages missing from the Survey Tool list fail
     * the test.
     */
    public void Test10765() { //
        Set<String> surveyToolLanguages = SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool
        Set<String> mainLanguages = new TreeSet<>();
        LanguageTagParser ltp = new LanguageTagParser();
        for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
            mainLanguages.add(ltp.set(locale).getLanguage());
        }
        // add special codes we want to see anyway
        mainLanguages.add("und");
        mainLanguages.add("mul");
        mainLanguages.add("zxx");

        if (!mainLanguages.containsAll(surveyToolLanguages)) {
            CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale
            Set<String> temp = new TreeSet<>(surveyToolLanguages);
            temp.removeAll(mainLanguages);
            Set<String> modern = new TreeSet<>();
            Set<String> comprehensive = new TreeSet<>();
            for (String lang : temp) {
                Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang));
                // Levels that do not sort after MODERN go into the "modern" bucket.
                if (level.compareTo(Level.MODERN) <= 0) {
                    modern.add(lang);
                } else {
                    comprehensive.add(lang);
                }
            }
            warnln("«Modern» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(modern));
            logln("«Comprehensive» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(comprehensive));
        }
        if (!surveyToolLanguages.containsAll(mainLanguages)) {
            // Reduce the local set to the offending codes so the failure message lists them.
            mainLanguages.removeAll(surveyToolLanguages);
            assertEquals("No main/* languages are missing from Survey Tool:language names (eg <variable id='$language' type='choice'>) ",
                Collections.<String>emptySet(), mainLanguages);
        }
    }

    /**
     * Maps language codes to display strings of the form {@code "<English name> (<code>)"}.
     *
     * @param temp language codes to describe
     * @return a new sorted set holding one display string per code
     */
    private Set<String> getNames(Set<String> temp) {
        Set<String> tempNames = new TreeSet<>();
        CLDRFile english = testInfo.getEnglish(); // loop-invariant lookup
        for (String langCode : temp) {
            tempNames.add(english.getName(CLDRFile.LANGUAGE_NAME, langCode) + " (" + langCode + ")");
        }
        return tempNames;
    }
}