1 package org.unicode.cldr.unittest; 2 3 import java.util.ArrayList; 4 import java.util.Arrays; 5 import java.util.Collection; 6 import java.util.Collections; 7 import java.util.Date; 8 import java.util.EnumMap; 9 import java.util.EnumSet; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.Iterator; 13 import java.util.LinkedHashMap; 14 import java.util.LinkedHashSet; 15 import java.util.List; 16 import java.util.Locale; 17 import java.util.Map; 18 import java.util.Map.Entry; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.TreeSet; 22 import java.util.logging.Logger; 23 import java.util.regex.Matcher; 24 import java.util.regex.Pattern; 25 26 import org.unicode.cldr.draft.ScriptMetadata; 27 import org.unicode.cldr.test.CoverageLevel2; 28 import org.unicode.cldr.tool.LikelySubtags; 29 import org.unicode.cldr.tool.PluralMinimalPairs; 30 import org.unicode.cldr.tool.PluralRulesFactory; 31 import org.unicode.cldr.util.Builder; 32 import org.unicode.cldr.util.CLDRConfig; 33 import org.unicode.cldr.util.CLDRFile; 34 import org.unicode.cldr.util.CLDRFile.WinningChoice; 35 import org.unicode.cldr.util.CLDRLocale; 36 import org.unicode.cldr.util.CldrUtility; 37 import org.unicode.cldr.util.GrammarInfo; 38 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; 39 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope; 40 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget; 41 import org.unicode.cldr.util.Iso639Data; 42 import org.unicode.cldr.util.Iso639Data.Scope; 43 import org.unicode.cldr.util.IsoCurrencyParser; 44 import org.unicode.cldr.util.LanguageTagCanonicalizer; 45 import org.unicode.cldr.util.LanguageTagParser; 46 import org.unicode.cldr.util.Level; 47 import org.unicode.cldr.util.Organization; 48 import org.unicode.cldr.util.Pair; 49 import org.unicode.cldr.util.PluralRanges; 50 import org.unicode.cldr.util.PreferredAndAllowedHour; 51 import 
org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle; 52 import org.unicode.cldr.util.StandardCodes; 53 import org.unicode.cldr.util.StandardCodes.CodeType; 54 import org.unicode.cldr.util.StandardCodes.LstrType; 55 import org.unicode.cldr.util.SupplementalDataInfo; 56 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 57 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type; 58 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; 59 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 60 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; 61 import org.unicode.cldr.util.SupplementalDataInfo.DateRange; 62 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange; 63 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 64 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 65 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 66 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 67 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 68 import org.unicode.cldr.util.SupplementalDataInfo.SampleList; 69 import org.unicode.cldr.util.Validity; 70 import org.unicode.cldr.util.Validity.Status; 71 72 import com.google.common.base.Joiner; 73 import com.google.common.collect.ImmutableSet; 74 import com.google.common.collect.Multimap; 75 import com.google.common.collect.TreeMultimap; 76 import com.ibm.icu.impl.Relation; 77 import com.ibm.icu.impl.Row; 78 import com.ibm.icu.impl.Row.R2; 79 import com.ibm.icu.impl.Row.R3; 80 import com.ibm.icu.impl.Utility; 81 import com.ibm.icu.impl.number.DecimalQuantity; 82 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD; 83 import com.ibm.icu.lang.UCharacter; 84 import com.ibm.icu.lang.UCharacterEnums; 85 import com.ibm.icu.lang.UScript; 86 import com.ibm.icu.text.PluralRules; 87 import com.ibm.icu.text.PluralRules.DecimalQuantitySamples; 88 import 
com.ibm.icu.text.PluralRules.DecimalQuantitySamplesRange; 89 import com.ibm.icu.text.PluralRules.SampleType; 90 import com.ibm.icu.text.StringTransform; 91 import com.ibm.icu.text.UnicodeSet; 92 import com.ibm.icu.util.Output; 93 import com.ibm.icu.util.TimeZone; 94 import com.ibm.icu.util.ULocale; 95 96 public class TestSupplementalInfo extends TestFmwkPlus { 97 static CLDRConfig testInfo = CLDRConfig.getInstance(); 98 99 private static final StandardCodes STANDARD_CODES = StandardCodes.make(); 100 101 private static final SupplementalDataInfo SUPPLEMENTAL = testInfo 102 .getSupplementalDataInfo(); 103 main(String[] args)104 public static void main(String[] args) { 105 new TestSupplementalInfo().run(args); 106 } 107 TestPluralSampleOrder()108 public void TestPluralSampleOrder() { 109 HashSet<PluralInfo> seen = new HashSet<>(); 110 for (String locale : SUPPLEMENTAL.getPluralLocales()) { 111 if (locale.equals("root")) { 112 continue; 113 } 114 PluralInfo pi = SUPPLEMENTAL.getPlurals(locale); 115 if (seen.contains(pi)) { 116 continue; 117 } 118 seen.add(pi); 119 for (SampleType s : SampleType.values()) { 120 for (Count c : pi.getCounts(s)) { 121 DecimalQuantitySamples sSamples = pi.getPluralRules() 122 .getDecimalSamples(c.toString(), s); 123 if (sSamples == null) { 124 errln(locale + " no sample for " + c); 125 continue; 126 } 127 if (s == SampleType.DECIMAL) { 128 continue; // skip 129 } 130 DecimalQuantitySamplesRange lastSample = null; 131 for (DecimalQuantitySamplesRange sample : sSamples.getSamples()) { 132 if (lastSample != null) { 133 if (compare(lastSample.start,sample.start) > 0) { 134 errln(locale + ":" + c + ": out of order with " 135 + lastSample + " > " + sample); 136 } else if (false) { 137 logln(locale + ":" + c + ": in order with " 138 + lastSample + " < " + sample); 139 } 140 } 141 lastSample = sample; 142 } 143 } 144 } 145 } 146 } 147 compare(DecimalQuantity me, DecimalQuantity other)148 public static int compare(DecimalQuantity me, DecimalQuantity 
other) { 149 // We place exponent notation samples entirely after ones without exponent 150 if (me.getExponent() != other.getExponent()) { 151 return me.getExponent() < other.getExponent() ? -1 : 1; 152 } 153 154 return (int) (me.toDouble() - other.toDouble()); 155 } 156 TestPluralRanges()157 public void TestPluralRanges() { 158 PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 159 Set<String> localesToTest = new TreeSet<>( 160 SUPPLEMENTAL.getPluralRangesLocales()); 161 for (String locale : StandardCodes.make().getLocaleCoverageLocales( 162 "google")) { // superset 163 if (locale.equals("*") || locale.contains("_")) { 164 continue; 165 } 166 localesToTest.add(locale); 167 } 168 Set<String> modernLocales = StandardCodes.make() 169 .getLocaleCoverageLocales(Organization.cldr, 170 EnumSet.of(Level.MODERN)); 171 172 Output<DecimalQuantity> maxSample = new Output<>(); 173 Output<DecimalQuantity> minSample = new Output<>(); 174 175 for (String locale : localesToTest) { 176 final String templateLine = "Template for " + ULocale.getDisplayName(locale, "en") + " (" + locale + ") translators to fix:"; 177 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 178 Set<Count> counts = pluralInfo.getCounts(); 179 180 final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(new ULocale(locale).toString()); 181 182 // check that there are no null values 183 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 184 if (pluralRanges == null) { 185 if (!modernLocales.contains(locale)) { 186 logln("Missing plural ranges for " + locale); 187 } else { 188 errOrLog(CoverageIssue.error, locale + "\tMissing plural ranges", "Cldrbug:7839", "Missing plural data for modern locales"); 189 StringBuilder failureCases = new StringBuilder(templateLine); 190 for (Count start : counts) { 191 for (Count end : counts) { 192 pluralInfo.rangeExists(start, end, minSample, maxSample); 193 final String rangeLine = getRangeLine(start, end, null, maxSample, 
minSample, samplePatterns); 194 failureCases.append("\n" + locale + "\t" + rangeLine); 195 } 196 } 197 errOrLog(CoverageIssue.warn, failureCases.toString()); 198 } 199 continue; 200 } 201 EnumSet<Count> found = EnumSet.noneOf(Count.class); 202 for (Count count : Count.values()) { 203 if (pluralRanges.isExplicitlySet(count) 204 && !counts.contains(count)) { 205 assertTrue( 206 locale 207 + "\t pluralRanges categories must be valid for locale:\t" 208 + count + " must be in " + counts, 209 !pluralRanges.isExplicitlySet(count)); 210 } 211 for (Count end : Count.values()) { 212 Count result = pluralRanges.getExplicit(count, end); 213 if (result != null) { 214 found.add(result); 215 } 216 } 217 } 218 219 // check empty range results 220 if (found.isEmpty()) { 221 errOrLog(CoverageIssue.error, "Empty range results for " + locale, "Cldrbug:7839", "Missing plural data for modern locales"); 222 } else { 223 if (samplePatterns == null) { 224 errOrLog(CoverageIssue.error, locale + "\tMissing sample patterns", "Cldrbug:7839", "Missing plural data for modern locales"); 225 } else { 226 for (Count result : found) { 227 String samplePattern = samplePatterns.get( 228 PluralRules.PluralType.CARDINAL, result); 229 if (samplePattern != null && !samplePattern.contains("{0}")) { 230 errln("Plural Ranges cannot have results that don't use {0} in samples: " 231 + locale 232 + ", " 233 + result 234 + "\t«" 235 + samplePattern + "»"); 236 } 237 } 238 } 239 if (isVerbose()) { 240 logln("Range results for " + locale + ":\t" + found); 241 } 242 } 243 244 // check for missing values 245 boolean failure = false; 246 StringBuilder failureCases = new StringBuilder(templateLine); 247 for (Count start : counts) { 248 for (Count end : counts) { 249 boolean needsValue = pluralInfo.rangeExists(start, end, 250 minSample, maxSample); 251 Count explicitValue = pluralRanges.getExplicit(start, end); 252 final String rangeLine = getRangeLine(start, end, explicitValue, maxSample, minSample, samplePatterns); 
253 failureCases.append("\n" + locale + "\t" + rangeLine); 254 if (needsValue && explicitValue == null) { 255 errOrLog(CoverageIssue.error, locale + "\tNo explicit value for range: " 256 + rangeLine, 257 "Cldrbug:7839", "Missing plural data for modern locales"); 258 failure = true; 259 failureCases.append("\tError — need explicit result"); 260 } else if (!needsValue && explicitValue != null) { 261 errOrLog(CoverageIssue.error, locale + "\tDoesn't need explicit value, but has one: " 262 + PluralRanges.showRange(start, end, explicitValue), 263 "Cldrbug:7839", "Missing plural data for modern locales"); 264 failureCases.append("\tUnnecessary"); 265 failure = true; 266 } else { 267 failureCases.append("\tOK"); 268 } 269 } 270 } 271 if (failure) { 272 errOrLog(CoverageIssue.warn, failureCases.toString()); 273 } 274 } 275 } 276 getRangeLine(Count start, Count end, Count result, Output<DecimalQuantity> maxSample, Output<DecimalQuantity> minSample, PluralMinimalPairs samplePatterns)277 private String getRangeLine(Count start, Count end, Count result, 278 Output<DecimalQuantity> maxSample, Output<DecimalQuantity> minSample, 279 PluralMinimalPairs samplePatterns) { 280 final String range = minSample + "–" + maxSample; 281 String example = range; 282 if (samplePatterns != null) { 283 example = ""; 284 if (result != null) { 285 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result); 286 example += "«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»"; 287 } else { 288 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) { 289 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c); 290 example += c + ":«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»" + "?\tOR "; 291 } 292 example += " …"; 293 } 294 } 295 return start + "\t" + end + "\t" + (result == null ? "?" 
: result.toString()) + "\t" + example; 296 } 297 getRangeLine(Count count, PluralRules pluralRules, String pattern)298 private String getRangeLine(Count count, PluralRules pluralRules, String pattern) { 299 String sample = "?"; 300 DecimalQuantitySamples exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER); 301 if (exampleList == null) { 302 exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL); 303 } 304 DecimalQuantity sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList); 305 sample = sampleDecimal.toString(); 306 307 String example = pattern == null ? "NO-SAMPLE!" : "«" + pattern.replace("{0}", sample) + "»"; 308 return count + "\t" + example; 309 } 310 TestPluralSamples()311 public void TestPluralSamples() { 312 String[][] test = { { "en", "ordinal", "1", "one" }, 313 { "en", "ordinal", "2", "two" }, 314 { "en", "ordinal", "3", "few" }, 315 { "en", "ordinal", "4", "other" }, 316 { "sl", "cardinal", "2", "two" }, }; 317 for (String[] row : test) { 318 checkPluralSamples(row); 319 } 320 } 321 TestPluralSamples2()322 public void TestPluralSamples2() { 323 PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 324 for (String locale : prf.getLocales()) { 325 if (locale.equals("und")) { 326 continue; 327 } 328 if (locale.equals("pl")) { 329 int debug = 0; 330 } 331 final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale); 332 for (PluralRules.PluralType type : PluralRules.PluralType.values()) { 333 PluralInfo rules = SUPPLEMENTAL.getPlurals( 334 SupplementalDataInfo.PluralType.fromStandardType(type), 335 locale.toString()); 336 if (rules.getCounts().size() == 1) { 337 continue; // don't require rules for unary cases 338 } 339 Multimap<String, Count> sampleToCount = TreeMultimap.create(); 340 341 for (Count count : rules.getCounts()) { 342 String sample = samplePatterns.get(type, count); 343 if (sample == null) { 344 
errOrLog(CoverageIssue.error, locale + "\t" + type + " \tmissing samples for " + count, "cldrbug:7075", 345 "Missing ordinal minimal pairs"); 346 } else { 347 sampleToCount.put(sample, count); 348 PluralRules pRules = rules.getPluralRules(); 349 double unique = pRules.getUniqueKeywordValue(count 350 .toString()); 351 if (unique == PluralRules.NO_UNIQUE_VALUE 352 && !sample.contains("{0}")) { 353 errln("Missing {0} in sample: " + locale + ", " + type + ", " + count + " «" + sample + "»"); 354 } 355 } 356 } 357 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) { 358 if (entry.getValue().size() > 1) { 359 errln("Colliding minimal pair samples: " + locale + ", " + type + ", " + entry.getValue() + " «" + entry.getKey() + "»"); 360 } 361 } 362 } 363 } 364 } 365 TestCldrScriptCodes()366 public void TestCldrScriptCodes() { 367 Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes(); 368 369 Set<String> unicodeScripts = ScriptMetadata.getScripts(); 370 assertRelation("getCLDRScriptCodes contains Unicode Scripts", true, codes, CONTAINS_ALL, unicodeScripts); 371 372 ImmutableSet<String> allSpecials = ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz"); 373 assertRelation("getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials); 374 375 ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore"); 376 assertRelation("getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos); 377 378 Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script); 379 for (Entry<Status, Set<String>> e : scripts.entrySet()) { 380 switch (e.getKey()) { 381 case regular: 382 case special: 383 case unknown: 384 assertRelation("getCLDRScriptCodes contains " + e.getKey(), true, codes, CONTAINS_ALL, e.getValue()); 385 break; 386 default: 387 break; // do nothin 388 } 389 } 390 391 ImmutableSet<String> variants = ImmutableSet.of("Cyrs", "Geok", "Latf", 
"Latg", "Syre", "Syrj", "Syrn"); 392 assertRelation("getCLDRScriptCodes contains variants", false, codes, CONTAINS_SOME, variants); 393 } 394 checkPluralSamples(String... row)395 public void checkPluralSamples(String... row) { 396 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals( 397 PluralType.valueOf(row[1]), row[0]); 398 Count count = pluralInfo.getCount(DecimalQuantity_DualStorageBCD.fromExponentString(row[2])); 399 assertEquals(String.join(", ", row), 400 Count.valueOf(row[3]), count); 401 } 402 TestPluralLocales()403 public void TestPluralLocales() { 404 // get the unique rules 405 for (PluralType type : PluralType.values()) { 406 Relation<PluralInfo, String> pluralsToLocale = Relation.of( 407 new HashMap<PluralInfo, Set<String>>(), TreeSet.class); 408 for (String locale : new TreeSet<>( 409 SUPPLEMENTAL.getPluralLocales(type))) { 410 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale); 411 pluralsToLocale.put(pluralInfo, locale); 412 } 413 414 String[][] equivalents = { { "mo", "ro" }, { "tl", "fil" }, 415 { "he", "iw" }, { "in", "id" }, { "jw", "jv" }, 416 { "ji", "yi" }, { "sh", "sr" }, }; 417 for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale 418 .keyValuesSet()) { 419 PluralInfo pluralInfo2 = pluralInfoEntry.getKey(); 420 Set<String> locales = pluralInfoEntry.getValue(); 421 // check that equivalent locales are either both in or both out 422 for (String[] row : equivalents) { 423 assertEquals( 424 type + " must be equivalent: " + Arrays.asList(row), 425 locales.contains(row[0]), locales.contains(row[1])); 426 } 427 // check that no rules contain 'within' 428 for (Count count : pluralInfo2.getCounts()) { 429 String rule = pluralInfo2.getRule(count); 430 if (rule == null) { 431 continue; 432 } 433 assertFalse( 434 "Rule '" + rule + "' for " + Arrays.asList(locales) 435 + " doesn't contain 'within'", 436 rule.contains("within")); 437 } 438 } 439 } 440 } 441 TestDigitPluralCases()442 public void TestDigitPluralCases() { 443 
String[][] tests = { 444 { "en", "one", "1", "1" }, 445 { "en", "one", "2", "" }, 446 { "en", "one", "3", "" }, 447 { "en", "one", "4", "" }, 448 { "en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, …" }, 449 { "en", "other", "2", "10-99, 10.0, 10.1, 10.2, …" }, 450 { "en", "other", "3", "100-999, 100.0, 100.1, 100.2, …" }, 451 { "en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, …" }, 452 { "hr", "one", "1", "1, 0.1, 2.10, 1.1, …" }, 453 { "hr", "one", "2", 454 "21, 31, 41, 51, 61, 71, …, 10.1, 12.10, 11.1, …" }, 455 { "hr", "one", "3", 456 "101, 121, 131, 141, 151, 161, …, 100.1, 102.10, 101.1, …" }, 457 { "hr", "one", "4", 458 "1001, 1021, 1031, 1041, 1051, 1061, …, 1000.1, 1002.10, 1001.1, …" }, 459 { "hr", "few", "1", "2-4, 0.2, 0.3, 0.4, …" }, 460 { "hr", "few", "2", 461 "22-24, 32-34, 42-44, …, 10.2, 10.3, 10.4, …" }, 462 { "hr", "few", "3", 463 "102-104, 122-124, 132-134, …, 100.2, 100.3, 100.4, …" }, 464 { "hr", "few", "4", 465 "1002-1004, 1022-1024, 1032-1034, …, 1000.2, 1000.3, 1000.4, …" }, 466 { "hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, …" }, 467 { "hr", "other", "2", 468 "10-20, 25-30, 35-40, …, 10.0, 10.5, 10.6, …" }, 469 { "hr", "other", "3", 470 "100, 105-120, 125-130, 135-140, …, 100.0, 100.5, 100.6, …" }, 471 { "hr", "other", "4", 472 "1000, 1005-1020, 1025-1030, 1035-1040, …, 1000.0, 1000.5, 1000.6, …" }, }; 473 for (String[] row : tests) { 474 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]); 475 SampleList uset = plurals.getSamples9999(Count.valueOf(row[1]), 476 Integer.parseInt(row[2])); 477 assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3], 478 uset.toString()); 479 } 480 } 481 TestDigitPluralCompleteness()482 public void TestDigitPluralCompleteness() { 483 String[][] exceptionStrings = { 484 // defaults 485 { "*", "zero", "0,00,000,0000" }, 486 { "*", "one", "0" }, 487 { "*", "two", "0,00,000,0000" }, 488 { "*", "few", "0,00,000,0000" }, 489 { "*", "many", "0,00,000,0000" }, 490 { "*", "other", "0,00,000,0000" }, 491 // others 
492 { "mo", "other", "00,000,0000" }, // 493 { "ro", "other", "00,000,0000" }, // 494 { "cs", "few", "0" }, // j in 2..4 495 { "sk", "few", "0" }, // j in 2..4 496 { "da", "one", "0" }, // j is 1 or t is not 0 and n within 0..2 497 { "is", "one", "0,00,000,0000" }, // j is 1 or f is 1 498 { "sv", "one", "0" }, // j is 1 499 { "he", "two", "0" }, // j is 2 500 { "ru", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 501 // is not 11 502 { "uk", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 503 // is not 11 504 { "bs", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 505 // is not 11 or f mod 10 is 506 // 1 and f mod 100 is not 11 507 { "hr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 508 // is not 11 or f mod 10 is 509 // 1 and f mod 100 is not 11 510 { "sh", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 511 // is not 11 or f mod 10 is 512 // 1 and f mod 100 is not 11 513 { "sr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 514 // is not 11 or f mod 10 is 515 // 1 and f mod 100 is not 11 516 { "mk", "one", "0,00,000,0000" }, // j mod 10 is 1 or f mod 10 517 // is 1 518 { "sl", "one", "0,000,0000" }, // j mod 100 is 1 519 { "sl", "two", "0,000,0000" }, // j mod 100 is 2 520 { "he", "many", "00,000,0000" }, // j not in 0..10 and j mod 10 521 // is 0 522 { "tzm", "one", "0,00" }, // n in 0..1 or n in 11..99 523 { "gd", "one", "0,00" }, // n in 1,11 524 { "gd", "two", "0,00" }, // n in 2,12 525 { "shi", "few", "0,00" }, // n in 2..10 526 { "gd", "few", "0,00" }, // n in 3..10,13..19 527 { "ga", "few", "0" }, // n in 3..6 528 { "ga", "many", "0,00" }, // n in 7..10 529 { "ar", "zero", "0" }, // n is 0 530 { "cy", "zero", "0" }, // n is 0 531 { "ksh", "zero", "0" }, // n is 0 532 { "lag", "zero", "0" }, // n is 0 533 { "pt", "one", "0" }, // i = 1 and v = 0 or i = 0 and t = 1 534 { "pt_PT", "one", "0" }, // n = 1 and v = 0 535 { "ar", "two", "0" }, // n is 2 536 { "cy", "two", "0" }, // n is 2 537 { "ga", 
"two", "0" }, // n is 2 538 { "iu", "two", "0" }, // n is 2 539 { "naq", "two", "0" }, // n is 2 540 { "se", "two", "0" }, // n is 2 541 { "sma", "two", "0" }, // n is 2 542 { "smi", "two", "0" }, // n is 2 543 { "smj", "two", "0" }, // n is 2 544 { "smn", "two", "0" }, // n is 2 545 { "sms", "two", "0" }, // n is 2 546 { "cy", "few", "0" }, // n is 3 547 { "cy", "many", "0" }, // n is 6 548 { "br", "many", "" }, // n is not 0 and n mod 1000000 is 0 549 { "gv", "one", "0,00,000,0000" }, // n mod 10 is 1 550 { "be", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 551 // is not 11 552 { "lv", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 553 // is not 11 or v is 2 and f 554 // mod 10 is 1 and f mod 100 555 // is not 11 or v is not 2 556 // and f mod 10 is 1 557 { "br", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 558 // not in 11,71,91 559 { "lt", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 560 // not in 11..19 561 { "fil", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v = 562 // 0 and i % 10 != 4,6,9 or 563 // v != 0 and f % 10 != 564 // 4,6,9 565 { "tl", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v = 566 // 0 and i % 10 != 4,6,9 or 567 // v != 0 and f % 10 != 568 // 4,6,9 569 { "dsb", "one", "0,00,000,0000" }, // v = 0 and i % 100 = 1 or f 570 // % 100 = 1 571 {"kw", "many", "00,000,0000"}, // n != 1 and n % 100 = 1,21,41,61,81 572 {"kw", "zero", "0"}, // n = 0 573 {"mt", "two", "0"}, 574 {"fr", "many", ""}, // e is special 575 {"ca", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 576 {"es", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 577 {"it", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 578 {"pt", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 579 {"pt_PT", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 580 }; 581 // parse out the exceptions 582 
Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<>(); 583 Relation<Count, Integer> fallback = Relation.of( 584 new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class); 585 for (String[] row : exceptionStrings) { 586 Relation<Count, Integer> countToDigits; 587 if (row[0].equals("*")) { 588 countToDigits = fallback; 589 } else { 590 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]); 591 countToDigits = exceptions.get(plurals); 592 if (countToDigits == null) { 593 exceptions.put( 594 plurals, 595 countToDigits = Relation.of( 596 new EnumMap<Count, Set<Integer>>( 597 Count.class), 598 TreeSet.class)); 599 } 600 } 601 Count c = Count.valueOf(row[1]); 602 for (String digit : row[2].split(",")) { 603 // "99" is special, just to have the result be non-empty 604 countToDigits.put(c, digit.length()); 605 } 606 } 607 Set<PluralInfo> seen = new HashSet<>(); 608 Set<String> sorted = new TreeSet<>( 609 SUPPLEMENTAL.getPluralLocales(PluralType.cardinal)); 610 Relation<String, String> ruleToExceptions = Relation.of( 611 new TreeMap<String, Set<String>>(), TreeSet.class); 612 613 for (String locale : sorted) { 614 PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale); 615 if (seen.contains(plurals)) { // skip identicals 616 continue; 617 } 618 Relation<Count, Integer> countToDigits = exceptions.get(plurals); 619 if (countToDigits == null) { 620 countToDigits = fallback; 621 } 622 for (Count c : plurals.getCounts()) { 623 List<String> compose = new ArrayList<>(); 624 boolean needLine = false; 625 Set<Integer> digitSet = countToDigits.get(c); 626 if (digitSet == null) { 627 digitSet = fallback.get(c); 628 } 629 for (int digits = 1; digits < 5; ++digits) { 630 boolean expected = digitSet.contains(digits); 631 boolean hasSamples = plurals.hasSamples(c, digits); 632 if (hasSamples) { 633 compose.add(Utility.repeat("0", digits)); 634 } 635 if (!assertEquals(locale + ", " + digits + ", " + c, 636 expected, hasSamples)) { 637 needLine = true; 638 } 639 } 640 if 
(needLine) { 641 String countRules = plurals.getPluralRules().getRules( 642 c.toString()); 643 ruleToExceptions.put(countRules == null ? "" : countRules, 644 "{\"" + locale + "\", \"" + c + "\", \"" 645 + Joiner.on(",").join(compose) 646 + "\"},"); 647 } 648 } 649 } 650 if (!ruleToExceptions.isEmpty()) { 651 System.out 652 .println("To fix the above, review the following, then replace in TestDigitPluralCompleteness"); 653 for (Entry<String, String> entry : ruleToExceptions.entrySet()) { 654 System.out.println(entry.getValue() + "\t// " + entry.getKey()); 655 } 656 } 657 } 658 TestLikelyCode()659 public void TestLikelyCode() { 660 Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags(); 661 String[][] tests = { { "it_AQ", "it_Latn_AQ" }, 662 { "it_Arab", "it_Arab_IT" }, { "az_Cyrl", "az_Cyrl_AZ" }, }; 663 for (String[] pair : tests) { 664 String newMax = LikelySubtags.maximize(pair[0], likely); 665 assertEquals("Likely", pair[1], newMax); 666 } 667 668 } 669 TestLikelySubtagCompleteness()670 public void TestLikelySubtagCompleteness() { 671 Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags(); 672 673 for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) { 674 if (!likely.containsKey(language)) { 675 logln("WARNING: No likely subtag for CLDR language code (" 676 + language + ")"); 677 } 678 } 679 for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) { 680 if (!likely.containsKey("und_" + script) 681 && !script.equals("Latn") 682 && !script.equals("Zinh") 683 && !script.equals("Zyyy") 684 && ScriptMetadata.getInfo(script) != null 685 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION 686 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) { 687 errln("No likely subtag for CLDR script code (und_" + script 688 + ")"); 689 } 690 } 691 692 } 693 TestEquivalentLocales()694 public void TestEquivalentLocales() { 695 Set<Set<String>> seen = new HashSet<>(); 696 Set<String> toTest = new 
TreeSet<>(testInfo.getCldrFactory() 697 .getAvailable()); 698 toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet()); 699 toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values()); 700 toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales()); 701 LanguageTagParser ltp = new LanguageTagParser(); 702 main: for (String locale : toTest) { 703 if (locale.startsWith("und") || locale.equals("root")) { 704 continue; 705 } 706 Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale); 707 if (seen.contains(s)) { 708 continue; 709 } 710 711 List<String> ss = new ArrayList<>(s); 712 String last = ss.get(ss.size() - 1); 713 ltp.set(last); 714 if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) { 715 continue; // skip variants for now. 716 } 717 String language = ltp.getLanguage(); 718 String script = ltp.getScript(); 719 String region = ltp.getRegion(); 720 if (!script.isEmpty() && !region.isEmpty()) { 721 String noScript = ltp.setScript("").toString(); 722 String noRegion = ltp.setScript(script).setRegion("") 723 .toString(); 724 switch (s.size()) { 725 case 1: // ok if already maximized and strange script/country, 726 // eg it_Arab_JA 727 continue main; 728 case 2: // ok if adds default country/script, eg {en_Cyrl, 729 // en_Cyrl_US} or {en_GB, en_Latn_GB} 730 String first = ss.get(0); 731 if (first.equals(noScript) || first.equals(noRegion)) { 732 continue main; 733 } 734 break; 735 case 3: // ok if different script in different country, eg 736 // {az_IR, az_Arab, az_Arab_IR} 737 if (noScript.equals(ss.get(0)) 738 && noRegion.equals(ss.get(1))) { 739 continue main; 740 } 741 break; 742 case 4: // ok if all combinations, eg {en, en_US, en_Latn, 743 // en_Latn_US} 744 if (language.equals(ss.get(0)) 745 && noScript.equals(ss.get(1)) 746 && noRegion.equals(ss.get(2))) { 747 continue main; 748 } 749 break; 750 } 751 } 752 errln("Strange size or composition:\t" + s + " \t" 753 + showLocaleParts(s)); 754 seen.add(s); 755 } 756 } 757 showLocaleParts(Set<String> 
s)758 private String showLocaleParts(Set<String> s) { 759 LanguageTagParser ltp = new LanguageTagParser(); 760 Set<String> b = new LinkedHashSet<>(); 761 for (String ss : s) { 762 ltp.set(ss); 763 addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b); 764 addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b); 765 addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b); 766 } 767 return Joiner.on("; ").join(b); 768 } 769 addName(int languageName, String code, Set<String> b)770 private void addName(int languageName, String code, Set<String> b) { 771 if (code.isEmpty()) { 772 return; 773 } 774 String name = testInfo.getEnglish().getName(languageName, code); 775 if (!code.equals(name)) { 776 b.add(code + "=" + name); 777 } 778 } 779 TestDefaultScriptCompleteness()780 public void TestDefaultScriptCompleteness() { 781 Relation<String, String> scriptToBase = Relation.of( 782 new LinkedHashMap<String, Set<String>>(), TreeSet.class); 783 main: for (String locale : testInfo.getCldrFactory() 784 .getAvailableLanguages()) { 785 if (!locale.contains("_") && !"root".equals(locale)) { 786 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale); 787 if (defaultScript != null) { 788 continue; 789 } 790 CLDRFile cldrFile = testInfo.getCLDRFile(locale, 791 false); 792 UnicodeSet set = cldrFile.getExemplarSet("", 793 WinningChoice.NORMAL); 794 for (String s : set) { 795 int script = UScript.getScript(s.codePointAt(0)); 796 if (script != UScript.UNKNOWN && script != UScript.COMMON 797 && script != UScript.INHERITED) { 798 scriptToBase.put(UScript.getShortName(script), locale); 799 continue main; 800 } 801 } 802 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale); 803 } 804 } 805 if (scriptToBase.size() != 0) { 806 for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) { 807 errln("Default Scripts missing:\t" + entry.getKey() + "\t" 808 + entry.getValue()); 809 } 810 } 811 } 812 TestTimeData()813 public void TestTimeData() { 814 Map<String, 
PreferredAndAllowedHour> timeData = SUPPLEMENTAL 815 .getTimeData(); 816 Set<String> regionsSoFar = new HashSet<>(); 817 Set<String> current24only = new HashSet<>(); 818 Set<String> current12preferred = new HashSet<>(); 819 820 boolean haveWorld = false; 821 822 ImmutableSet<HourStyle> oldSchool = ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k)); 823 824 for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) { 825 String region = e.getKey(); 826 if (region.equals("001")) { 827 haveWorld = true; 828 } 829 regionsSoFar.add(region); 830 PreferredAndAllowedHour preferredAndAllowedHour = e.getValue(); 831 assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred); 832 833 // find first h or H 834 HourStyle found = null; 835 836 for (HourStyle item : preferredAndAllowedHour.allowed) { 837 if (oldSchool.contains(item)) { 838 found = item; 839 if (item != preferredAndAllowedHour.preferred) { 840 String message = "Inconsistent values for " + region + ": preferred=" + preferredAndAllowedHour.preferred 841 + " but that isn't the first " + oldSchool + " in allowed: " + preferredAndAllowedHour.allowed; 842 //if (!logKnownIssue("cldrbug:11448", message)) { 843 errln(message); 844 //} 845 } 846 break; 847 } 848 } 849 if (found == null) { 850 errln(region + ": preferred " + preferredAndAllowedHour.preferred 851 + " not in " + preferredAndAllowedHour.allowed); 852 } 853 // final HourStyle firstAllowed = preferredAndAllowedHour.allowed.iterator().next(); 854 // if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.h 855 // || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.hb 856 // || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed == HourStyle.H) { 857 // errln(region + ": allowed " + preferredAndAllowedHour.allowed 858 // + " starts with preferred " + preferredAndAllowedHour.preferred); 859 // } else if (isVerbose()) { 860 // logln(region 
+ ": allowed " + preferredAndAllowedHour.allowed 861 // + " starts with preferred " + preferredAndAllowedHour.preferred); 862 // } 863 // for (HourStyle c : preferredAndAllowedHour.allowed) { 864 // if (!PreferredAndAllowedHour.HOURS.contains(c)) { 865 // errln(region + ": illegal character in " + 866 // preferredAndAllowedHour.allowed + ". It contains " + c 867 // + " which is not in " + PreferredAndAllowedHour.HOURS); 868 // } 869 // } 870 if (!preferredAndAllowedHour.allowed.contains(HourStyle.h) 871 && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) { 872 current24only.add(region); 873 } 874 if (preferredAndAllowedHour.preferred == HourStyle.h) { 875 current12preferred.add(region); 876 } 877 } 878 Set<String> missing = new TreeSet<>( 879 STANDARD_CODES.getGoodAvailableCodes(CodeType.territory)); 880 missing.removeAll(regionsSoFar); 881 for (Iterator<String> it = missing.iterator(); it.hasNext();) { 882 if (!StandardCodes.isCountry(it.next())) { 883 it.remove(); 884 } 885 } 886 887 // if we don't have 001, then we can't miss any regions 888 if (!missing.isEmpty()) { 889 if (haveWorld) { 890 logln("Implicit regions: " + missing); 891 } else { 892 errln("Missing regions: " + missing); 893 } 894 } 895 896 // The feedback gathered from our translators is that the following use 897 // 24 hour time ONLY: 898 Set<String> only24lang = new TreeSet<>( 899 Arrays.asList(("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, " 900 + "fr, gl, he, is, id, it, no, pt, ro, ru, sr, sk, sl, sv, tr, hy") 901 .split(",\\s*"))); 902 // With the new preferences, this is changed 903 Set<String> only24region = new TreeSet<>(); 904 Set<String> either24or12region = new TreeSet<>(); 905 906 // get all countries where official or de-facto official 907 // add them two one of two lists, based on the above list of languages 908 for (String language : SUPPLEMENTAL 909 .getLanguagesForTerritoriesPopulationData()) { 910 boolean a24lang = only24lang.contains(language); 911 for (String region 
: SUPPLEMENTAL 912 .getTerritoriesForPopulationData(language)) { 913 PopulationData pop = SUPPLEMENTAL 914 .getLanguageAndTerritoryPopulationData(language, region); 915 if (pop.getOfficialStatus().compareTo( 916 OfficialStatus.de_facto_official) < 0) { 917 continue; 918 } 919 if (a24lang) { 920 only24region.add(region); 921 } else { 922 either24or12region.add(region); 923 } 924 } 925 } 926 // if we have a case like CA, where en uses 12/24 but fr uses 24, remove 927 // it for safety 928 only24region.removeAll(either24or12region); 929 // There are always exceptions... Remove SM (San Marino) and VA (Vatican), 930 // since they allows 12/24 but the de facto langauge is Italian. 931 only24region.remove("SM"); 932 only24region.remove("VA"); 933 // also remove all the regions where 'h' is preferred 934 only24region.removeAll(current12preferred); 935 // now verify 936 if (!current24only.containsAll(only24region)) { 937 Set<String> missing24only = new TreeSet<>(only24region); 938 missing24only.removeAll(current24only); 939 940 errln("24-hour-only doesn't include needed items:\n" 941 + " add " 942 + CldrUtility.join(missing24only, " ") 943 + "\n\t\t" 944 + CldrUtility.join(missing24only, "\n\t\t", 945 new NameCodeTransform(testInfo.getEnglish(), 946 CLDRFile.TERRITORY_NAME))); 947 } 948 } 949 950 public static class NameCodeTransform implements StringTransform { 951 private final CLDRFile file; 952 private final int codeType; 953 NameCodeTransform(CLDRFile file, int code)954 public NameCodeTransform(CLDRFile file, int code) { 955 this.file = file; 956 this.codeType = code; 957 } 958 959 @Override transform(String code)960 public String transform(String code) { 961 return file.getName(codeType, code) + " [" + code + "]"; 962 } 963 } 964 TestAliases()965 public void TestAliases() { 966 StandardCodes.make(); 967 Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes 968 .getLStreg(); 969 Map<String, Map<String, R2<List<String>, String>>> aliases = SUPPLEMENTAL 
970 .getLocaleAliasInfo(); 971 972 for (Entry<String, Map<String, R2<List<String>, String>>> typeMap : aliases 973 .entrySet()) { 974 String type = typeMap.getKey(); 975 Map<String, R2<List<String>, String>> codeReplacement = typeMap 976 .getValue(); 977 978 Map<String, Map<String, String>> bcp47DataTypeData = bcp47Data 979 .get(type.equals("territory") ? "region" : type); 980 if (bcp47DataTypeData == null) { 981 logln("skipping BCP47 test for " + type); 982 } else { 983 for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData 984 .entrySet()) { 985 String code = codeData.getKey(); 986 if (codeReplacement.containsKey(code) 987 || codeReplacement.containsKey(code 988 .toUpperCase(Locale.ENGLISH))) { 989 continue; 990 // TODO, check the value 991 } 992 Map<String, String> data = codeData.getValue(); 993 if (data.containsKey("Deprecated") 994 && SUPPLEMENTAL.getCLDRLanguageCodes().contains( 995 code)) { 996 errln("supplementalMetadata.xml: alias is missing <languageAlias type=\"" 997 + code + "\" ... 
/> " + "\t" + data); 998 } 999 } 1000 } 1001 1002 Set<R3<String, List<String>, List<String>>> failures = new LinkedHashSet<>(); 1003 Set<String> nullReplacements = new TreeSet<>(); 1004 for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement 1005 .entrySet()) { 1006 String code = codeRep.getKey(); 1007 List<String> replacements = codeRep.getValue().get0(); 1008 if (replacements == null) { 1009 nullReplacements.add(code); 1010 continue; 1011 } 1012 Set<String> fixedReplacements = new LinkedHashSet<>(); 1013 for (String replacement : replacements) { 1014 R2<List<String>, String> newReplacement = codeReplacement 1015 .get(replacement); 1016 if (newReplacement != null) { 1017 List<String> list = newReplacement.get0(); 1018 if (list != null) { 1019 fixedReplacements.addAll(list); 1020 } 1021 } else { 1022 fixedReplacements.add(replacement); 1023 } 1024 } 1025 List<String> fixedList = new ArrayList<>( 1026 fixedReplacements); 1027 if (!replacements.equals(fixedList)) { 1028 R3<String, List<String>, List<String>> row = Row.of(code, 1029 replacements, fixedList); 1030 System.out.println(row.toString()); 1031 failures.add(row); 1032 } 1033 } 1034 1035 if (failures.size() != 0) { 1036 for (R3<String, List<String>, List<String>> item : failures) { 1037 String code = item.get0(); 1038 List<String> oldReplacement = item.get1(); 1039 List<String> newReplacement = item.get2(); 1040 1041 errln(code + "\t=>\t" + oldReplacement + "\tshould be:\n\t" 1042 + "<" + type + "Alias type=\"" + code 1043 + "\" replacement=\"" 1044 + Joiner.on(" ").join(newReplacement) 1045 + "\" reason=\"XXX\"/> <!-- YYY -->\n"); 1046 } 1047 } 1048 if (nullReplacements.size() != 0) { 1049 logln("No Replacements\t" + type + "\t" + nullReplacements); 1050 } 1051 } 1052 } 1053 1054 static final List<String> oldRegions = Arrays 1055 .asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU" 1056 .split(", ")); 1057 TestTerritoryContainment()1058 public void TestTerritoryContainment() { 1059 
Relation<String, String> map = SUPPLEMENTAL 1060 .getTerritoryToContained(ContainmentStyle.all); 1061 Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore(); 1062 Set<String> mapItems = new LinkedHashSet<>(); 1063 // get all the items 1064 for (String item : map.keySet()) { 1065 mapItems.add(item); 1066 mapItems.addAll(map.getAll(item)); 1067 } 1068 Map<String, Map<String, String>> bcp47RegionData = StandardCodes 1069 .getLStreg().get("region"); 1070 1071 // verify that all regions are covered 1072 Set<String> bcp47Regions = new LinkedHashSet<>( 1073 bcp47RegionData.keySet()); 1074 bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the 1075 // unknown region... 1076 for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext();) { 1077 String region = it.next(); 1078 Map<String, String> data = bcp47RegionData.get(region); 1079 if (data.containsKey("Deprecated")) { 1080 logln("Removing deprecated " + region); 1081 it.remove(); 1082 } 1083 if ("Private use".equals(data.get("Description"))) { 1084 it.remove(); 1085 } 1086 } 1087 1088 if (!mapItems.equals(bcp47Regions)) { 1089 mapItems.removeAll(oldRegions); 1090 errlnDiff("containment items not in bcp47 regions: ", mapItems, 1091 bcp47Regions); 1092 errlnDiff("bcp47 regions not in containment items: ", bcp47Regions, 1093 mapItems); 1094 } 1095 1096 // verify that everything in the containment core can be reached 1097 // downwards from 001. 
1098 1099 Map<String, Integer> from001 = getRecursiveContainment("001", map, 1100 new LinkedHashMap<String, Integer>(), 1); 1101 from001.put("001", 0); 1102 Set<String> keySet = from001.keySet(); 1103 for (String region : keySet) { 1104 logln(Utility.repeat("\t", from001.get(region)) + "\t" + region 1105 + "\t" + getRegionName(region)); 1106 } 1107 1108 // Populate mapItems with the core containment 1109 mapItems.clear(); 1110 for (String item : mapCore.keySet()) { 1111 mapItems.add(item); 1112 mapItems.addAll(mapCore.getAll(item)); 1113 } 1114 1115 if (!mapItems.equals(keySet)) { 1116 errlnDiff( 1117 "containment core items that can't be reached from 001: ", 1118 mapItems, keySet); 1119 } 1120 } 1121 errlnDiff(String title, Set<String> mapItems, Set<String> keySet)1122 private void errlnDiff(String title, Set<String> mapItems, 1123 Set<String> keySet) { 1124 Set<String> diff = new LinkedHashSet<>(mapItems); 1125 diff.removeAll(keySet); 1126 if (diff.size() != 0) { 1127 errln(title + diff); 1128 } 1129 } 1130 getRegionName(String region)1131 private String getRegionName(String region) { 1132 return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region); 1133 } 1134 getRecursiveContainment(String region, Relation<String, String> map, Map<String, Integer> result, int depth)1135 private Map<String, Integer> getRecursiveContainment(String region, 1136 Relation<String, String> map, Map<String, Integer> result, int depth) { 1137 Set<String> contained = map.getAll(region); 1138 if (contained == null) { 1139 return result; 1140 } 1141 for (String item : contained) { 1142 if (result.containsKey(item)) { 1143 logln("Duplicate containment " + item + "\t" 1144 + getRegionName(item)); 1145 continue; 1146 } 1147 result.put(item, depth); 1148 getRecursiveContainment(item, map, result, depth + 1); 1149 } 1150 return result; 1151 } 1152 TestMacrolanguages()1153 public void TestMacrolanguages() { 1154 Set<String> languageCodes = STANDARD_CODES 1155 
.getAvailableCodes("language"); 1156 Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement = SUPPLEMENTAL 1157 .getLocaleAliasInfo(); 1158 Map<String, R2<List<String>, String>> tagToReplacement = typeToTagToReplacement 1159 .get("language"); 1160 1161 Relation<String, String> replacementToReplaced = Relation.of( 1162 new TreeMap<String, Set<String>>(), TreeSet.class); 1163 for (String language : tagToReplacement.keySet()) { 1164 List<String> replacements = tagToReplacement.get(language).get0(); 1165 if (replacements != null) { 1166 replacementToReplaced.putAll(replacements, language); 1167 } 1168 } 1169 replacementToReplaced.freeze(); 1170 1171 Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes 1172 .getLStreg(); 1173 Map<String, Map<String, String>> lstregLanguageInfo = lstreg 1174 .get("language"); 1175 1176 Relation<Scope, String> scopeToCodes = Relation.of( 1177 new TreeMap<Scope, Set<String>>(), TreeSet.class); 1178 // the invariant is that every macrolanguage has exactly 1 encompassed 1179 // language that maps to it 1180 1181 main: for (String language : Builder.with(new TreeSet<String>()) 1182 .addAll(languageCodes).addAll(Iso639Data.getAvailable()).get()) { 1183 if (language.equals("no") || language.equals("sh")) 1184 continue; // special cases 1185 Scope languageScope = getScope(language, lstregLanguageInfo); 1186 if (languageScope == Scope.Macrolanguage) { 1187 if (Iso639Data.getHeirarchy(language) != null) { 1188 continue main; // is real family 1189 } 1190 Set<String> replacements = replacementToReplaced 1191 .getAll(language); 1192 if (replacements == null || replacements.size() == 0) { 1193 scopeToCodes.put(languageScope, language); 1194 } else { 1195 // it still might be bad, if we don't have a mapping to a 1196 // regular language 1197 for (String replacement : replacements) { 1198 Scope replacementScope = getScope(replacement, 1199 lstregLanguageInfo); 1200 if (replacementScope == Scope.Individual) { 1201 
continue main; 1202 } 1203 } 1204 scopeToCodes.put(languageScope, language); 1205 } 1206 } 1207 } 1208 // now show the items we found 1209 for (Scope scope : scopeToCodes.keySet()) { 1210 for (String language : scopeToCodes.getAll(scope)) { 1211 String name = testInfo.getEnglish().getName(language); 1212 if (name == null || name.equals(language)) { 1213 Set<String> set = Iso639Data.getNames(language); 1214 if (set != null) { 1215 name = set.iterator().next(); 1216 } else { 1217 Map<String, String> languageInfo = lstregLanguageInfo 1218 .get(language); 1219 if (languageInfo != null) { 1220 name = languageInfo.get("Description"); 1221 } 1222 } 1223 } 1224 errln(scope + "\t" + language + "\t" + name + "\t" 1225 + Iso639Data.getType(language)); 1226 } 1227 } 1228 } 1229 getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo)1230 private Scope getScope(String language, 1231 Map<String, Map<String, String>> lstregLanguageInfo) { 1232 Scope languageScope = Iso639Data.getScope(language); 1233 Map<String, String> languageInfo = lstregLanguageInfo.get(language); 1234 if (languageInfo == null) { 1235 // System.out.println("Couldn't get lstreg info for " + language); 1236 } else { 1237 String lstregScope = languageInfo.get("Scope"); 1238 if (lstregScope != null) { 1239 Scope scope2 = Scope.fromString(lstregScope); 1240 if (languageScope != scope2) { 1241 // System.out.println("Mismatch in scope between LSTR and ISO 639:\t" 1242 // + scope2 + "\t" + 1243 // languageScope); 1244 languageScope = scope2; 1245 } 1246 } 1247 } 1248 return languageScope; 1249 } 1250 1251 static final boolean LOCALES_FIXED = true; 1252 TestPopulation()1253 public void TestPopulation() { 1254 Set<String> languages = SUPPLEMENTAL 1255 .getLanguagesForTerritoriesPopulationData(); 1256 Relation<String, String> baseToLanguages = Relation.of( 1257 new TreeMap<String, Set<String>>(), TreeSet.class); 1258 LanguageTagParser ltp = new LanguageTagParser(); 1259 LanguageTagCanonicalizer ltc 
= new LanguageTagCanonicalizer(false); 1260 1261 for (String language : languages) { 1262 if (LOCALES_FIXED) { 1263 String canonicalForm = ltc.transform(language); 1264 if (!assertEquals("Canonical form", canonicalForm, language)) { 1265 int debug = 0; 1266 } 1267 } 1268 1269 String base = ltp.set(language).getLanguage(); 1270 String script = ltp.getScript(); 1271 baseToLanguages.put(base, language); 1272 1273 // add basic data, basically just for wo! 1274 // if there are primary scripts, they must include script (if not 1275 // empty) 1276 Set<String> primaryScripts = Collections.emptySet(); 1277 Set<String> secondaryScripts = Collections.emptySet(); 1278 Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL 1279 .getBasicLanguageDataMap(base); 1280 if (basicData != null) { 1281 BasicLanguageData s = basicData 1282 .get(BasicLanguageData.Type.primary); 1283 if (s != null) { 1284 primaryScripts = s.getScripts(); 1285 } 1286 s = basicData.get(BasicLanguageData.Type.secondary); 1287 if (s != null) { 1288 secondaryScripts = s.getScripts(); 1289 } 1290 } 1291 1292 // do some consistency tests; if there is a script, it must be in 1293 // primaryScripts or secondaryScripts 1294 if (!script.isEmpty() && !primaryScripts.contains(script) && !secondaryScripts.contains(script)) { 1295 errln(base + ": Script found in territory data (" + script 1296 + ") is not in primary scripts :\t" + primaryScripts 1297 + " and not in secondary scripts :\t" + secondaryScripts); 1298 } 1299 1300 // if there are multiple primary scripts, they will be in 1301 // baseToLanguages 1302 if (primaryScripts.size() > 1) { 1303 for (String script2 : primaryScripts) { 1304 baseToLanguages.put(base, base + "_" + script2); 1305 } 1306 } 1307 } 1308 1309 if (!LOCALES_FIXED) { 1310 // the invariants are that if we have a base, we must not have a script. 
1311 // and if we don't have a base, we must have two items 1312 for (String base : baseToLanguages.keySet()) { 1313 Set<String> languagesForBase = baseToLanguages.getAll(base); 1314 if (languagesForBase.contains(base)) { 1315 if (languagesForBase.size() > 1) { 1316 errln("Cannot have base alone with other scripts:\t" 1317 + languagesForBase); 1318 } 1319 } else { 1320 if (languagesForBase.size() == 1) { 1321 errln("Cannot have only one script for language:\t" 1322 + languagesForBase); 1323 } 1324 } 1325 } 1326 } 1327 } 1328 TestCompleteness()1329 public void TestCompleteness() { 1330 if (SUPPLEMENTAL.getSkippedElements().size() > 0) { 1331 logln("SupplementalDataInfo API doesn't support: " 1332 + SUPPLEMENTAL.getSkippedElements().toString()); 1333 } 1334 } 1335 1336 // these are settings for exceptional cases we want to allow 1337 private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW = new TreeSet<>( 1338 Arrays.asList("ILS", "NZD", "PGK", "TWD")); 1339 1340 // ok since there is no problem with confusion 1341 private static final Set<String> OK_TO_NOT_HAVE_OLD = new TreeSet<>( 1342 Arrays.asList("ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM", 1343 "FRF", "GRD", "IEP", "ITL", "LUF", "MTL", "MTP", "NLG", 1344 "PTE", "YUM", "ARA", "BAD", "BGL", "BOP", "BRC", "BRN", 1345 "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE", "HRD", 1346 "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI", 1347 "PES", "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD", 1348 "YUN", "ZRZ", "GWE")); 1349 1350 private static final Date LIMIT_FOR_NEW_CURRENCY = new Date( 1351 new Date().getYear() - 5, 1, 1); 1352 private static final Date NOW = new Date(); 1353 1354 private Matcher oldMatcher = Pattern.compile( 1355 "\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE) 1356 .matcher(""); 1357 private Matcher newMatcher = Pattern.compile("\\bnew\\b", 1358 Pattern.CASE_INSENSITIVE).matcher(""); 1359 1360 /** 1361 * Test that access to currency info in supplemental data is ok. 
At this 1362 * point just a simple test. 1363 * 1364 * @param args 1365 */ TestCurrency()1366 public void TestCurrency() { 1367 IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance(); 1368 Set<String> currencyCodes = STANDARD_CODES 1369 .getGoodAvailableCodes("currency"); 1370 Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes = Relation 1371 .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), 1372 TreeSet.class); 1373 Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes = Relation 1374 .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), 1375 TreeSet.class); 1376 Set<String> territoriesWithoutModernCurrencies = new TreeSet<>( 1377 STANDARD_CODES.getGoodAvailableCodes("territory")); 1378 Map<String, Date> currencyFirstValid = new TreeMap<>(); 1379 Map<String, Date> currencyLastValid = new TreeMap<>(); 1380 territoriesWithoutModernCurrencies.remove("ZZ"); 1381 1382 for (String territory : STANDARD_CODES 1383 .getGoodAvailableCodes("territory")) { 1384 /* "EU" behaves like a country for purposes of this test */ 1385 if ((SUPPLEMENTAL.getContained(territory) != null) 1386 && !territory.equals("EU")) { 1387 territoriesWithoutModernCurrencies.remove(territory); 1388 continue; 1389 } 1390 Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL 1391 .getCurrencyDateInfo(territory); 1392 if (currencyInfo == null) { 1393 continue; // error, but will pick up below. 
1394 } 1395 for (CurrencyDateInfo dateInfo : currencyInfo) { 1396 final String currency = dateInfo.getCurrency(); 1397 final Date start = dateInfo.getStart(); 1398 final Date end = dateInfo.getEnd(); 1399 if (dateInfo.getErrors().length() != 0) { 1400 logln("parsing " + territory + "\t" + dateInfo.toString() 1401 + "\t" + dateInfo.getErrors()); 1402 } 1403 Date firstValue = currencyFirstValid.get(currency); 1404 if (firstValue == null || firstValue.compareTo(start) < 0) { 1405 currencyFirstValid.put(currency, start); 1406 } 1407 Date lastValue = currencyLastValid.get(currency); 1408 if (lastValue == null || lastValue.compareTo(end) > 0) { 1409 currencyLastValid.put(currency, end); 1410 } 1411 if (start.compareTo(NOW) < 0 && end.compareTo(NOW) >= 0) { // Non-tender 1412 // is 1413 // OK... 1414 modernCurrencyCodes.put(currency, 1415 new Pair<>(territory, 1416 dateInfo)); 1417 territoriesWithoutModernCurrencies.remove(territory); 1418 } else { 1419 nonModernCurrencyCodes.put(currency, 1420 new Pair<>(territory, 1421 dateInfo)); 1422 } 1423 logln(territory 1424 + "\t" 1425 + dateInfo.toString() 1426 + "\t" 1427 + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME, 1428 currency)); 1429 } 1430 } 1431 // fix up 1432 nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet()); 1433 Relation<String, String> isoCurrenciesToCountries = Relation.of( 1434 new TreeMap<String, Set<String>>(), TreeSet.class) 1435 .addAllInverted(isoCodes.getCountryToCodes()); 1436 // now print error messages 1437 logln("Modern Codes: " + modernCurrencyCodes.size() + "\t" 1438 + modernCurrencyCodes); 1439 Set<String> missing = new TreeSet<>( 1440 isoCurrenciesToCountries.keySet()); 1441 missing.removeAll(modernCurrencyCodes.keySet()); 1442 if (missing.size() != 0) { 1443 errln("Missing codes compared to ISO: " + missing.toString()); 1444 } 1445 1446 for (String currency : modernCurrencyCodes.keySet()) { 1447 Set<Pair<String, CurrencyDateInfo>> data = modernCurrencyCodes 1448 
.getAll(currency); 1449 final String name = testInfo.getEnglish().getName( 1450 CLDRFile.CURRENCY_NAME, currency); 1451 1452 Set<String> isoCountries = isoCurrenciesToCountries 1453 .getAll(currency); 1454 if (isoCountries == null) { 1455 isoCountries = new TreeSet<>(); 1456 } 1457 1458 TreeSet<String> cldrCountries = new TreeSet<>(); 1459 for (Pair<String, CurrencyDateInfo> x : data) { 1460 cldrCountries.add(x.getFirst()); 1461 } 1462 if (!isoCountries.equals(cldrCountries)) { 1463 if (!logKnownIssue("cldrbug:10765", "Missing codes compared to ISO: " + missing.toString())) { 1464 1465 errln("Mismatch between ISO and Cldr modern currencies for " 1466 + currency + "\tISO:" + isoCountries + "\tCLDR:" 1467 + cldrCountries); 1468 showCountries("iso-cldr", isoCountries, cldrCountries, missing); 1469 showCountries("cldr-iso", cldrCountries, isoCountries, missing); 1470 } 1471 } 1472 1473 if (oldMatcher.reset(name).find()) { 1474 errln("Has 'old' in name but still used " + "\t" + currency 1475 + "\t" + name + "\t" + data); 1476 } 1477 if (newMatcher.reset(name).find() 1478 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) { 1479 // find the first use. 
If older than 5 years, flag as error 1480 if (currencyFirstValid.get(currency).compareTo( 1481 LIMIT_FOR_NEW_CURRENCY) < 0) { 1482 errln("Has 'new' in name but used since " 1483 + CurrencyDateInfo.formatDate(currencyFirstValid 1484 .get(currency)) 1485 + "\t" + currency + "\t" 1486 + name + "\t" + data); 1487 } else { 1488 logln("Has 'new' in name but used since " 1489 + CurrencyDateInfo.formatDate(currencyFirstValid 1490 .get(currency)) 1491 + "\t" + currency + "\t" 1492 + name + "\t" + data); 1493 } 1494 } 1495 } 1496 logln("Non-Modern Codes (with dates): " + nonModernCurrencyCodes.size() 1497 + "\t" + nonModernCurrencyCodes); 1498 for (String currency : nonModernCurrencyCodes.keySet()) { 1499 final String name = testInfo.getEnglish().getName( 1500 CLDRFile.CURRENCY_NAME, currency); 1501 if (name == null) { 1502 errln("No English name for currency " + currency); 1503 continue; 1504 } 1505 if (newMatcher.reset(name).find() 1506 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) { 1507 logln("Has 'new' in name but NOT used since " 1508 + CurrencyDateInfo.formatDate(currencyLastValid 1509 .get(currency)) 1510 + "\t" + currency + "\t" + name 1511 + "\t" + nonModernCurrencyCodes.getAll(currency)); 1512 } else if (!oldMatcher.reset(name).find() 1513 && !OK_TO_NOT_HAVE_OLD.contains(currency)) { 1514 logln("Doesn't have 'old' or date range in name but NOT used since " 1515 + CurrencyDateInfo.formatDate(currencyLastValid 1516 .get(currency)) 1517 + "\t" 1518 + currency 1519 + "\t" 1520 + name 1521 + "\t" + nonModernCurrencyCodes.getAll(currency)); 1522 for (Pair<String, CurrencyDateInfo> pair : nonModernCurrencyCodes 1523 .getAll(currency)) { 1524 final String territory = pair.getFirst(); 1525 Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL 1526 .getCurrencyDateInfo(territory); 1527 for (CurrencyDateInfo dateInfo : currencyInfo) { 1528 if (dateInfo.getEnd().compareTo(NOW) < 0) { 1529 continue; 1530 } 1531 logln("\tCurrencies used instead: " 1532 + territory 1533 + 
"\t" 1534 + dateInfo 1535 + "\t" 1536 + testInfo.getEnglish().getName( 1537 CLDRFile.CURRENCY_NAME, 1538 dateInfo.getCurrency())); 1539 1540 } 1541 } 1542 1543 } 1544 } 1545 Set<String> remainder = new TreeSet<>(); 1546 remainder.addAll(currencyCodes); 1547 remainder.removeAll(nonModernCurrencyCodes.keySet()); 1548 // TODO make this an error, except for allowed exceptions. 1549 logln("Currencies without Territories: " + remainder); 1550 if (territoriesWithoutModernCurrencies.size() != 0) { 1551 errln("Modern territory missing currency: " 1552 + territoriesWithoutModernCurrencies); 1553 } 1554 } 1555 showCountries(final String title, Set<String> isoCountries, Set<String> cldrCountries, Set<String> missing)1556 private void showCountries(final String title, Set<String> isoCountries, 1557 Set<String> cldrCountries, Set<String> missing) { 1558 missing.clear(); 1559 missing.addAll(isoCountries); 1560 missing.removeAll(cldrCountries); 1561 for (String country : missing) { 1562 logln("\t\tExtra in " + title + "\t" + country + " - " 1563 + getRegionName(country)); 1564 } 1565 } 1566 TestCurrencyDecimalPlaces()1567 public void TestCurrencyDecimalPlaces() { 1568 IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance(); 1569 Relation<String, IsoCurrencyParser.Data> codeList = isoCodes 1570 .getCodeList(); 1571 Set<String> currencyCodes = STANDARD_CODES 1572 .getGoodAvailableCodes("currency"); 1573 for (String cc : currencyCodes) { 1574 Set<IsoCurrencyParser.Data> d = codeList.get(cc); 1575 if (d != null) { 1576 for (IsoCurrencyParser.Data x : d) { 1577 CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc); 1578 if (cni.digits != x.getMinorUnit()) { 1579 logln("Mismatch between ISO/CLDR for decimal places for currency => " + cc + 1580 ". 
ISO = " + x.getMinorUnit() + " CLDR = " + cni.digits); 1581 } 1582 } 1583 } 1584 } 1585 } 1586 1587 /** 1588 * Verify that we have a default script for every CLDR base language 1589 */ TestDefaultScripts()1590 public void TestDefaultScripts() { 1591 SupplementalDataInfo supp = SUPPLEMENTAL; 1592 Map<String, String> likelyData = supp.getLikelySubtags(); 1593 Map<String, String> baseToDefaultContentScript = new HashMap<>(); 1594 for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) { 1595 String script = locale.getScript(); 1596 if (!script.isEmpty() && locale.getCountry().isEmpty()) { 1597 baseToDefaultContentScript.put(locale.getLanguage(), script); 1598 } 1599 } 1600 for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) { 1601 if ("root".equals(locale)) { 1602 continue; 1603 } 1604 CLDRLocale loc = CLDRLocale.getInstance(locale); 1605 String baseLanguage = loc.getLanguage(); 1606 String defaultScript = supp.getDefaultScript(baseLanguage); 1607 1608 String defaultContentScript = baseToDefaultContentScript 1609 .get(baseLanguage); 1610 if (defaultContentScript != null) { 1611 assertEquals(loc + " defaultContentScript = default", 1612 defaultScript, defaultContentScript); 1613 } 1614 String likely = likelyData.get(baseLanguage); 1615 String likelyScript = likely == null ? 
null : CLDRLocale 1616 .getInstance(likely).getScript(); 1617 Map<Type, BasicLanguageData> scriptInfo = supp 1618 .getBasicLanguageDataMap(baseLanguage); 1619 if (scriptInfo == null) { 1620 errln(loc + ": has no BasicLanguageData"); 1621 } else { 1622 BasicLanguageData data = scriptInfo.get(Type.primary); 1623 if (data == null) { 1624 data = scriptInfo.get(Type.secondary); 1625 } 1626 if (data == null) { 1627 errln(loc + ": has no scripts in BasicLanguageData"); 1628 } else if (!data.getScripts().contains(defaultScript)) { 1629 errln(loc + ": " + defaultScript 1630 + " not in BasicLanguageData - check <languages> in supplementalData.xml and language_script_raw.txt " + data.getScripts()); 1631 } 1632 } 1633 1634 assertEquals(loc + " likely = default", defaultScript, likelyScript); 1635 1636 assertNotNull(loc + ": needs default script", defaultScript); 1637 1638 if (!loc.getScript().isEmpty()) { 1639 if (!loc.getScript().equals(defaultScript)) { 1640 assertNotEquals(locale 1641 + ": only include script if not default", 1642 loc.getScript(), defaultScript); 1643 } 1644 } 1645 1646 } 1647 } 1648 1649 enum CoverageIssue { 1650 log, warn, error 1651 } 1652 TestPluralCompleteness()1653 public void TestPluralCompleteness() { 1654 // Set<String> cardinalLocales = new 1655 // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal)); 1656 // Set<String> ordinalLocales = new 1657 // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal)); 1658 // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals = 1659 // PluralRulesFactory.getLocaleToSamplePatterns(); 1660 // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales(); 1661 // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale, 1662 // type).keySet()); 1663 // Map<ULocale, PluralRules> overrideCardinals = 1664 // PluralRulesFactory.getPluralOverrides(); 1665 // Set<ULocale> overrideCardinalLocales = new 1666 // HashSet<ULocale>(overrideCardinals.keySet()); 1667 1668 
Set<String> testLocales = STANDARD_CODES.getLocaleCoverageLocales( 1669 Organization.google, EnumSet.of(Level.MODERN)); 1670 Set<String> allLocales = testInfo.getCldrFactory().getAvailable(); 1671 LanguageTagParser ltp = new LanguageTagParser(); 1672 for (String locale : allLocales) { 1673 // the only known case where plural rules depend on region or script 1674 // is pt_PT 1675 if (locale.equals("root")) { 1676 continue; 1677 } 1678 ltp.set(locale); 1679 if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) { 1680 continue; 1681 } 1682 CoverageIssue needsCoverage = testLocales.contains(locale) 1683 ? CoverageIssue.error 1684 : CoverageIssue.log; 1685 CoverageIssue needsCoverage2 = needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage; 1686 1687 // if (logKnownIssue("Cldrbug:8809", "Missing plural rules/samples be and ga locales")) { 1688 // if (locale.equals("be") || locale.equals("ga")) { 1689 // needsCoverage = CoverageIssue.warn; 1690 // } 1691 // } 1692 PluralRulesFactory prf = PluralRulesFactory 1693 .getInstance(CLDRConfig.getInstance() 1694 .getSupplementalDataInfo()); 1695 1696 for (PluralType type : PluralType.values()) { 1697 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale, 1698 false); 1699 if (pluralInfo == null) { 1700 errOrLog(needsCoverage, locale + "\t" + type + " \tmissing plural rules", "Cldrbug:7839", "Missing plural data for modern locales"); 1701 continue; 1702 } 1703 Set<Count> counts = pluralInfo.getCounts(); 1704 // if (counts.size() == 1) { 1705 // continue; // skip checking samples 1706 // } 1707 HashSet<String> samples = new HashSet<>(); 1708 EnumSet<Count> countsWithNoSamples = EnumSet 1709 .noneOf(Count.class); 1710 Relation<String, Count> samplesToCounts = Relation.of( 1711 new HashMap(), LinkedHashSet.class); 1712 Set<Count> countsFound = prf.getSampleCounts(locale, 1713 type.standardType); 1714 StringBuilder failureCases = new StringBuilder(); 1715 for (Count count : counts) { 1716 String 
pattern = PluralRulesFactory.getSamplePattern(locale, type.standardType, count);
                    final String rangeLine = getRangeLine(count, pluralInfo.getPluralRules(), pattern);
                    failureCases.append('\n').append(locale).append('\t').append(type).append('\t').append(rangeLine);
                    if (countsFound == null || !countsFound.contains(count)) {
                        countsWithNoSamples.add(count);
                    } else {
                        samplesToCounts.put(pattern, count);
                        logln(locale + "\t" + type + "\t" + count + "\t"
                            + pattern);
                    }
                }
                if (!countsWithNoSamples.isEmpty()) {
                    errOrLog(needsCoverage, locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples,
                        "cldrbug:7075", "Missing ordinal minimal pairs");
                    errOrLog(needsCoverage2, failureCases.toString());
                }
                for (Entry<String, Set<Count>> entry : samplesToCounts
                    .keyValuesSet()) {
                    if (entry.getValue().size() != 1) {
                        errOrLog(needsCoverage, locale + "\t" + type + "\t duplicate samples: " + entry.getValue()
                            + " => «" + entry.getKey() + "»", "cldrbug:7119", "Some duplicate minimal pairs");
                        errOrLog(needsCoverage2, failureCases.toString());
                    }
                }
            }
        }
    }

    /**
     * Reports {@code message} at the severity selected by {@code causeError}.
     * <ul>
     * <li>{@code error} with a {@code null} ticket: reported through {@code errln}.</li>
     * <li>{@code error} with a ticket: {@code logKnownIssue} is called, then the message is
     * reported through {@code warnln} (deliberate fall-through).</li>
     * <li>{@code warn}: reported through {@code warnln}.</li>
     * <li>{@code log}: reported through {@code logln}.</li>
     * </ul>
     * Any other value is ignored.
     *
     * @param causeError severity selector; must not be {@code null}
     * @param message    text to report
     * @param logTicket  known-issue ticket id, or {@code null} to make {@code error} a hard error
     * @param logComment comment passed to {@code logKnownIssue} together with the ticket
     */
    public void errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment) {
        switch (causeError) {
        case error:
            if (logTicket == null) {
                errln(message);
                break;
            }
            logKnownIssue(logTicket, logComment);
            // fall through
        case warn:
            warnln(message);
            break;
        case log:
            logln(message);
            break;
        }
    }

    /**
     * Convenience overload of {@link #errOrLog(CoverageIssue, String, String, String)} with no
     * known-issue ticket, so {@code error} is always reported through {@code errln}.
     *
     * @param causeError severity selector; must not be {@code null}
     * @param message    text to report
     */
    public void errOrLog(CoverageIssue causeError, String message) {
        errOrLog(causeError, message, null, null);
    }

    /**
     * Checks that the digits of every numeric numbering system are consecutive code points,
     * skipping a fixed list of known exceptions. Only the first out-of-order digit of a
     * numbering system is reported.
     */
    public void TestNumberingSystemDigits() {

        // Don't worry about digits from supplemental planes yet ( ICU can't
        // handle them anyways )
        // hanidec is the only known non codepoint order numbering system
        // TODO: Fix so that it works properly on non-BMP digit strings.
        String[] knownExceptions = { "brah", "cakm", "hanidec", "osma", "shrd",
            "sora", "takr" };
        List<String> knownExceptionList = Arrays.asList(knownExceptions);
        for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
            if (knownExceptionList.contains(ns)) {
                continue;
            }
            String digits = SUPPLEMENTAL.getDigits(ns);
            int previousChar = 0;
            int ch;

            // Step by code point, not by char, so surrogate pairs count as one digit.
            for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
                ch = digits.codePointAt(i);
                if (i > 0 && ch != previousChar + 1) {
                    errln("Digits for numbering system "
                        + ns
                        + " are not in code point order. Previous char = U+"
                        + Utility.hex(previousChar, 4)
                        + " Current char = U+" + Utility.hex(ch, 4));
                    break;
                }
                previousChar = ch;
            }
        }
    }

    /**
     * Checks that every code point whose general category is DECIMAL_DIGIT_NUMBER appears in
     * the digit string of at least one numeric numbering system; each digit that does not is
     * reported as an error.
     */
    public void TestNumberingSystemDigitCompleteness() {
        // A sorted set gives cheap removal (the original list removal was a linear scan per
        // digit) while still iterating in ascending code point order for the error report.
        Set<Integer> unicodeDigits = new TreeSet<>();
        for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) {
            if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) {
                unicodeDigits.add(Integer.valueOf(cp));
            }
        }

        // Strike out every digit that some numbering system accounts for.
        for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
            String digits = SUPPLEMENTAL.getDigits(ns);
            int ch;

            for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
                ch = digits.codePointAt(i);
                unicodeDigits.remove(Integer.valueOf(ch));
            }
        }

        // Whatever is left is not covered by any numbering system.
        for (Integer i : unicodeDigits) {
            errln("Unicode digit: " + UCharacter.getName(i) + " is not in any numbering system. Script = "
                + UScript.getShortName(UScript.getScript(i)));
        }
    }

    /**
     * For every canonical time zone ID whose region is not "001" (except a short list of zones
     * known to have no metazone), asserts that metazone ranges exist and compares the earliest
     * explicit range start and the latest explicit range end against fixed goal dates.
     */
    public void TestMetazones() {
        // Deprecated Date(year, month, day) takes the year as an offset from 1900:
        // 70 -> 1970-01-01, 300 -> 2200-01-02 (in the default time zone).
        Date goalMin = new Date(70, 0, 1);
        Date goalMax = new Date(300, 0, 2);
        ImmutableSet<String> knownTZWithoutMetazone = ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov");
        for (String timezoneRaw : TimeZone.getAvailableIDs()) {
            String timezone = TimeZone.getCanonicalID(timezoneRaw);
            String region = TimeZone.getRegion(timezone);
            // Only test canonical IDs that belong to a real region.
            if (!timezone.equals(timezoneRaw) || "001".equals(region)) {
                continue;
            }
            if (knownTZWithoutMetazone.contains(timezone)) {
                continue;
            }
            final Set<MetaZoneRange> ranges = SUPPLEMENTAL
                .getMetaZoneRanges(timezone);

            if (assertNotNull("metazones for " + timezone, ranges)) {
                // Open-ended bounds (START_OF_TIME / END_OF_TIME) are skipped, so min/max keep
                // their sentinel values when no range has an explicit bound.
                long min = Long.MAX_VALUE;
                long max = Long.MIN_VALUE;
                for (MetaZoneRange range : ranges) {
                    if (range.dateRange.from != DateRange.START_OF_TIME) {
                        min = Math.min(min, range.dateRange.from);
                    }
                    if (range.dateRange.to != DateRange.END_OF_TIME) {
                        max = Math.max(max, range.dateRange.to);
                    }
                }
                assertRelation(timezone + " has metazone before 1970?", true,
                    goalMin, LEQ, new Date(min));
                assertRelation(timezone
                    + " has metazone until way in the future?", true,
                    goalMax, GEQ, new Date(max));
            }
        }
    }

    /**
     * Asserts that the population recorded for yue_Hans in CN is smaller than the one recorded
     * for Chinese in CN. The Chinese language code used ("zh" or "zh_Hans") depends on
     * {@code LOCALES_FIXED}.
     */
    public void Test9924() {
        boolean localesFixed = org.unicode.cldr.unittest.TestSupplementalInfo.LOCALES_FIXED;
        PopulationData zhCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(localesFixed ? "zh" : "zh_Hans", "CN");
        PopulationData yueCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN");
        // NOTE(review): the message mentions "yue*10" but the comparison applies no factor — confirm intent.
        assertTrue("yue*10 < zh", yueCNData.getPopulation() < zhCNData.getPopulation());
    }

    /**
     * Cross-checks the language codes returned by {@code SUPPLEMENTAL.getCLDRLanguageCodes()}
     * against the languages of the available main locales (plus und, mul and zxx).
     * Codes missing from main are only warned about or logged, split by coverage level;
     * main languages missing from the code list fail an assertion.
     */
    public void Test10765() {
        Set<String> surveyToolLanguages = SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool
        Set<String> mainLanguages = new TreeSet<>();
        LanguageTagParser ltp = new LanguageTagParser();
        for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
            mainLanguages.add(ltp.set(locale).getLanguage());
        }
        // add special codes we want to see anyway
        mainLanguages.add("und");
        mainLanguages.add("mul");
        mainLanguages.add("zxx");

        if (!mainLanguages.containsAll(surveyToolLanguages)) {
            CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale
            Set<String> temp = new TreeSet<>(surveyToolLanguages);
            temp.removeAll(mainLanguages);
            Set<String> modern = new TreeSet<>();
            Set<String> comprehensive = new TreeSet<>();
            for (String lang : temp) {
                Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang));
                if (level.compareTo(Level.MODERN) <= 0) {
                    modern.add(lang);
                } else {
                    comprehensive.add(lang);
                }
            }
            warnln("«Modern» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(modern));
            logln("«Comprehensive» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(comprehensive));
        }
        if (!surveyToolLanguages.containsAll(mainLanguages)) {
            // Reduce mainLanguages to the offending codes so the failure message lists them.
            mainLanguages.removeAll(surveyToolLanguages);
            // TODO: See https://unicode-org.atlassian.net/browse/CLDR-14974
            // Currently there is a requirement that all locales in main/* are in attributeValueValidity.xml
            assertEquals("main/* languages missing from <variable id='$language'/> in attributeValueValidity.xml",
                Collections.emptySet(), mainLanguages);
        }
    }

    /**
     * Maps language codes to display strings of the form {@code "<English name> (<code>)"}.
     *
     * @param temp language codes to describe
     * @return a new sorted set with one display string per code
     */
    private Set<String> getNames(Set<String> temp) {
        Set<String> tempNames = new TreeSet<>();
        for (String langCode : temp) {
            tempNames.add(testInfo.getEnglish().getName(CLDRFile.LANGUAGE_NAME, langCode) + " (" + langCode + ")");
        }
        return tempNames;
    }

    /**
     * For every locale that has grammar info, asserts that for each target/feature pair the
     * values of every non-general scope are contained in the values of the general scope.
     * When FINE logging is enabled, also logs the per-locale grammar info and the union of
     * all values seen per target/feature/scope.
     */
    public void TestGrammarInfo() {
        final Logger logger = getLogger();
        // Checked once so the per-locale dump string is only built when it will be logged.
        final boolean fineLogging = logger.isLoggable(java.util.logging.Level.FINE);
        Multimap<String,String> allValues = TreeMultimap.create();
        for (String locale : SUPPLEMENTAL.hasGrammarInfo()) {
            GrammarInfo grammarInfo = SUPPLEMENTAL.getGrammarInfo(locale);
            for (GrammaticalTarget target : GrammaticalTarget.values()) {
                for (GrammaticalFeature feature : GrammaticalFeature.values()) {
                    Collection<String> general = grammarInfo.get(target, feature, GrammaticalScope.general);
                    for (GrammaticalScope scope : GrammaticalScope.values()) {
                        Collection<String> units = grammarInfo.get(target, feature, scope);
                        allValues.putAll(target + "/" + feature + "/" + scope, units);
                        if (scope != GrammaticalScope.general) {
                            assertTrue(general + " > " + scope + " " + units, general.containsAll(units));
                        }
                    }
                }
            }
            if (fineLogging) {
                logger.fine(grammarInfo.toString("\n" + locale + "\t"));
            }
        }
        if (fineLogging) {
            logger.fine("");
            for (Entry<String, Collection<String>> entry : allValues.asMap().entrySet()) {
                logger.fine(entry.getKey() + "\t" + Joiner.on(", ").join(entry.getValue()));
            }
        }
    }
}