1 package org.unicode.cldr.unittest; 2 3 import java.util.ArrayList; 4 import java.util.Arrays; 5 import java.util.Collection; 6 import java.util.Collections; 7 import java.util.Date; 8 import java.util.EnumMap; 9 import java.util.EnumSet; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.Iterator; 13 import java.util.LinkedHashMap; 14 import java.util.LinkedHashSet; 15 import java.util.List; 16 import java.util.Locale; 17 import java.util.Map; 18 import java.util.Map.Entry; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.TreeSet; 22 import java.util.logging.Logger; 23 import java.util.regex.Matcher; 24 import java.util.regex.Pattern; 25 26 import org.unicode.cldr.draft.ScriptMetadata; 27 import org.unicode.cldr.test.CoverageLevel2; 28 import org.unicode.cldr.tool.LikelySubtags; 29 import org.unicode.cldr.tool.PluralMinimalPairs; 30 import org.unicode.cldr.tool.PluralRulesFactory; 31 import org.unicode.cldr.util.Builder; 32 import org.unicode.cldr.util.CLDRConfig; 33 import org.unicode.cldr.util.CLDRFile; 34 import org.unicode.cldr.util.CLDRFile.WinningChoice; 35 import org.unicode.cldr.util.CLDRLocale; 36 import org.unicode.cldr.util.CldrUtility; 37 import org.unicode.cldr.util.GrammarInfo; 38 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; 39 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope; 40 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget; 41 import org.unicode.cldr.util.Iso639Data; 42 import org.unicode.cldr.util.Iso639Data.Scope; 43 import org.unicode.cldr.util.IsoCurrencyParser; 44 import org.unicode.cldr.util.LanguageTagCanonicalizer; 45 import org.unicode.cldr.util.LanguageTagParser; 46 import org.unicode.cldr.util.Level; 47 import org.unicode.cldr.util.Organization; 48 import org.unicode.cldr.util.Pair; 49 import org.unicode.cldr.util.PluralRanges; 50 import org.unicode.cldr.util.PreferredAndAllowedHour; 51 import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle; 52 import org.unicode.cldr.util.StandardCodes; 53 import org.unicode.cldr.util.StandardCodes.CodeType; 54 import org.unicode.cldr.util.StandardCodes.LstrType; 55 import org.unicode.cldr.util.SupplementalDataInfo; 56 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 57 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type; 58 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; 59 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 60 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; 61 import org.unicode.cldr.util.SupplementalDataInfo.DateRange; 62 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange; 63 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 64 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 65 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 66 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 67 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 68 import org.unicode.cldr.util.SupplementalDataInfo.SampleList; 69 import org.unicode.cldr.util.Validity; 70 import org.unicode.cldr.util.Validity.Status; 71 72 import com.google.common.base.Joiner; 73 import com.google.common.collect.ImmutableSet; 74 import com.google.common.collect.Multimap; 75 import com.google.common.collect.TreeMultimap; 76 import com.ibm.icu.impl.Relation; 77 import com.ibm.icu.impl.Row; 78 import com.ibm.icu.impl.Row.R2; 79 import com.ibm.icu.impl.Row.R3; 80 import com.ibm.icu.impl.Utility; 81 import com.ibm.icu.lang.UCharacter; 82 import com.ibm.icu.lang.UCharacterEnums; 83 import com.ibm.icu.lang.UScript; 84 import com.ibm.icu.text.PluralRules; 85 import com.ibm.icu.text.PluralRules.FixedDecimal; 86 import com.ibm.icu.text.PluralRules.FixedDecimalRange; 87 import com.ibm.icu.text.PluralRules.FixedDecimalSamples; 88 import com.ibm.icu.text.PluralRules.Operand; 89 import com.ibm.icu.text.PluralRules.SampleType; 90 import com.ibm.icu.text.StringTransform; 91 import com.ibm.icu.text.UnicodeSet; 92 import com.ibm.icu.util.Output; 93 import com.ibm.icu.util.TimeZone; 94 import com.ibm.icu.util.ULocale; 95 96 public class TestSupplementalInfo extends TestFmwkPlus { 97 static CLDRConfig testInfo = CLDRConfig.getInstance(); 98 99 private static final StandardCodes STANDARD_CODES = StandardCodes.make(); 100 101 private static final SupplementalDataInfo SUPPLEMENTAL = testInfo 102 .getSupplementalDataInfo(); 103 main(String[] args)104 public static void main(String[] args) { 105 new TestSupplementalInfo().run(args); 106 } 107 TestPluralSampleOrder()108 public void TestPluralSampleOrder() { 109 HashSet<PluralInfo> seen = new HashSet<>(); 110 for (String locale : SUPPLEMENTAL.getPluralLocales()) { 111 if (locale.equals("root")) { 112 continue; 113 } 114 PluralInfo pi = SUPPLEMENTAL.getPlurals(locale); 115 if (seen.contains(pi)) { 116 continue; 117 } 118 seen.add(pi); 119 for (SampleType s : SampleType.values()) { 120 for (Count c : pi.getCounts(s)) { 121 FixedDecimalSamples sSamples = pi.getPluralRules() 122 .getDecimalSamples(c.toString(), s); 123 if (sSamples == null) { 124 errln(locale + " no sample for " + c); 125 continue; 126 } 127 if (s == SampleType.DECIMAL) { 128 continue; // skip 129 } 130 FixedDecimalRange lastSample = null; 131 for (FixedDecimalRange sample : sSamples.samples) { 132 if (lastSample != null) { 133 if (compare(lastSample.start,sample.start) > 0) { 134 errln(locale + ":" + c + ": out of order with " 135 + lastSample + " > " + sample); 136 } else if (false) { 137 logln(locale + ":" + c + ": in order with " 138 + lastSample + " < " + sample); 139 } 140 } 141 lastSample = sample; 142 } 143 } 144 } 145 } 146 } 147 148 /** 149 * Hack until ICU's FixedDecimal is fixed 150 * 151 */ compare(PluralRules.FixedDecimal me, PluralRules.FixedDecimal other)152 public static int compare(PluralRules.FixedDecimal me, PluralRules.FixedDecimal other) { 153 if (me.getPluralOperand(Operand.e) != other.getPluralOperand(Operand.e)) { 154 return me.getPluralOperand(Operand.e) < other.getPluralOperand(Operand.e) ? -1 : 1; 155 } 156 if (me.getIntegerValue() != other.getIntegerValue()) { 157 return me.getIntegerValue() < other.getIntegerValue() ? -1 : 1; 158 } 159 if (me.getSource() != other.getSource()) { 160 return me.getSource() < other.getSource() ? -1 : 1; 161 } 162 if (me.getVisibleDecimalDigitCount() != other.getVisibleDecimalDigitCount()) { 163 return me.getVisibleDecimalDigitCount() < other.getVisibleDecimalDigitCount() ? -1 : 1; 164 } 165 long diff = me.getDecimalDigits() - other.getDecimalDigits(); 166 if (diff != 0) { 167 return diff < 0 ? -1 : 1; 168 } 169 return 0; 170 } 171 TestPluralRanges()172 public void TestPluralRanges() { 173 PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 174 Set<String> localesToTest = new TreeSet<>( 175 SUPPLEMENTAL.getPluralRangesLocales()); 176 for (String locale : StandardCodes.make().getLocaleCoverageLocales( 177 "google")) { // superset 178 if (locale.equals("*") || locale.contains("_")) { 179 continue; 180 } 181 localesToTest.add(locale); 182 } 183 Set<String> modernLocales = StandardCodes.make() 184 .getLocaleCoverageLocales(Organization.cldr, 185 EnumSet.of(Level.MODERN)); 186 187 Output<FixedDecimal> maxSample = new Output<>(); 188 Output<FixedDecimal> minSample = new Output<>(); 189 190 for (String locale : localesToTest) { 191 final String templateLine = "Template for " + ULocale.getDisplayName(locale, "en") + " (" + locale + ") translators to fix:"; 192 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 193 Set<Count> counts = pluralInfo.getCounts(); 194 195 final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(new ULocale(locale).toString()); 196 197 // check that there are no null values 198 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 199 if (pluralRanges == null) { 200 if (!modernLocales.contains(locale)) { 201 logln("Missing plural ranges for " + locale); 202 } else { 203 errOrLog(CoverageIssue.error, locale + "\tMissing plural ranges", "Cldrbug:7839", "Missing plural data for modern locales"); 204 StringBuilder failureCases = new StringBuilder(templateLine); 205 for (Count start : counts) { 206 for (Count end : counts) { 207 pluralInfo.rangeExists(start, end, minSample, maxSample); 208 final String rangeLine = getRangeLine(start, end, null, maxSample, minSample, samplePatterns); 209 failureCases.append("\n" + locale + "\t" + rangeLine); 210 } 211 } 212 errOrLog(CoverageIssue.warn, failureCases.toString()); 213 } 214 continue; 215 } 216 EnumSet<Count> found = EnumSet.noneOf(Count.class); 217 for (Count count : Count.values()) { 218 if (pluralRanges.isExplicitlySet(count) 219 && !counts.contains(count)) { 220 assertTrue( 221 locale 222 + "\t pluralRanges categories must be valid for locale:\t" 223 + count + " must be in " + counts, 224 !pluralRanges.isExplicitlySet(count)); 225 } 226 for (Count end : Count.values()) { 227 Count result = pluralRanges.getExplicit(count, end); 228 if (result != null) { 229 found.add(result); 230 } 231 } 232 } 233 234 // check empty range results 235 if (found.isEmpty()) { 236 errOrLog(CoverageIssue.error, "Empty range results for " + locale, "Cldrbug:7839", "Missing plural data for modern locales"); 237 } else { 238 if (samplePatterns == null) { 239 errOrLog(CoverageIssue.error, locale + "\tMissing sample patterns", "Cldrbug:7839", "Missing plural data for modern locales"); 240 } else { 241 for (Count result : found) { 242 String samplePattern = samplePatterns.get( 243 PluralRules.PluralType.CARDINAL, result); 244 if (samplePattern != null && !samplePattern.contains("{0}")) { 245 errln("Plural Ranges cannot have results that don't use {0} in samples: " 246 + locale 247 + ", " 248 + result 249 + "\t«" 250 + samplePattern + "»"); 251 } 252 } 253 } 254 if (isVerbose()) { 255 logln("Range results for " + locale + ":\t" + found); 256 } 257 } 258 259 // check for missing values 260 boolean failure = false; 261 StringBuilder failureCases = new StringBuilder(templateLine); 262 for (Count start : counts) { 263 for (Count end : counts) { 264 boolean needsValue = pluralInfo.rangeExists(start, end, 265 minSample, maxSample); 266 Count explicitValue = pluralRanges.getExplicit(start, end); 267 final String rangeLine = getRangeLine(start, end, explicitValue, maxSample, minSample, samplePatterns); 268 failureCases.append("\n" + locale + "\t" + rangeLine); 269 if (needsValue && explicitValue == null) { 270 errOrLog(CoverageIssue.error, locale + "\tNo explicit value for range: " 271 + rangeLine, 272 "Cldrbug:7839", "Missing plural data for modern locales"); 273 failure = true; 274 failureCases.append("\tError — need explicit result"); 275 } else if (!needsValue && explicitValue != null) { 276 errOrLog(CoverageIssue.error, locale + "\tDoesn't need explicit value, but has one: " 277 + PluralRanges.showRange(start, end, explicitValue), 278 "Cldrbug:7839", "Missing plural data for modern locales"); 279 failureCases.append("\tUnnecessary"); 280 failure = true; 281 } else { 282 failureCases.append("\tOK"); 283 } 284 } 285 } 286 if (failure) { 287 errOrLog(CoverageIssue.warn, failureCases.toString()); 288 } 289 } 290 } 291 getRangeLine(Count start, Count end, Count result, Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample, PluralMinimalPairs samplePatterns)292 private String getRangeLine(Count start, Count end, Count result, 293 Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample, 294 PluralMinimalPairs samplePatterns) { 295 final String range = minSample + "–" + maxSample; 296 String example = range; 297 if (samplePatterns != null) { 298 example = ""; 299 if (result != null) { 300 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result); 301 example += "«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»"; 302 } else { 303 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) { 304 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c); 305 example += c + ":«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»" + "?\tOR "; 306 } 307 example += " …"; 308 } 309 } 310 return start + "\t" + end + "\t" + (result == null ? "?" : result.toString()) + "\t" + example; 311 } 312 getRangeLine(Count count, PluralRules pluralRules, String pattern)313 private String getRangeLine(Count count, PluralRules pluralRules, String pattern) { 314 String sample = "?"; 315 FixedDecimalSamples exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER); 316 if (exampleList == null) { 317 exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL); 318 } 319 FixedDecimal sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList); 320 sample = sampleDecimal.toString(); 321 322 String example = pattern == null ? "NO-SAMPLE!" : "«" + pattern.replace("{0}", sample) + "»"; 323 return count + "\t" + example; 324 } 325 TestPluralSamples()326 public void TestPluralSamples() { 327 String[][] test = { { "en", "ordinal", "1", "one" }, 328 { "en", "ordinal", "2", "two" }, 329 { "en", "ordinal", "3", "few" }, 330 { "en", "ordinal", "4", "other" }, 331 { "sl", "cardinal", "2", "two" }, }; 332 for (String[] row : test) { 333 checkPluralSamples(row); 334 } 335 } 336 TestPluralSamples2()337 public void TestPluralSamples2() { 338 PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 339 for (String locale : prf.getLocales()) { 340 if (locale.equals("und")) { 341 continue; 342 } 343 if (locale.equals("pl")) { 344 int debug = 0; 345 } 346 final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale); 347 for (PluralRules.PluralType type : PluralRules.PluralType.values()) { 348 PluralInfo rules = SUPPLEMENTAL.getPlurals( 349 SupplementalDataInfo.PluralType.fromStandardType(type), 350 locale.toString()); 351 if (rules.getCounts().size() == 1) { 352 continue; // don't require rules for unary cases 353 } 354 Multimap<String, Count> sampleToCount = TreeMultimap.create(); 355 356 for (Count count : rules.getCounts()) { 357 String sample = samplePatterns.get(type, count); 358 if (sample == null) { 359 errOrLog(CoverageIssue.error, locale + "\t" + type + " \tmissing samples for " + count, "cldrbug:7075", 360 "Missing ordinal minimal pairs"); 361 } else { 362 sampleToCount.put(sample, count); 363 PluralRules pRules = rules.getPluralRules(); 364 double unique = pRules.getUniqueKeywordValue(count 365 .toString()); 366 if (unique == PluralRules.NO_UNIQUE_VALUE 367 && !sample.contains("{0}")) { 368 errln("Missing {0} in sample: " + locale + ", " + type + ", " + count + " «" + sample + "»"); 369 } 370 } 371 } 372 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) { 373 if (entry.getValue().size() > 1) { 374 errln("Colliding minimal pair samples: " + locale + ", " + type + ", " + entry.getValue() + " «" + entry.getKey() + "»"); 375 } 376 } 377 } 378 } 379 } 380 TestCldrScriptCodes()381 public void TestCldrScriptCodes() { 382 Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes(); 383 384 Set<String> unicodeScripts = ScriptMetadata.getScripts(); 385 assertRelation("getCLDRScriptCodes contains Unicode Scripts", true, codes, CONTAINS_ALL, unicodeScripts); 386 387 ImmutableSet<String> allSpecials = ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz"); 388 assertRelation("getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials); 389 390 ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore"); 391 assertRelation("getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos); 392 393 Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script); 394 for (Entry<Status, Set<String>> e : scripts.entrySet()) { 395 switch (e.getKey()) { 396 case regular: 397 case special: 398 case unknown: 399 assertRelation("getCLDRScriptCodes contains " + e.getKey(), true, codes, CONTAINS_ALL, e.getValue()); 400 break; 401 default: 402 break; // do nothin 403 } 404 } 405 406 ImmutableSet<String> variants = ImmutableSet.of("Cyrs", "Geok", "Latf", "Latg", "Syre", "Syrj", "Syrn"); 407 assertRelation("getCLDRScriptCodes contains variants", false, codes, CONTAINS_SOME, variants); 408 } 409 checkPluralSamples(String... row)410 public void checkPluralSamples(String... row) { 411 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals( 412 PluralType.valueOf(row[1]), row[0]); 413 Count count = pluralInfo.getCount(new FixedDecimal(row[2])); 414 assertEquals(String.join(", ", row), 415 Count.valueOf(row[3]), count); 416 } 417 TestPluralLocales()418 public void TestPluralLocales() { 419 // get the unique rules 420 for (PluralType type : PluralType.values()) { 421 Relation<PluralInfo, String> pluralsToLocale = Relation.of( 422 new HashMap<PluralInfo, Set<String>>(), TreeSet.class); 423 for (String locale : new TreeSet<>( 424 SUPPLEMENTAL.getPluralLocales(type))) { 425 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale); 426 pluralsToLocale.put(pluralInfo, locale); 427 } 428 429 String[][] equivalents = { { "mo", "ro" }, { "tl", "fil" }, 430 { "he", "iw" }, { "in", "id" }, { "jw", "jv" }, 431 { "ji", "yi" }, { "sh", "sr" }, }; 432 for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale 433 .keyValuesSet()) { 434 PluralInfo pluralInfo2 = pluralInfoEntry.getKey(); 435 Set<String> locales = pluralInfoEntry.getValue(); 436 // check that equivalent locales are either both in or both out 437 for (String[] row : equivalents) { 438 assertEquals( 439 type + " must be equivalent: " + Arrays.asList(row), 440 locales.contains(row[0]), locales.contains(row[1])); 441 } 442 // check that no rules contain 'within' 443 for (Count count : pluralInfo2.getCounts()) { 444 String rule = pluralInfo2.getRule(count); 445 if (rule == null) { 446 continue; 447 } 448 assertFalse( 449 "Rule '" + rule + "' for " + Arrays.asList(locales) 450 + " doesn't contain 'within'", 451 rule.contains("within")); 452 } 453 } 454 } 455 } 456 TestDigitPluralCases()457 public void TestDigitPluralCases() { 458 String[][] tests = { 459 { "en", "one", "1", "1" }, 460 { "en", "one", "2", "" }, 461 { "en", "one", "3", "" }, 462 { "en", "one", "4", "" }, 463 { "en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, …" }, 464 { "en", "other", "2", "10-99, 10.0, 10.1, 10.2, …" }, 465 { "en", "other", "3", "100-999, 100.0, 100.1, 100.2, …" }, 466 { "en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, …" }, 467 { "hr", "one", "1", "1, 0.1, 2.10, 1.1, …" }, 468 { "hr", "one", "2", 469 "21, 31, 41, 51, 61, 71, …, 10.1, 12.10, 11.1, …" }, 470 { "hr", "one", "3", 471 "101, 121, 131, 141, 151, 161, …, 100.1, 102.10, 101.1, …" }, 472 { "hr", "one", "4", 473 "1001, 1021, 1031, 1041, 1051, 1061, …, 1000.1, 1002.10, 1001.1, …" }, 474 { "hr", "few", "1", "2-4, 0.2, 0.3, 0.4, …" }, 475 { "hr", "few", "2", 476 "22-24, 32-34, 42-44, …, 10.2, 10.3, 10.4, …" }, 477 { "hr", "few", "3", 478 "102-104, 122-124, 132-134, …, 100.2, 100.3, 100.4, …" }, 479 { "hr", "few", "4", 480 "1002-1004, 1022-1024, 1032-1034, …, 1000.2, 1000.3, 1000.4, …" }, 481 { "hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, …" }, 482 { "hr", "other", "2", 483 "10-20, 25-30, 35-40, …, 10.0, 10.5, 10.6, …" }, 484 { "hr", "other", "3", 485 "100, 105-120, 125-130, 135-140, …, 100.0, 100.5, 100.6, …" }, 486 { "hr", "other", "4", 487 "1000, 1005-1020, 1025-1030, 1035-1040, …, 1000.0, 1000.5, 1000.6, …" }, }; 488 for (String[] row : tests) { 489 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]); 490 SampleList uset = plurals.getSamples9999(Count.valueOf(row[1]), 491 Integer.parseInt(row[2])); 492 assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3], 493 uset.toString()); 494 } 495 } 496 TestDigitPluralCompleteness()497 public void TestDigitPluralCompleteness() { 498 String[][] exceptionStrings = { 499 // defaults 500 { "*", "zero", "0,00,000,0000" }, 501 { "*", "one", "0" }, 502 { "*", "two", "0,00,000,0000" }, 503 { "*", "few", "0,00,000,0000" }, 504 { "*", "many", "0,00,000,0000" }, 505 { "*", "other", "0,00,000,0000" }, 506 // others 507 { "mo", "other", "00,000,0000" }, // 508 { "ro", "other", "00,000,0000" }, // 509 { "cs", "few", "0" }, // j in 2..4 510 { "sk", "few", "0" }, // j in 2..4 511 { "da", "one", "0" }, // j is 1 or t is not 0 and n within 0..2 512 { "is", "one", "0,00,000,0000" }, // j is 1 or f is 1 513 { "sv", "one", "0" }, // j is 1 514 { "he", "two", "0" }, // j is 2 515 { "ru", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 516 // is not 11 517 { "uk", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 518 // is not 11 519 { "bs", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 520 // is not 11 or f mod 10 is 521 // 1 and f mod 100 is not 11 522 { "hr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 523 // is not 11 or f mod 10 is 524 // 1 and f mod 100 is not 11 525 { "sh", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 526 // is not 11 or f mod 10 is 527 // 1 and f mod 100 is not 11 528 { "sr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 529 // is not 11 or f mod 10 is 530 // 1 and f mod 100 is not 11 531 { "mk", "one", "0,00,000,0000" }, // j mod 10 is 1 or f mod 10 532 // is 1 533 { "sl", "one", "0,000,0000" }, // j mod 100 is 1 534 { "sl", "two", "0,000,0000" }, // j mod 100 is 2 535 { "he", "many", "00,000,0000" }, // j not in 0..10 and j mod 10 536 // is 0 537 { "tzm", "one", "0,00" }, // n in 0..1 or n in 11..99 538 { "gd", "one", "0,00" }, // n in 1,11 539 { "gd", "two", "0,00" }, // n in 2,12 540 { "shi", "few", "0,00" }, // n in 2..10 541 { "gd", "few", "0,00" }, // n in 3..10,13..19 542 { "ga", "few", "0" }, // n in 3..6 543 { "ga", "many", "0,00" }, // n in 7..10 544 { "ar", "zero", "0" }, // n is 0 545 { "cy", "zero", "0" }, // n is 0 546 { "ksh", "zero", "0" }, // n is 0 547 { "lag", "zero", "0" }, // n is 0 548 { "pt", "one", "0" }, // i = 1 and v = 0 or i = 0 and t = 1 549 { "pt_PT", "one", "0" }, // n = 1 and v = 0 550 { "ar", "two", "0" }, // n is 2 551 { "cy", "two", "0" }, // n is 2 552 { "ga", "two", "0" }, // n is 2 553 { "iu", "two", "0" }, // n is 2 554 { "naq", "two", "0" }, // n is 2 555 { "se", "two", "0" }, // n is 2 556 { "sma", "two", "0" }, // n is 2 557 { "smi", "two", "0" }, // n is 2 558 { "smj", "two", "0" }, // n is 2 559 { "smn", "two", "0" }, // n is 2 560 { "sms", "two", "0" }, // n is 2 561 { "cy", "few", "0" }, // n is 3 562 { "cy", "many", "0" }, // n is 6 563 { "br", "many", "" }, // n is not 0 and n mod 1000000 is 0 564 { "gv", "one", "0,00,000,0000" }, // n mod 10 is 1 565 { "be", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 566 // is not 11 567 { "lv", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 568 // is not 11 or v is 2 and f 569 // mod 10 is 1 and f mod 100 570 // is not 11 or v is not 2 571 // and f mod 10 is 1 572 { "br", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 573 // not in 11,71,91 574 { "lt", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 575 // not in 11..19 576 { "fil", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v = 577 // 0 and i % 10 != 4,6,9 or 578 // v != 0 and f % 10 != 579 // 4,6,9 580 { "tl", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v = 581 // 0 and i % 10 != 4,6,9 or 582 // v != 0 and f % 10 != 583 // 4,6,9 584 { "dsb", "one", "0,00,000,0000" }, // v = 0 and i % 100 = 1 or f 585 // % 100 = 1 586 {"kw", "many", "00,000,0000"}, // n != 1 and n % 100 = 1,21,41,61,81 587 {"kw", "zero", "0"}, // n = 0 588 {"fr", "many", ""}, // e is special 589 {"it", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 590 {"es", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 591 {"pt", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 592 {"pt_PT", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 593 }; 594 // parse out the exceptions 595 Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<>(); 596 Relation<Count, Integer> fallback = Relation.of( 597 new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class); 598 for (String[] row : exceptionStrings) { 599 Relation<Count, Integer> countToDigits; 600 if (row[0].equals("*")) { 601 countToDigits = fallback; 602 } else { 603 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]); 604 countToDigits = exceptions.get(plurals); 605 if (countToDigits == null) { 606 exceptions.put( 607 plurals, 608 countToDigits = Relation.of( 609 new EnumMap<Count, Set<Integer>>( 610 Count.class), 611 TreeSet.class)); 612 } 613 } 614 Count c = Count.valueOf(row[1]); 615 for (String digit : row[2].split(",")) { 616 // "99" is special, just to have the result be non-empty 617 countToDigits.put(c, digit.length()); 618 } 619 } 620 Set<PluralInfo> seen = new HashSet<>(); 621 Set<String> sorted = new TreeSet<>( 622 SUPPLEMENTAL.getPluralLocales(PluralType.cardinal)); 623 Relation<String, String> ruleToExceptions = Relation.of( 624 new TreeMap<String, Set<String>>(), TreeSet.class); 625 626 for (String locale : sorted) { 627 PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale); 628 if (seen.contains(plurals)) { // skip identicals 629 continue; 630 } 631 Relation<Count, Integer> countToDigits = exceptions.get(plurals); 632 if (countToDigits == null) { 633 countToDigits = fallback; 634 } 635 for (Count c : plurals.getCounts()) { 636 List<String> compose = new ArrayList<>(); 637 boolean needLine = false; 638 Set<Integer> digitSet = countToDigits.get(c); 639 if (digitSet == null) { 640 digitSet = fallback.get(c); 641 } 642 for (int digits = 1; digits < 5; ++digits) { 643 boolean expected = digitSet.contains(digits); 644 boolean hasSamples = plurals.hasSamples(c, digits); 645 if (hasSamples) { 646 compose.add(Utility.repeat("0", digits)); 647 } 648 if (!assertEquals(locale + ", " + digits + ", " + c, 649 expected, hasSamples)) { 650 needLine = true; 651 } 652 } 653 if (needLine) { 654 String countRules = plurals.getPluralRules().getRules( 655 c.toString()); 656 ruleToExceptions.put(countRules == null ? "" : countRules, 657 "{\"" + locale + "\", \"" + c + "\", \"" 658 + Joiner.on(",").join(compose) 659 + "\"},"); 660 } 661 } 662 } 663 if (!ruleToExceptions.isEmpty()) { 664 System.out 665 .println("To fix the above, review the following, then replace in TestDigitPluralCompleteness"); 666 for (Entry<String, String> entry : ruleToExceptions.entrySet()) { 667 System.out.println(entry.getValue() + "\t// " + entry.getKey()); 668 } 669 } 670 } 671 TestLikelyCode()672 public void TestLikelyCode() { 673 Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags(); 674 String[][] tests = { { "it_AQ", "it_Latn_AQ" }, 675 { "it_Arab", "it_Arab_IT" }, { "az_Cyrl", "az_Cyrl_AZ" }, }; 676 for (String[] pair : tests) { 677 String newMax = LikelySubtags.maximize(pair[0], likely); 678 assertEquals("Likely", pair[1], newMax); 679 } 680 681 } 682 TestLikelySubtagCompleteness()683 public void TestLikelySubtagCompleteness() { 684 Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags(); 685 686 for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) { 687 if (!likely.containsKey(language)) { 688 logln("WARNING: No likely subtag for CLDR language code (" 689 + language + ")"); 690 } 691 } 692 for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) { 693 if (!likely.containsKey("und_" + script) 694 && !script.equals("Latn") 695 && !script.equals("Zinh") 696 && !script.equals("Zyyy") 697 && ScriptMetadata.getInfo(script) != null 698 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION 699 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) { 700 errln("No likely subtag for CLDR script code (und_" + script 701 + ")"); 702 } 703 } 704 705 } 706 TestEquivalentLocales()707 public void TestEquivalentLocales() { 708 Set<Set<String>> seen = new HashSet<>(); 709 Set<String> toTest = new TreeSet<>(testInfo.getCldrFactory() 710 .getAvailable()); 711 toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet()); 712 toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values()); 713 toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales()); 714 LanguageTagParser ltp = new LanguageTagParser(); 715 main: for (String locale : toTest) { 716 if (locale.startsWith("und") || locale.equals("root")) { 717 continue; 718 } 719 Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale); 720 if (seen.contains(s)) { 721 continue; 722 } 723 724 List<String> ss = new ArrayList<>(s); 725 String last = ss.get(ss.size() - 1); 726 ltp.set(last); 727 if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) { 728 continue; // skip variants for now. 729 } 730 String language = ltp.getLanguage(); 731 String script = ltp.getScript(); 732 String region = ltp.getRegion(); 733 if (!script.isEmpty() && !region.isEmpty()) { 734 String noScript = ltp.setScript("").toString(); 735 String noRegion = ltp.setScript(script).setRegion("") 736 .toString(); 737 switch (s.size()) { 738 case 1: // ok if already maximized and strange script/country, 739 // eg it_Arab_JA 740 continue main; 741 case 2: // ok if adds default country/script, eg {en_Cyrl, 742 // en_Cyrl_US} or {en_GB, en_Latn_GB} 743 String first = ss.get(0); 744 if (first.equals(noScript) || first.equals(noRegion)) { 745 continue main; 746 } 747 break; 748 case 3: // ok if different script in different country, eg 749 // {az_IR, az_Arab, az_Arab_IR} 750 if (noScript.equals(ss.get(0)) 751 && noRegion.equals(ss.get(1))) { 752 continue main; 753 } 754 break; 755 case 4: // ok if all combinations, eg {en, en_US, en_Latn, 756 // en_Latn_US} 757 if (language.equals(ss.get(0)) 758 && noScript.equals(ss.get(1)) 759 && noRegion.equals(ss.get(2))) { 760 continue main; 761 } 762 break; 763 } 764 } 765 errln("Strange size or composition:\t" + s + " \t" 766 + showLocaleParts(s)); 767 seen.add(s); 768 } 769 } 770 showLocaleParts(Set<String> s)771 private String showLocaleParts(Set<String> s) { 772 LanguageTagParser ltp = new LanguageTagParser(); 773 Set<String> b = new LinkedHashSet<>(); 774 for (String ss : s) { 775 ltp.set(ss); 776 addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b); 777 addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b); 778 addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b); 779 } 780 return Joiner.on("; ").join(b); 781 } 782 addName(int languageName, String code, Set<String> b)783 private void addName(int languageName, String code, Set<String> b) { 784 if (code.isEmpty()) { 785 return; 786 } 787 String name = testInfo.getEnglish().getName(languageName, code); 788 if (!code.equals(name)) { 789 b.add(code + "=" + name); 790 } 791 } 792 TestDefaultScriptCompleteness()793 public void TestDefaultScriptCompleteness() { 794 Relation<String, String> scriptToBase = Relation.of( 795 new LinkedHashMap<String, Set<String>>(), TreeSet.class); 796 main: for (String locale : testInfo.getCldrFactory() 797 .getAvailableLanguages()) { 798 if (!locale.contains("_") && !"root".equals(locale)) { 799 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale); 800 if (defaultScript != null) { 801 continue; 802 } 803 CLDRFile cldrFile = testInfo.getCLDRFile(locale, 804 false); 805 UnicodeSet set = cldrFile.getExemplarSet("", 806 WinningChoice.NORMAL); 807 for (String s : set) { 808 int script = UScript.getScript(s.codePointAt(0)); 809 if (script != UScript.UNKNOWN && script != UScript.COMMON 810 && script != UScript.INHERITED) { 811 scriptToBase.put(UScript.getShortName(script), locale); 812 continue main; 813 } 814 } 815 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale); 816 } 817 } 818 if (scriptToBase.size() != 0) { 819 for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) { 820 errln("Default Scripts missing:\t" + entry.getKey() + "\t" 821 + entry.getValue()); 822 } 823 } 824 } 825 TestTimeData()826 public void TestTimeData() { 827 Map<String, PreferredAndAllowedHour> timeData = SUPPLEMENTAL 828 .getTimeData(); 829 Set<String> regionsSoFar = new HashSet<>(); 830 Set<String> current24only = new HashSet<>(); 831 Set<String> current12preferred = new HashSet<>(); 832 833 boolean haveWorld = false; 834 835 ImmutableSet<HourStyle> oldSchool = ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k)); 836 837 for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) { 838 String region = e.getKey(); 839 if (region.equals("001")) { 840 haveWorld = true; 841 } 842 regionsSoFar.add(region); 843 PreferredAndAllowedHour preferredAndAllowedHour = e.getValue(); 844 assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred); 845 846 // find first h or H 847 HourStyle found = null; 848 849 for (HourStyle item : preferredAndAllowedHour.allowed) { 850 if (oldSchool.contains(item)) { 851 found = item; 852 if (item != preferredAndAllowedHour.preferred) { 853 String message = "Inconsistent values for " + region + ": preferred=" + preferredAndAllowedHour.preferred 854 + " but that isn't the first " + oldSchool + " in allowed: " + preferredAndAllowedHour.allowed; 855 //if (!logKnownIssue("cldrbug:11448", message)) { 856 errln(message); 857 //} 858 } 859 break; 860 } 861 } 862 if (found == null) { 863 errln(region + ": preferred " + preferredAndAllowedHour.preferred 864 + " not in " + preferredAndAllowedHour.allowed); 865 } 866 // final HourStyle firstAllowed = preferredAndAllowedHour.allowed.iterator().next(); 867 // if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.h 868 // || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.hb 869 // || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed == HourStyle.H) { 870 // errln(region + ": allowed " + preferredAndAllowedHour.allowed 871 // + " starts with preferred " + preferredAndAllowedHour.preferred); 872 // } else if (isVerbose()) { 873 // logln(region + ": allowed " + preferredAndAllowedHour.allowed 874 // + " starts with preferred " + preferredAndAllowedHour.preferred); 875 // } 876 // for (HourStyle c : preferredAndAllowedHour.allowed) { 877 // if (!PreferredAndAllowedHour.HOURS.contains(c)) { 878 // errln(region + ": illegal character in " + 879 // preferredAndAllowedHour.allowed + ". It contains " + c 880 // + " which is not in " + PreferredAndAllowedHour.HOURS); 881 // } 882 // } 883 if (!preferredAndAllowedHour.allowed.contains(HourStyle.h) 884 && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) { 885 current24only.add(region); 886 } 887 if (preferredAndAllowedHour.preferred == HourStyle.h) { 888 current12preferred.add(region); 889 } 890 } 891 Set<String> missing = new TreeSet<>( 892 STANDARD_CODES.getGoodAvailableCodes(CodeType.territory)); 893 missing.removeAll(regionsSoFar); 894 for (Iterator<String> it = missing.iterator(); it.hasNext();) { 895 if (!StandardCodes.isCountry(it.next())) { 896 it.remove(); 897 } 898 } 899 900 // if we don't have 001, then we can't miss any regions 901 if (!missing.isEmpty()) { 902 if (haveWorld) { 903 logln("Implicit regions: " + missing); 904 } else { 905 errln("Missing regions: " + missing); 906 } 907 } 908 909 // The feedback gathered from our translators is that the following use 910 // 24 hour time ONLY: 911 Set<String> only24lang = new TreeSet<>( 912 Arrays.asList(("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, " 913 + "fr, gl, he, is, id, it, no, pt, ro, ru, sr, sk, sl, sv, tr, hy") 914 .split(",\\s*"))); 915 // With the new preferences, this is changed 916 Set<String> only24region = new TreeSet<>(); 917 Set<String> either24or12region = new TreeSet<>(); 918 919 // get all countries where official or de-facto official 920 // add them two one of two lists, based on the above list of languages 921 for (String language : SUPPLEMENTAL 922 .getLanguagesForTerritoriesPopulationData()) { 923 boolean a24lang = only24lang.contains(language); 924 for (String region : SUPPLEMENTAL 925 .getTerritoriesForPopulationData(language)) { 926 PopulationData pop = SUPPLEMENTAL 927 .getLanguageAndTerritoryPopulationData(language, region); 928 if (pop.getOfficialStatus().compareTo( 929 OfficialStatus.de_facto_official) < 0) { 930 continue; 931 } 932 if (a24lang) { 933 only24region.add(region); 934 } else { 935 either24or12region.add(region); 936 } 937 } 938 } 939 // if we have a case like CA, where en uses 12/24 but fr uses 24, remove 940 // it for safety 941 only24region.removeAll(either24or12region); 942 // There are always exceptions... Remove SM (San Marino) and VA (Vatican), 943 // since they allows 12/24 but the de facto langauge is Italian. 944 only24region.remove("SM"); 945 only24region.remove("VA"); 946 // also remove all the regions where 'h' is preferred 947 only24region.removeAll(current12preferred); 948 // now verify 949 if (!current24only.containsAll(only24region)) { 950 Set<String> missing24only = new TreeSet<>(only24region); 951 missing24only.removeAll(current24only); 952 953 errln("24-hour-only doesn't include needed items:\n" 954 + " add " 955 + CldrUtility.join(missing24only, " ") 956 + "\n\t\t" 957 + CldrUtility.join(missing24only, "\n\t\t", 958 new NameCodeTransform(testInfo.getEnglish(), 959 CLDRFile.TERRITORY_NAME))); 960 } 961 } 962 963 public static class NameCodeTransform implements StringTransform { 964 private final CLDRFile file; 965 private final int codeType; 966 NameCodeTransform(CLDRFile file, int code)967 public NameCodeTransform(CLDRFile file, int code) { 968 this.file = file; 969 this.codeType = code; 970 } 971 972 @Override transform(String code)973 public String transform(String code) { 974 return file.getName(codeType, code) + " [" + code + "]"; 975 } 976 } 977 TestAliases()978 public void TestAliases() { 979 StandardCodes.make(); 980 Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes 981 .getLStreg(); 982 Map<String, Map<String, R2<List<String>, String>>> aliases = SUPPLEMENTAL 983 .getLocaleAliasInfo(); 984 985 for (Entry<String, Map<String, R2<List<String>, String>>> typeMap : aliases 986 .entrySet()) { 987 String type = typeMap.getKey(); 988 Map<String, R2<List<String>, String>> codeReplacement = typeMap 989 .getValue(); 990 991 Map<String, Map<String, String>> bcp47DataTypeData = bcp47Data 992 .get(type.equals("territory") ? "region" : type); 993 if (bcp47DataTypeData == null) { 994 logln("skipping BCP47 test for " + type); 995 } else { 996 for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData 997 .entrySet()) { 998 String code = codeData.getKey(); 999 if (codeReplacement.containsKey(code) 1000 || codeReplacement.containsKey(code 1001 .toUpperCase(Locale.ENGLISH))) { 1002 continue; 1003 // TODO, check the value 1004 } 1005 Map<String, String> data = codeData.getValue(); 1006 if (data.containsKey("Deprecated") 1007 && SUPPLEMENTAL.getCLDRLanguageCodes().contains( 1008 code)) { 1009 errln("supplementalMetadata.xml: alias is missing <languageAlias type=\"" 1010 + code + "\" ... /> " + "\t" + data); 1011 } 1012 } 1013 } 1014 1015 Set<R3<String, List<String>, List<String>>> failures = new LinkedHashSet<>(); 1016 Set<String> nullReplacements = new TreeSet<>(); 1017 for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement 1018 .entrySet()) { 1019 String code = codeRep.getKey(); 1020 List<String> replacements = codeRep.getValue().get0(); 1021 if (replacements == null) { 1022 nullReplacements.add(code); 1023 continue; 1024 } 1025 Set<String> fixedReplacements = new LinkedHashSet<>(); 1026 for (String replacement : replacements) { 1027 R2<List<String>, String> newReplacement = codeReplacement 1028 .get(replacement); 1029 if (newReplacement != null) { 1030 List<String> list = newReplacement.get0(); 1031 if (list != null) { 1032 fixedReplacements.addAll(list); 1033 } 1034 } else { 1035 fixedReplacements.add(replacement); 1036 } 1037 } 1038 List<String> fixedList = new ArrayList<>( 1039 fixedReplacements); 1040 if (!replacements.equals(fixedList)) { 1041 R3<String, List<String>, List<String>> row = Row.of(code, 1042 replacements, fixedList); 1043 System.out.println(row.toString()); 1044 failures.add(row); 1045 } 1046 } 1047 1048 if (failures.size() != 0) { 1049 for (R3<String, List<String>, List<String>> item : failures) { 1050 String code = item.get0(); 1051 List<String> oldReplacement = item.get1(); 1052 List<String> newReplacement = item.get2(); 1053 1054 errln(code + "\t=>\t" + oldReplacement + "\tshould be:\n\t" 1055 + "<" + type + "Alias type=\"" + code 1056 + "\" replacement=\"" 1057 + Joiner.on(" ").join(newReplacement) 1058 + "\" reason=\"XXX\"/> <!-- YYY -->\n"); 1059 } 1060 } 1061 if (nullReplacements.size() != 0) { 1062 logln("No Replacements\t" + type + "\t" + nullReplacements); 1063 } 1064 } 1065 } 1066 1067 static final List<String> oldRegions = Arrays 1068 .asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU" 1069 .split(", ")); 1070 TestTerritoryContainment()1071 public void TestTerritoryContainment() { 1072 Relation<String, String> map = SUPPLEMENTAL 1073 .getTerritoryToContained(ContainmentStyle.all); 1074 Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore(); 1075 Set<String> mapItems = new LinkedHashSet<>(); 1076 // get all the items 1077 for (String item : map.keySet()) { 1078 mapItems.add(item); 1079 mapItems.addAll(map.getAll(item)); 1080 } 1081 Map<String, Map<String, String>> bcp47RegionData = StandardCodes 1082 .getLStreg().get("region"); 1083 1084 // verify that all regions are covered 1085 Set<String> bcp47Regions = new LinkedHashSet<>( 1086 bcp47RegionData.keySet()); 1087 bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the 1088 // unknown region... 1089 for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext();) { 1090 String region = it.next(); 1091 Map<String, String> data = bcp47RegionData.get(region); 1092 if (data.containsKey("Deprecated")) { 1093 logln("Removing deprecated " + region); 1094 it.remove(); 1095 } 1096 if ("Private use".equals(data.get("Description"))) { 1097 it.remove(); 1098 } 1099 } 1100 1101 if (!mapItems.equals(bcp47Regions)) { 1102 mapItems.removeAll(oldRegions); 1103 errlnDiff("containment items not in bcp47 regions: ", mapItems, 1104 bcp47Regions); 1105 errlnDiff("bcp47 regions not in containment items: ", bcp47Regions, 1106 mapItems); 1107 } 1108 1109 // verify that everything in the containment core can be reached 1110 // downwards from 001. 1111 1112 Map<String, Integer> from001 = getRecursiveContainment("001", map, 1113 new LinkedHashMap<String, Integer>(), 1); 1114 from001.put("001", 0); 1115 Set<String> keySet = from001.keySet(); 1116 for (String region : keySet) { 1117 logln(Utility.repeat("\t", from001.get(region)) + "\t" + region 1118 + "\t" + getRegionName(region)); 1119 } 1120 1121 // Populate mapItems with the core containment 1122 mapItems.clear(); 1123 for (String item : mapCore.keySet()) { 1124 mapItems.add(item); 1125 mapItems.addAll(mapCore.getAll(item)); 1126 } 1127 1128 if (!mapItems.equals(keySet)) { 1129 errlnDiff( 1130 "containment core items that can't be reached from 001: ", 1131 mapItems, keySet); 1132 } 1133 } 1134 errlnDiff(String title, Set<String> mapItems, Set<String> keySet)1135 private void errlnDiff(String title, Set<String> mapItems, 1136 Set<String> keySet) { 1137 Set<String> diff = new LinkedHashSet<>(mapItems); 1138 diff.removeAll(keySet); 1139 if (diff.size() != 0) { 1140 errln(title + diff); 1141 } 1142 } 1143 getRegionName(String region)1144 private String getRegionName(String region) { 1145 return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region); 1146 } 1147 getRecursiveContainment(String region, Relation<String, String> map, Map<String, Integer> result, int depth)1148 private Map<String, Integer> getRecursiveContainment(String region, 1149 Relation<String, String> map, Map<String, Integer> result, int depth) { 1150 Set<String> contained = map.getAll(region); 1151 if (contained == null) { 1152 return result; 1153 } 1154 for (String item : contained) { 1155 if (result.containsKey(item)) { 1156 logln("Duplicate containment " + item + "\t" 1157 + getRegionName(item)); 1158 continue; 1159 } 1160 result.put(item, depth); 1161 getRecursiveContainment(item, map, result, depth + 1); 1162 } 1163 return result; 1164 } 1165 TestMacrolanguages()1166 public void TestMacrolanguages() { 1167 Set<String> languageCodes = STANDARD_CODES 1168 .getAvailableCodes("language"); 1169 Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement = SUPPLEMENTAL 1170 .getLocaleAliasInfo(); 1171 Map<String, R2<List<String>, String>> tagToReplacement = typeToTagToReplacement 1172 .get("language"); 1173 1174 Relation<String, String> replacementToReplaced = Relation.of( 1175 new TreeMap<String, Set<String>>(), TreeSet.class); 1176 for (String language : tagToReplacement.keySet()) { 1177 List<String> replacements = tagToReplacement.get(language).get0(); 1178 if (replacements != null) { 1179 replacementToReplaced.putAll(replacements, language); 1180 } 1181 } 1182 replacementToReplaced.freeze(); 1183 1184 Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes 1185 .getLStreg(); 1186 Map<String, Map<String, String>> lstregLanguageInfo = lstreg 1187 .get("language"); 1188 1189 Relation<Scope, String> scopeToCodes = Relation.of( 1190 new TreeMap<Scope, Set<String>>(), TreeSet.class); 1191 // the invariant is that every macrolanguage has exactly 1 encompassed 1192 // language that maps to it 1193 1194 main: for (String language : Builder.with(new TreeSet<String>()) 1195 .addAll(languageCodes).addAll(Iso639Data.getAvailable()).get()) { 1196 if (language.equals("no") || language.equals("sh")) 1197 continue; // special cases 1198 Scope languageScope = getScope(language, lstregLanguageInfo); 1199 if (languageScope == Scope.Macrolanguage) { 1200 if (Iso639Data.getHeirarchy(language) != null) { 1201 continue main; // is real family 1202 } 1203 Set<String> replacements = replacementToReplaced 1204 .getAll(language); 1205 if (replacements == null || replacements.size() == 0) { 1206 scopeToCodes.put(languageScope, language); 1207 } else { 1208 // it still might be bad, if we don't have a mapping to a 1209 // regular language 1210 for (String replacement : replacements) { 1211 Scope replacementScope = getScope(replacement, 1212 lstregLanguageInfo); 1213 if (replacementScope == Scope.Individual) { 1214 continue main; 1215 } 1216 } 1217 scopeToCodes.put(languageScope, language); 1218 } 1219 } 1220 } 1221 // now show the items we found 1222 for (Scope scope : scopeToCodes.keySet()) { 1223 for (String language : scopeToCodes.getAll(scope)) { 1224 String name = testInfo.getEnglish().getName(language); 1225 if (name == null || name.equals(language)) { 1226 Set<String> set = Iso639Data.getNames(language); 1227 if (set != null) { 1228 name = set.iterator().next(); 1229 } else { 1230 Map<String, String> languageInfo = lstregLanguageInfo 1231 .get(language); 1232 if (languageInfo != null) { 1233 name = languageInfo.get("Description"); 1234 } 1235 } 1236 } 1237 errln(scope + "\t" + language + "\t" + name + "\t" 1238 + Iso639Data.getType(language)); 1239 } 1240 } 1241 } 1242 getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo)1243 private Scope getScope(String language, 1244 Map<String, Map<String, String>> lstregLanguageInfo) { 1245 Scope languageScope = Iso639Data.getScope(language); 1246 Map<String, String> languageInfo = lstregLanguageInfo.get(language); 1247 if (languageInfo == null) { 1248 // System.out.println("Couldn't get lstreg info for " + language); 1249 } else { 1250 String lstregScope = languageInfo.get("Scope"); 1251 if (lstregScope != null) { 1252 Scope scope2 = Scope.fromString(lstregScope); 1253 if (languageScope != scope2) { 1254 // System.out.println("Mismatch in scope between LSTR and ISO 639:\t" 1255 // + scope2 + "\t" + 1256 // languageScope); 1257 languageScope = scope2; 1258 } 1259 } 1260 } 1261 return languageScope; 1262 } 1263 1264 static final boolean LOCALES_FIXED = true; 1265 TestPopulation()1266 public void TestPopulation() { 1267 Set<String> languages = SUPPLEMENTAL 1268 .getLanguagesForTerritoriesPopulationData(); 1269 Relation<String, String> baseToLanguages = Relation.of( 1270 new TreeMap<String, Set<String>>(), TreeSet.class); 1271 LanguageTagParser ltp = new LanguageTagParser(); 1272 LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(false); 1273 1274 for (String language : languages) { 1275 if (LOCALES_FIXED) { 1276 String canonicalForm = ltc.transform(language); 1277 if (!assertEquals("Canonical form", canonicalForm, language)) { 1278 int debug = 0; 1279 } 1280 } 1281 1282 String base = ltp.set(language).getLanguage(); 1283 String script = ltp.getScript(); 1284 baseToLanguages.put(base, language); 1285 1286 // add basic data, basically just for wo! 1287 // if there are primary scripts, they must include script (if not 1288 // empty) 1289 Set<String> primaryScripts = Collections.emptySet(); 1290 Set<String> secondaryScripts = Collections.emptySet(); 1291 Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL 1292 .getBasicLanguageDataMap(base); 1293 if (basicData != null) { 1294 BasicLanguageData s = basicData 1295 .get(BasicLanguageData.Type.primary); 1296 if (s != null) { 1297 primaryScripts = s.getScripts(); 1298 } 1299 s = basicData.get(BasicLanguageData.Type.secondary); 1300 if (s != null) { 1301 secondaryScripts = s.getScripts(); 1302 } 1303 } 1304 1305 // do some consistency tests; if there is a script, it must be in 1306 // primaryScripts or secondaryScripts 1307 if (!script.isEmpty() && !primaryScripts.contains(script) && !secondaryScripts.contains(script)) { 1308 errln(base + ": Script found in territory data (" + script 1309 + ") is not in primary scripts :\t" + primaryScripts 1310 + " and not in secondary scripts :\t" + secondaryScripts); 1311 } 1312 1313 // if there are multiple primary scripts, they will be in 1314 // baseToLanguages 1315 if (primaryScripts.size() > 1) { 1316 for (String script2 : primaryScripts) { 1317 baseToLanguages.put(base, base + "_" + script2); 1318 } 1319 } 1320 } 1321 1322 if (!LOCALES_FIXED) { 1323 // the invariants are that if we have a base, we must not have a script. 1324 // and if we don't have a base, we must have two items 1325 for (String base : baseToLanguages.keySet()) { 1326 Set<String> languagesForBase = baseToLanguages.getAll(base); 1327 if (languagesForBase.contains(base)) { 1328 if (languagesForBase.size() > 1) { 1329 errln("Cannot have base alone with other scripts:\t" 1330 + languagesForBase); 1331 } 1332 } else { 1333 if (languagesForBase.size() == 1) { 1334 errln("Cannot have only one script for language:\t" 1335 + languagesForBase); 1336 } 1337 } 1338 } 1339 } 1340 } 1341 TestCompleteness()1342 public void TestCompleteness() { 1343 if (SUPPLEMENTAL.getSkippedElements().size() > 0) { 1344 logln("SupplementalDataInfo API doesn't support: " 1345 + SUPPLEMENTAL.getSkippedElements().toString()); 1346 } 1347 } 1348 1349 // these are settings for exceptional cases we want to allow 1350 private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW = new TreeSet<>( 1351 Arrays.asList("ILS", "NZD", "PGK", "TWD")); 1352 1353 // ok since there is no problem with confusion 1354 private static final Set<String> OK_TO_NOT_HAVE_OLD = new TreeSet<>( 1355 Arrays.asList("ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM", 1356 "FRF", "GRD", "IEP", "ITL", "LUF", "MTL", "MTP", "NLG", 1357 "PTE", "YUM", "ARA", "BAD", "BGL", "BOP", "BRC", "BRN", 1358 "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE", "HRD", 1359 "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI", 1360 "PES", "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD", 1361 "YUN", "ZRZ", "GWE")); 1362 1363 private static final Date LIMIT_FOR_NEW_CURRENCY = new Date( 1364 new Date().getYear() - 5, 1, 1); 1365 private static final Date NOW = new Date(); 1366 1367 private Matcher oldMatcher = Pattern.compile( 1368 "\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE) 1369 .matcher(""); 1370 private Matcher newMatcher = Pattern.compile("\\bnew\\b", 1371 Pattern.CASE_INSENSITIVE).matcher(""); 1372 1373 /** 1374 * Test that access to currency info in supplemental data is ok. At this 1375 * point just a simple test. 1376 * 1377 * @param args 1378 */ TestCurrency()1379 public void TestCurrency() { 1380 IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance(); 1381 Set<String> currencyCodes = STANDARD_CODES 1382 .getGoodAvailableCodes("currency"); 1383 Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes = Relation 1384 .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), 1385 TreeSet.class); 1386 Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes = Relation 1387 .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), 1388 TreeSet.class); 1389 Set<String> territoriesWithoutModernCurrencies = new TreeSet<>( 1390 STANDARD_CODES.getGoodAvailableCodes("territory")); 1391 Map<String, Date> currencyFirstValid = new TreeMap<>(); 1392 Map<String, Date> currencyLastValid = new TreeMap<>(); 1393 territoriesWithoutModernCurrencies.remove("ZZ"); 1394 1395 for (String territory : STANDARD_CODES 1396 .getGoodAvailableCodes("territory")) { 1397 /* "EU" behaves like a country for purposes of this test */ 1398 if ((SUPPLEMENTAL.getContained(territory) != null) 1399 && !territory.equals("EU")) { 1400 territoriesWithoutModernCurrencies.remove(territory); 1401 continue; 1402 } 1403 Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL 1404 .getCurrencyDateInfo(territory); 1405 if (currencyInfo == null) { 1406 continue; // error, but will pick up below. 1407 } 1408 for (CurrencyDateInfo dateInfo : currencyInfo) { 1409 final String currency = dateInfo.getCurrency(); 1410 final Date start = dateInfo.getStart(); 1411 final Date end = dateInfo.getEnd(); 1412 if (dateInfo.getErrors().length() != 0) { 1413 logln("parsing " + territory + "\t" + dateInfo.toString() 1414 + "\t" + dateInfo.getErrors()); 1415 } 1416 Date firstValue = currencyFirstValid.get(currency); 1417 if (firstValue == null || firstValue.compareTo(start) < 0) { 1418 currencyFirstValid.put(currency, start); 1419 } 1420 Date lastValue = currencyLastValid.get(currency); 1421 if (lastValue == null || lastValue.compareTo(end) > 0) { 1422 currencyLastValid.put(currency, end); 1423 } 1424 if (start.compareTo(NOW) < 0 && end.compareTo(NOW) >= 0) { // Non-tender 1425 // is 1426 // OK... 1427 modernCurrencyCodes.put(currency, 1428 new Pair<>(territory, 1429 dateInfo)); 1430 territoriesWithoutModernCurrencies.remove(territory); 1431 } else { 1432 nonModernCurrencyCodes.put(currency, 1433 new Pair<>(territory, 1434 dateInfo)); 1435 } 1436 logln(territory 1437 + "\t" 1438 + dateInfo.toString() 1439 + "\t" 1440 + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME, 1441 currency)); 1442 } 1443 } 1444 // fix up 1445 nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet()); 1446 Relation<String, String> isoCurrenciesToCountries = Relation.of( 1447 new TreeMap<String, Set<String>>(), TreeSet.class) 1448 .addAllInverted(isoCodes.getCountryToCodes()); 1449 // now print error messages 1450 logln("Modern Codes: " + modernCurrencyCodes.size() + "\t" 1451 + modernCurrencyCodes); 1452 Set<String> missing = new TreeSet<>( 1453 isoCurrenciesToCountries.keySet()); 1454 missing.removeAll(modernCurrencyCodes.keySet()); 1455 if (missing.size() != 0) { 1456 errln("Missing codes compared to ISO: " + missing.toString()); 1457 } 1458 1459 for (String currency : modernCurrencyCodes.keySet()) { 1460 Set<Pair<String, CurrencyDateInfo>> data = modernCurrencyCodes 1461 .getAll(currency); 1462 final String name = testInfo.getEnglish().getName( 1463 CLDRFile.CURRENCY_NAME, currency); 1464 1465 Set<String> isoCountries = isoCurrenciesToCountries 1466 .getAll(currency); 1467 if (isoCountries == null) { 1468 isoCountries = new TreeSet<>(); 1469 } 1470 1471 TreeSet<String> cldrCountries = new TreeSet<>(); 1472 for (Pair<String, CurrencyDateInfo> x : data) { 1473 cldrCountries.add(x.getFirst()); 1474 } 1475 if (!isoCountries.equals(cldrCountries)) { 1476 if (!logKnownIssue("cldrbug:10765", "Missing codes compared to ISO: " + missing.toString())) { 1477 1478 errln("Mismatch between ISO and Cldr modern currencies for " 1479 + currency + "\tISO:" + isoCountries + "\tCLDR:" 1480 + cldrCountries); 1481 showCountries("iso-cldr", isoCountries, cldrCountries, missing); 1482 showCountries("cldr-iso", cldrCountries, isoCountries, missing); 1483 } 1484 } 1485 1486 if (oldMatcher.reset(name).find()) { 1487 errln("Has 'old' in name but still used " + "\t" + currency 1488 + "\t" + name + "\t" + data); 1489 } 1490 if (newMatcher.reset(name).find() 1491 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) { 1492 // find the first use. If older than 5 years, flag as error 1493 if (currencyFirstValid.get(currency).compareTo( 1494 LIMIT_FOR_NEW_CURRENCY) < 0) { 1495 errln("Has 'new' in name but used since " 1496 + CurrencyDateInfo.formatDate(currencyFirstValid 1497 .get(currency)) 1498 + "\t" + currency + "\t" 1499 + name + "\t" + data); 1500 } else { 1501 logln("Has 'new' in name but used since " 1502 + CurrencyDateInfo.formatDate(currencyFirstValid 1503 .get(currency)) 1504 + "\t" + currency + "\t" 1505 + name + "\t" + data); 1506 } 1507 } 1508 } 1509 logln("Non-Modern Codes (with dates): " + nonModernCurrencyCodes.size() 1510 + "\t" + nonModernCurrencyCodes); 1511 for (String currency : nonModernCurrencyCodes.keySet()) { 1512 final String name = testInfo.getEnglish().getName( 1513 CLDRFile.CURRENCY_NAME, currency); 1514 if (newMatcher.reset(name).find() 1515 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) { 1516 logln("Has 'new' in name but NOT used since " 1517 + CurrencyDateInfo.formatDate(currencyLastValid 1518 .get(currency)) 1519 + "\t" + currency + "\t" + name 1520 + "\t" + nonModernCurrencyCodes.getAll(currency)); 1521 } else if (!oldMatcher.reset(name).find() 1522 && !OK_TO_NOT_HAVE_OLD.contains(currency)) { 1523 logln("Doesn't have 'old' or date range in name but NOT used since " 1524 + CurrencyDateInfo.formatDate(currencyLastValid 1525 .get(currency)) 1526 + "\t" 1527 + currency 1528 + "\t" 1529 + name 1530 + "\t" + nonModernCurrencyCodes.getAll(currency)); 1531 for (Pair<String, CurrencyDateInfo> pair : nonModernCurrencyCodes 1532 .getAll(currency)) { 1533 final String territory = pair.getFirst(); 1534 Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL 1535 .getCurrencyDateInfo(territory); 1536 for (CurrencyDateInfo dateInfo : currencyInfo) { 1537 if (dateInfo.getEnd().compareTo(NOW) < 0) { 1538 continue; 1539 } 1540 logln("\tCurrencies used instead: " 1541 + territory 1542 + "\t" 1543 + dateInfo 1544 + "\t" 1545 + testInfo.getEnglish().getName( 1546 CLDRFile.CURRENCY_NAME, 1547 dateInfo.getCurrency())); 1548 1549 } 1550 } 1551 1552 } 1553 } 1554 Set<String> remainder = new TreeSet<>(); 1555 remainder.addAll(currencyCodes); 1556 remainder.removeAll(nonModernCurrencyCodes.keySet()); 1557 // TODO make this an error, except for allowed exceptions. 1558 logln("Currencies without Territories: " + remainder); 1559 if (territoriesWithoutModernCurrencies.size() != 0) { 1560 errln("Modern territory missing currency: " 1561 + territoriesWithoutModernCurrencies); 1562 } 1563 } 1564 showCountries(final String title, Set<String> isoCountries, Set<String> cldrCountries, Set<String> missing)1565 private void showCountries(final String title, Set<String> isoCountries, 1566 Set<String> cldrCountries, Set<String> missing) { 1567 missing.clear(); 1568 missing.addAll(isoCountries); 1569 missing.removeAll(cldrCountries); 1570 for (String country : missing) { 1571 logln("\t\tExtra in " + title + "\t" + country + " - " 1572 + getRegionName(country)); 1573 } 1574 } 1575 TestCurrencyDecimalPlaces()1576 public void TestCurrencyDecimalPlaces() { 1577 IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance(); 1578 Relation<String, IsoCurrencyParser.Data> codeList = isoCodes 1579 .getCodeList(); 1580 Set<String> currencyCodes = STANDARD_CODES 1581 .getGoodAvailableCodes("currency"); 1582 for (String cc : currencyCodes) { 1583 Set<IsoCurrencyParser.Data> d = codeList.get(cc); 1584 if (d != null) { 1585 for (IsoCurrencyParser.Data x : d) { 1586 CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc); 1587 if (cni.digits != x.getMinorUnit()) { 1588 logln("Mismatch between ISO/CLDR for decimal places for currency => " + cc + 1589 ". ISO = " + x.getMinorUnit() + " CLDR = " + cni.digits); 1590 } 1591 } 1592 } 1593 } 1594 } 1595 1596 /** 1597 * Verify that we have a default script for every CLDR base language 1598 */ TestDefaultScripts()1599 public void TestDefaultScripts() { 1600 SupplementalDataInfo supp = SUPPLEMENTAL; 1601 Map<String, String> likelyData = supp.getLikelySubtags(); 1602 Map<String, String> baseToDefaultContentScript = new HashMap<>(); 1603 for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) { 1604 String script = locale.getScript(); 1605 if (!script.isEmpty() && locale.getCountry().isEmpty()) { 1606 baseToDefaultContentScript.put(locale.getLanguage(), script); 1607 } 1608 } 1609 for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) { 1610 if ("root".equals(locale)) { 1611 continue; 1612 } 1613 CLDRLocale loc = CLDRLocale.getInstance(locale); 1614 String baseLanguage = loc.getLanguage(); 1615 String defaultScript = supp.getDefaultScript(baseLanguage); 1616 1617 String defaultContentScript = baseToDefaultContentScript 1618 .get(baseLanguage); 1619 if (defaultContentScript != null) { 1620 assertEquals(loc + " defaultContentScript = default", 1621 defaultScript, defaultContentScript); 1622 } 1623 String likely = likelyData.get(baseLanguage); 1624 String likelyScript = likely == null ? null : CLDRLocale 1625 .getInstance(likely).getScript(); 1626 Map<Type, BasicLanguageData> scriptInfo = supp 1627 .getBasicLanguageDataMap(baseLanguage); 1628 if (scriptInfo == null) { 1629 errln(loc + ": has no BasicLanguageData"); 1630 } else { 1631 BasicLanguageData data = scriptInfo.get(Type.primary); 1632 if (data == null) { 1633 data = scriptInfo.get(Type.secondary); 1634 } 1635 if (data == null) { 1636 errln(loc + ": has no scripts in BasicLanguageData"); 1637 } else if (!data.getScripts().contains(defaultScript)) { 1638 errln(loc + ": " + defaultScript 1639 + " not in BasicLanguageData " + data.getScripts()); 1640 } 1641 } 1642 1643 assertEquals(loc + " likely = default", defaultScript, likelyScript); 1644 1645 assertNotNull(loc + ": needs default script", defaultScript); 1646 1647 if (!loc.getScript().isEmpty()) { 1648 if (!loc.getScript().equals(defaultScript)) { 1649 assertNotEquals(locale 1650 + ": only include script if not default", 1651 loc.getScript(), defaultScript); 1652 } 1653 } 1654 1655 } 1656 } 1657 1658 enum CoverageIssue { 1659 log, warn, error 1660 } 1661 TestPluralCompleteness()1662 public void TestPluralCompleteness() { 1663 // Set<String> cardinalLocales = new 1664 // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal)); 1665 // Set<String> ordinalLocales = new 1666 // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal)); 1667 // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals = 1668 // PluralRulesFactory.getLocaleToSamplePatterns(); 1669 // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales(); 1670 // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale, 1671 // type).keySet()); 1672 // Map<ULocale, PluralRules> overrideCardinals = 1673 // PluralRulesFactory.getPluralOverrides(); 1674 // Set<ULocale> overrideCardinalLocales = new 1675 // HashSet<ULocale>(overrideCardinals.keySet()); 1676 1677 Set<String> testLocales = STANDARD_CODES.getLocaleCoverageLocales( 1678 Organization.google, EnumSet.of(Level.MODERN)); 1679 Set<String> allLocales = testInfo.getCldrFactory().getAvailable(); 1680 LanguageTagParser ltp = new LanguageTagParser(); 1681 for (String locale : allLocales) { 1682 // the only known case where plural rules depend on region or script 1683 // is pt_PT 1684 if (locale.equals("root")) { 1685 continue; 1686 } 1687 ltp.set(locale); 1688 if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) { 1689 continue; 1690 } 1691 CoverageIssue needsCoverage = testLocales.contains(locale) 1692 ? CoverageIssue.error 1693 : CoverageIssue.log; 1694 CoverageIssue needsCoverage2 = needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage; 1695 1696 // if (logKnownIssue("Cldrbug:8809", "Missing plural rules/samples be and ga locales")) { 1697 // if (locale.equals("be") || locale.equals("ga")) { 1698 // needsCoverage = CoverageIssue.warn; 1699 // } 1700 // } 1701 PluralRulesFactory prf = PluralRulesFactory 1702 .getInstance(CLDRConfig.getInstance() 1703 .getSupplementalDataInfo()); 1704 1705 for (PluralType type : PluralType.values()) { 1706 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale, 1707 false); 1708 if (pluralInfo == null) { 1709 errOrLog(needsCoverage, locale + "\t" + type + " \tmissing plural rules", "Cldrbug:7839", "Missing plural data for modern locales"); 1710 continue; 1711 } 1712 Set<Count> counts = pluralInfo.getCounts(); 1713 // if (counts.size() == 1) { 1714 // continue; // skip checking samples 1715 // } 1716 HashSet<String> samples = new HashSet<>(); 1717 EnumSet<Count> countsWithNoSamples = EnumSet 1718 .noneOf(Count.class); 1719 Relation<String, Count> samplesToCounts = Relation.of( 1720 new HashMap(), LinkedHashSet.class); 1721 Set<Count> countsFound = prf.getSampleCounts(locale, 1722 type.standardType); 1723 StringBuilder failureCases = new StringBuilder(); 1724 for (Count count : counts) { 1725 String pattern = PluralRulesFactory.getSamplePattern(locale, type.standardType, count); 1726 final String rangeLine = getRangeLine(count, pluralInfo.getPluralRules(), pattern); 1727 failureCases.append('\n').append(locale).append('\t').append(type).append('\t').append(rangeLine); 1728 if (countsFound == null || !countsFound.contains(count)) { 1729 countsWithNoSamples.add(count); 1730 } else { 1731 samplesToCounts.put(pattern, count); 1732 logln(locale + "\t" + type + "\t" + count + "\t" 1733 + pattern); 1734 } 1735 } 1736 if (!countsWithNoSamples.isEmpty()) { 1737 errOrLog(needsCoverage, locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples, 1738 "cldrbug:7075", "Missing ordinal minimal pairs"); 1739 errOrLog(needsCoverage2, failureCases.toString()); 1740 } 1741 for (Entry<String, Set<Count>> entry : samplesToCounts 1742 .keyValuesSet()) { 1743 if (entry.getValue().size() != 1) { 1744 errOrLog(needsCoverage, locale + "\t" + type + "\t duplicate samples: " + entry.getValue() 1745 + " => «" + entry.getKey() + "»", "cldrbug:7119", "Some duplicate minimal pairs"); 1746 errOrLog(needsCoverage2, failureCases.toString()); 1747 } 1748 } 1749 } 1750 } 1751 } 1752 errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment)1753 public void errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment) { 1754 switch (causeError) { 1755 case error: 1756 if (logTicket == null) { 1757 errln(message); 1758 break; 1759 } 1760 logKnownIssue(logTicket, logComment); 1761 // fall through 1762 case warn: 1763 warnln(message); 1764 break; 1765 case log: 1766 logln(message); 1767 break; 1768 } 1769 } 1770 errOrLog(CoverageIssue causeError, String message)1771 public void errOrLog(CoverageIssue causeError, String message) { 1772 errOrLog(causeError, message, null, null); 1773 } 1774 TestNumberingSystemDigits()1775 public void TestNumberingSystemDigits() { 1776 1777 // Don't worry about digits from supplemental planes yet ( ICU can't 1778 // handle them anyways ) 1779 // hanidec is the only known non codepoint order numbering system 1780 // TODO: Fix so that it works properly on non-BMP digit strings. 1781 String[] knownExceptions = { "brah", "cakm", "hanidec", "osma", "shrd", 1782 "sora", "takr" }; 1783 List<String> knownExceptionList = Arrays.asList(knownExceptions); 1784 for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) { 1785 if (knownExceptionList.contains(ns)) { 1786 continue; 1787 } 1788 String digits = SUPPLEMENTAL.getDigits(ns); 1789 int previousChar = 0; 1790 int ch; 1791 1792 for (int i = 0; i < digits.length(); i += Character.charCount(ch)) { 1793 ch = digits.codePointAt(i); 1794 if (i > 0 && ch != previousChar + 1) { 1795 errln("Digits for numbering system " 1796 + ns 1797 + " are not in code point order. Previous char = U+" 1798 + Utility.hex(previousChar, 4) 1799 + " Current char = U+" + Utility.hex(ch, 4)); 1800 break; 1801 } 1802 previousChar = ch; 1803 } 1804 } 1805 } 1806 TestNumberingSystemDigitCompleteness()1807 public void TestNumberingSystemDigitCompleteness() { 1808 List<Integer> unicodeDigits = new ArrayList<>(); 1809 for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) { 1810 if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) { 1811 unicodeDigits.add(Integer.valueOf(cp)); 1812 } 1813 } 1814 1815 for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) { 1816 String digits = SUPPLEMENTAL.getDigits(ns); 1817 int ch; 1818 1819 for (int i = 0; i < digits.length(); i += Character.charCount(ch)) { 1820 ch = digits.codePointAt(i); 1821 unicodeDigits.remove(Integer.valueOf(ch)); 1822 } 1823 } 1824 1825 if (unicodeDigits.size() > 0) { 1826 for (Integer i : unicodeDigits) { 1827 errln("Unicode digit: " + UCharacter.getName(i) + " is not in any numbering system. Script = " 1828 + UScript.getShortName(UScript.getScript(i))); 1829 } 1830 } 1831 } 1832 TestMetazones()1833 public void TestMetazones() { 1834 Date goalMin = new Date(70, 0, 1); 1835 Date goalMax = new Date(300, 0, 2); 1836 ImmutableSet<String> knownTZWithoutMetazone = ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov"); 1837 for (String timezoneRaw : TimeZone.getAvailableIDs()) { 1838 String timezone = TimeZone.getCanonicalID(timezoneRaw); 1839 String region = TimeZone.getRegion(timezone); 1840 if (!timezone.equals(timezoneRaw) || "001".equals(region)) { 1841 continue; 1842 } 1843 if (knownTZWithoutMetazone.contains(timezone)) { 1844 continue; 1845 } 1846 final Set<MetaZoneRange> ranges = SUPPLEMENTAL 1847 .getMetaZoneRanges(timezone); 1848 1849 if (assertNotNull("metazones for " + timezone, ranges)) { 1850 long min = Long.MAX_VALUE; 1851 long max = Long.MIN_VALUE; 1852 for (MetaZoneRange range : ranges) { 1853 if (range.dateRange.from != DateRange.START_OF_TIME) { 1854 min = Math.min(min, range.dateRange.from); 1855 } 1856 if (range.dateRange.to != DateRange.END_OF_TIME) { 1857 max = Math.max(max, range.dateRange.to); 1858 } 1859 } 1860 assertRelation(timezone + " has metazone before 1970?", true, 1861 goalMin, LEQ, new Date(min)); 1862 assertRelation(timezone 1863 + " has metazone until way in the future?", true, 1864 goalMax, GEQ, new Date(max)); 1865 } 1866 } 1867 com.google.common.collect.Interners i; 1868 } 1869 Test9924()1870 public void Test9924() { 1871 Boolean b = org.unicode.cldr.unittest.TestSupplementalInfo.LOCALES_FIXED; 1872 PopulationData zhCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(b ? "zh" : "zh_Hans", "CN"); 1873 PopulationData yueCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN"); 1874 assertTrue("yue*10 < zh", yueCNData.getPopulation() < zhCNData.getPopulation()); 1875 } 1876 Test10765()1877 public void Test10765() { // 1878 Set<String> surveyToolLanguages = SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool 1879 Set<String> mainLanguages = new TreeSet<>(); 1880 LanguageTagParser ltp = new LanguageTagParser(); 1881 for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) { 1882 mainLanguages.add(ltp.set(locale).getLanguage()); 1883 } 1884 // add special codes we want to see anyway 1885 mainLanguages.add("und"); 1886 mainLanguages.add("mul"); 1887 mainLanguages.add("zxx"); 1888 1889 if (!mainLanguages.containsAll(surveyToolLanguages)) { 1890 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale 1891 Set<String> temp = new TreeSet<>(surveyToolLanguages); 1892 temp.removeAll(mainLanguages); 1893 Set<String> modern = new TreeSet<>(); 1894 Set<String> comprehensive = new TreeSet<>(); 1895 for (String lang : temp) { 1896 Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang)); 1897 if (level.compareTo(Level.MODERN) <= 0) { 1898 modern.add(lang); 1899 } else { 1900 comprehensive.add(lang); 1901 } 1902 } 1903 warnln("«Modern» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(modern)); 1904 logln("«Comprehensive» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(comprehensive)); 1905 } 1906 if (!surveyToolLanguages.containsAll(mainLanguages)) { 1907 mainLanguages.removeAll(surveyToolLanguages); 1908 // TODO: See https://unicode-org.atlassian.net/browse/CLDR-14974 1909 // Currently there is a requirement that all locales in main/* are in attributeValueValidity.xml 1910 assertEquals("main/* languages missing from <variable id='$language'/> in attributeValueValidity.xml", 1911 Collections.EMPTY_SET, mainLanguages); 1912 } 1913 } 1914 getNames(Set<String> temp)1915 private Set<String> getNames(Set<String> temp) { 1916 Set<String> tempNames = new TreeSet<>(); 1917 for (String langCode : temp) { 1918 tempNames.add(testInfo.getEnglish().getName(CLDRFile.LANGUAGE_NAME, langCode) + " (" + langCode + ")"); 1919 } 1920 return tempNames; 1921 } 1922 TestGrammarInfo()1923 public void TestGrammarInfo() { 1924 final Logger logger = getLogger(); 1925 Multimap<String,String> allValues = TreeMultimap.create(); 1926 for (String locale : SUPPLEMENTAL.hasGrammarInfo()) { 1927 if (locale.contentEquals("tr")) { 1928 int debug = 0; 1929 } 1930 GrammarInfo grammarInfo = SUPPLEMENTAL.getGrammarInfo(locale); 1931 for (GrammaticalTarget target : GrammaticalTarget.values()) { 1932 for (GrammaticalFeature feature : GrammaticalFeature.values()) { 1933 Collection<String> general = grammarInfo.get(target, feature, GrammaticalScope.general); 1934 for (GrammaticalScope scope : GrammaticalScope.values()) { 1935 Collection<String> units = grammarInfo.get(target, feature, scope); 1936 allValues.putAll(target + "/" + feature + "/" + scope, units); 1937 if (scope != GrammaticalScope.general) { 1938 assertTrue(general + " > " + scope + " " + units, general.containsAll(units)); 1939 } 1940 } 1941 } 1942 } 1943 logger.fine(grammarInfo.toString("\n" + locale + "\t")); 1944 } 1945 if (logger.isLoggable(java.util.logging.Level.FINE)) { // if level is at least FINE 1946 logger.fine(""); 1947 for (Entry<String, Collection<String>> entry : allValues.asMap().entrySet()) { 1948 logger.fine(entry.getKey() + "\t" + Joiner.on(", ").join(entry.getValue())); 1949 } 1950 } 1951 } 1952 } 1953