package org.unicode.cldr.unittest;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Output;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.test.CoverageLevel2;
import org.unicode.cldr.tool.LikelySubtags;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.GlossonymConstructor;
import org.unicode.cldr.util.GrammarInfo;
import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PathHeader.PageId;
import org.unicode.cldr.util.PathHeader.SectionId;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.PatternPlaceholders;
import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
import org.unicode.cldr.util.XPathParts;

/**
 * This is the original TestFmwk test case for CLDRFile.
 *
 * @see org.unicode.cldr.util.TestCLDRFile
 * @see org.unicode.cldr.util.CLDRFile
 */
public class TestCLDRFile extends TestFmwk {
    /** Gate for a check in {@link #testSimple()} that is currently switched off. */
    private static final boolean DISABLE_TIL_WORKS = false;

    static CLDRConfig testInfo = CLDRConfig.getInstance();
    static SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo();

    public static void main(String[] args) {
        new TestCLDRFile().run(args);
    }

    /** Checks the English display name produced for a handful of locale identifiers. */
    public void testFallbackNames() {
        String[][] tests = {
            {"zh-Hanb", "Chinese (Han with Bopomofo)"},
            {"aaa", "Ghotuo"},
            {"zh-RR", "Chinese (RR)"},
            {"new_Newa_NP", "Newari (Newa, Nepal)"},
        };
        CLDRFile english = testInfo.getEnglish();
        for (String[] test : tests) {
            assertEquals("", test[1], english.getName(test[0]));
        }
    }

    // verify for all paths, if there is a count="other", then there is a
    // count="x", for all x in keywords
    public void testPlurals() {
        for (String locale : new String[] {"fr", "en", "root", "ar", "ja"}) {
            checkPlurals(locale);
        }
    }

    /** Captures the value of a {@code [@count="..."]} attribute in group 1. */
    static final Pattern COUNT_MATCHER = Pattern.compile("\\[@count=\"([^\"]+)\"]");

    static final UnicodeSet DIGITS = new UnicodeSet('0', '9').freeze();

    /**
     * Groups every {@code @count} path of the resolved file by its "skeleton" (the path with the
     * count value replaced by {@code .*}) and asserts that each skeleton carries exactly the
     * plural keywords expected for the locale.
     *
     * @param locale the locale whose resolved file is inspected
     * @throws IllegalArgumentException if a path contains {@code @count} but does not match
     *     {@link #COUNT_MATCHER}
     */
    private void checkPlurals(String locale) {
        CLDRFile cldrFile = testInfo.getCLDRFile(locale, true);
        Matcher m = COUNT_MATCHER.matcher("");
        Relation<String, String> skeletonToKeywords =
                Relation.of(
                        new TreeMap<String, Set<String>>(cldrFile.getComparator()), TreeSet.class);
        PluralInfo plurals = sdi.getPlurals(PluralType.cardinal, locale);
        for (String path : cldrFile.fullIterable()) {
            if (!path.contains("@count")) {
                continue;
            }
            if (!m.reset(path).find()) {
                throw new IllegalArgumentException("Unparseable @count attribute in " + path);
            }
            // Purely numeric counts (explicit values) are not plural keywords.
            if (DIGITS.containsAll(m.group(1))) {
                continue;
            }
            String skeleton = path.substring(0, m.start(1)) + ".*" + path.substring(m.end(1));
            skeletonToKeywords.put(skeleton, m.group(1));
        }
        Set<String> normalKeywords = plurals.getAdjustedCountStrings();

        for (Entry<String, Set<String>> entry : skeletonToKeywords.keyValuesSet()) {
            final String abbreviatedPath = entry.getKey();
            Set<String> expected = normalKeywords;
            if (abbreviatedPath.startsWith("//ldml/numbers/minimalPairs/pluralMinimalPairs")) {
                expected = plurals.getCanonicalKeywords();
            }
            assertEquals(
                    "Incorrect keywords: " + locale + ", " + abbreviatedPath,
                    expected,
                    entry.getValue());
        }
    }

    static Factory cldrFactory = testInfo.getCldrFactory();

    /**
     * The resolved file for one locale plus the subset of its paths that {@link
     * #testExtraPaths()} compares across locales: paths whose coverage level is at most
     * COMPREHENSIVE, keeping only the {@code count="other"} variant of counted paths.
     */
    static class LocaleInfo {
        final String locale;
        final CLDRFile cldrFile;
        final Set<String> paths = new HashSet<>();

        LocaleInfo(String locale) {
            this.locale = locale;
            cldrFile = testInfo.getCLDRFile(locale, true);
            for (String path : cldrFile.fullIterable()) {
                Level level = sdi.getCoverageLevel(path, locale);
                if (level.compareTo(Level.COMPREHENSIVE) > 0) {
                    continue;
                }
                if (path.contains("[@count=") && !path.contains("[@count=\"other\"]")) {
                    continue;
                }
                paths.add(path);
            }
        }
    }

    static final boolean DEBUG = false;
    static final boolean DEBUG_testExtraPaths = true;

    /**
     * Runs three independent checks, in order: placeholder status of German extra paths, coverage
     * level of grammatical extra paths, and a comparison of the path sets of several locales
     * against English.
     */
    public void testExtraPaths() {
        checkGermanExtraPathPlaceholders();
        checkGrammarExtraPathCoverage();
        checkPathsAgainstEnglish();
    }

    /** Verifies the PathHeader section and the placeholder status of every German extra path. */
    private void checkGermanExtraPathPlaceholders() {
        // for debugging
        final CLDRFile german = CLDRConfig.getInstance().getCldrFactory().make("de", true);
        getLogger().fine("");
        Set<String> sorted = new TreeSet<>(german.getExtraPaths());
        PathHeader.Factory phf = PathHeader.getFactory();
        PatternPlaceholders pph = PatternPlaceholders.getInstance();

        for (String path : sorted) {
            if (DEBUG_testExtraPaths
                    && path.equals(
                            "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"acceleration-g-force\"]/unitPattern")) {
                // Debugging hook: a convenient place for a breakpoint on one known path.
                List<String> failures = new ArrayList<>();
                phf.fromPath(path, failures);
            }
            PathHeader ph = phf.fromPath(path);
            if (ph.getPageId() != PageId.Deprecated) {
                assertNotEquals(
                        "bad placeholder: " + path + " ; " + ph,
                        SectionId.Special,
                        ph.getSectionId());
            }
            PlaceholderStatus phStatus = pph.getStatus(path);

            PlaceholderStatus expected =
                    path.contains("/metazone")
                                    || path.contains("/timeZoneNames")
                                    || path.contains("/gender")
                                    || path.startsWith("//ldml/numbers/currencies/currency")
                                    || path.startsWith("//ldml/personNames/sampleName")
                            ? PlaceholderStatus.DISALLOWED
                            : path.contains("/compoundUnitPattern1")
                                    ? PlaceholderStatus.REQUIRED
                                    : PlaceholderStatus.LOCALE_DEPENDENT;
            assertEquals(path, expected, phStatus);

            if (DEBUG) {
                if (GrammaticalFeature.pathHasFeature(path) != null || path.endsWith("/gender")) {
                    System.out.println(path + "\t" + german.getStringValue(path));
                    String newPath = path.replace("[@case=\"accusative\"]", "");
                    if (!newPath.contentEquals(path) && !sorted.contains(newPath)) {
                        System.out.println(newPath + "\t" + german.getStringValue(newPath));
                    }
                }
            }
        }
    }

    /**
     * For every locale with nominal grammar info, reports gender/case extra paths whose coverage
     * level is above MODERN, and logs how many such paths each locale has.
     */
    private void checkGrammarExtraPathCoverage() {
        Set<String> badCoverage = new TreeSet<>();
        Counter<String> extraPaths = new Counter<>();
        final Factory fullCldrFactory = CLDRConfig.getInstance().getFullCldrFactory();

        for (String locale : sdi.hasGrammarInfo()) {
            if (!sdi.getGrammarInfo(locale).hasInfo(GrammaticalTarget.nominal)) {
                continue;
            }
            if (!fullCldrFactory.getAvailable().contains(locale) && CLDRConfig.SKIP_SEED) {
                continue; // don't check if skipping seed, for production
            }
            final CLDRFile cldrFile = fullCldrFactory.make(locale, true);
            Set<String> sorted2 = new TreeSet<>(cldrFile.getExtraPaths());
            for (String path : sorted2) {
                if (path.contains("speed-beaufort")) {
                    continue; // special case
                }
                if (path.contains("/gender")
                        || path.contains("@gender")
                        || path.contains("@case")) {
                    Level level = sdi.getCoverageLevel(path, locale);
                    if (level.compareTo(Level.MODERN) > 0) {
                        badCoverage.add(path);
                    }
                    extraPaths.add(locale, 1);
                }
            }
        }
        getLogger().fine("Units with grammar info: " + GrammarInfo.getUnitsToAddGrammar().size());
        getLogger().fine("Inflection Paths");
        for (R2<Long, String> locale : extraPaths.getEntrySetSortedByCount(false, null)) {
            getLogger().fine(locale.get0() + "\t" + locale.get1());
        }
        if (!badCoverage.isEmpty()) {
            errln("Paths not at modern: " + Joiner.on("\n\t").join(badCoverage));
        }

        // Set<String> validUnits =
        // Validity.getInstance().getStatusToCodes(LstrType.unit).get(Validity.Status.regular);
        // validUnits.forEach(System.out::println);
        //
        // grammarInfo = testInfo.getSupplementalDataInfo().getGrammarInfo();
        // for (Entry<String, GrammarInfo> entry : grammarInfo.entrySet()) {
        // System.out.println(entry);
        // }

        // CLDRFile toCheck = testInfo.getCldrFactory().make("de", true); //
        // testInfo.getFullCldrFactory().make("en", false);
        // Set<String> sorted = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml));
        // sorted.addAll(toCheck.getExtraPaths());
        // for (String path : sorted) {
        // XPathParts parts = XPathParts.getFrozenInstance(path);
        // assertEquals("parts: ", parts.toString(), path);
        // System.out.println(path);
        // }
        // int debug = 0;
    }

    /**
     * Compares the path sets of root, fr, ar and ja against English. Paths English has and
     * another locale lacks are errors unless they fall in a known-tolerated area; paths another
     * locale has and English lacks are only logged.
     */
    private void checkPathsAgainstEnglish() {
        Map<String, LocaleInfo> localeInfos = new LinkedHashMap<>();
        Relation<String, String> missingPathsToLocales =
                Relation.of(
                        new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)),
                        TreeSet.class);
        Relation<String, String> extraPathsToLocales =
                Relation.of(
                        new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)),
                        TreeSet.class);

        for (String locale : new String[] {"en", "root", "fr", "ar", "ja"}) {
            localeInfos.put(locale, new LocaleInfo(locale));
        }
        LocaleInfo englishInfo = localeInfos.get("en");

        // Pass 1: paths in English that are missing elsewhere.
        for (String path : englishInfo.paths) {
            if (path.startsWith("//ldml/identity/")
                    || path.startsWith("//ldml/numbers/currencies/currency[@type=")
                    // || path.startsWith("//ldml/dates/calendars/calendar") &&
                    // !path.startsWith("//ldml/dates/calendars/calendar[@type=\"gregorian\"]")
                    // ||
                    // path.startsWith("//ldml/numbers/currencyFormats[@numberSystem=")
                    // &&
                    // !path.startsWith("//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]")
                    || (path.contains("[@count=") && !path.contains("[@count=\"other\"]"))
                    || (path.contains("[@ordinal=") && !path.contains("[@ordinal=\"other\"]"))
                    || path.contains("dayPeriod[@type=\"noon\"]")) {
                continue;
            }
            for (LocaleInfo localeInfo : localeInfos.values()) {
                if (localeInfo == englishInfo || localeInfo.paths.contains(path)) {
                    continue;
                }
                if (isExpectedMissing(path, localeInfo.locale)) {
                    continue;
                }
                String localeAndStatus =
                        localeInfo.locale + (englishInfo.cldrFile.isHere(path) ? "" : "*");
                missingPathsToLocales.put(path, localeAndStatus);
                // English contains the path, and the target locale doesn't.
                // The * means that the value is inherited (eg from root).
            }
        }

        // Pass 2: paths elsewhere that are missing in English.
        for (LocaleInfo localeInfo : localeInfos.values()) {
            if (localeInfo == englishInfo) {
                continue;
            }
            for (String path : localeInfo.paths) {
                if (isIgnoredExtra(path)) {
                    continue;
                }
                if (!englishInfo.paths.contains(path)) {
                    String localeAndStatus =
                            localeInfo.locale + (localeInfo.cldrFile.isHere(path) ? "" : "*");
                    extraPathsToLocales.put(path, localeAndStatus);
                    // English doesn't contains the path, and the target locale does.
                    // The * means that the value is inherited (eg from root).
                }
            }
        }

        for (Entry<String, Set<String>> entry : missingPathsToLocales.keyValuesSet()) {
            String path = entry.getKey();
            Set<String> locales = entry.getValue();
            Status status = new Status();
            String originalLocale = englishInfo.cldrFile.getSourceLocaleID(path, status);
            String engName =
                    "en"
                            + (englishInfo.cldrFile.isHere(path)
                                    ? ""
                                    : " (source_locale:"
                                            + originalLocale
                                            + (path.equals(status.pathWhereFound)
                                                    ? ""
                                                    : ", source_path: " + status)
                                            + ")");
            if (path.startsWith("//ldml/localeDisplayNames/")
                    || path.contains("[@alt=\"accounting\"]")
                    || path.contains("[@alt=\"alphaNextToNumber\"]") // CLDR-14336
                    || path.contains("[@alt=\"ascii\"]") // CLDR-16606
                    || path.contains("[@alt=\"noCurrency\"]") // CLDR-14336
                    || path.startsWith("//ldml/personNames/") // CLDR-15384
                    || path.startsWith("//ldml/typographicNames/styleName")
                    || path.startsWith("//ldml/units")) {
                logln("+" + engName + ", -" + locales + "\t" + path);
            } else {
                errln("+" + engName + ", -" + locales + "\t" + path);
            }
        }

        // Extra (non-English) paths are informational only. The original code distinguished
        // //ldml/localeDisplayNames/ and //ldml/numbers/otherNumberingSystems/ (and, commented
        // out, [@alt="accounting"]) from the rest, but logged both cases identically.
        for (Entry<String, Set<String>> entry : extraPathsToLocales.keyValuesSet()) {
            logln("-en, +" + entry.getValue() + "\t" + entry.getKey());
        }

        // for (String locale : new String[] { "fr", "ar", "ja" }) {
        // CLDRFile cldrFile = cldrFactory.make(locale, true);
        // Set<String> s = (Set<String>) cldrFile.getExtraPaths(new
        // TreeSet<String>());
        // System.out.println("Extras for " + locale);
        // for (String path : s) {
        // System.out.println(path + " => " + cldrFile.getStringValue(path));
        // }
        // System.out.println("Already in " + locale);
        // for (Iterator<String> it =
        // cldrFile.iterator(PatternCache.get(".*\\[@count=.*").matcher(""));
        // it.hasNext();) {
        // String path = it.next();
        // System.out.println(path + " => " + cldrFile.getStringValue(path));
        // }
        // }
    }

    /**
     * Returns true when it is acceptable for {@code locale} to lack an English {@code path}.
     *
     * @param path a path present in English
     * @param locale the locale that does not have the path
     */
    private static boolean isExpectedMissing(String path, String locale) {
        return (path.startsWith("//ldml/dates/calendars/calendar")
                        && !(path.contains("[@type=\"generic\"]")
                                || path.contains("[@type=\"gregorian\"]")))
                // it is OK for just "en" to have /eras/.../era[@type=...][@alt="variant"]
                || (path.contains("/eras/") && path.contains("[@alt=\"variant\"]"))
                || path.contains("[@type=\"japanese\"]")
                || path.contains("[@type=\"coptic\"]")
                || path.contains("[@type=\"hebrew\"]")
                || path.contains("[@type=\"islamic-rgsa\"]")
                || path.contains("[@type=\"islamic-umalqura\"]")
                || path.contains("/relative[@type=\"-2\"]")
                || path.contains("/relative[@type=\"2\"]")
                || path.startsWith("//ldml/contextTransforms/contextTransformUsage")
                || path.contains("[@alt=\"variant\"]")
                || path.contains("[@alt=\"formal\"]")
                || path.contains("[@type=\"pressure-gasoline-energy-density\"]")
                // morning1, afternoon1, ...
                || (path.contains("dayPeriod[@type=")
                        && (path.endsWith("1\"]")
                                || path.endsWith("\"am\"]")
                                || path.endsWith("\"pm\"]")
                                || path.endsWith("\"midnight\"]")))
                // //ldml/characters/exemplarCharacters[@type="index"][root]
                || (locale.equals("root")
                        && (path.startsWith("//ldml/characters/exemplarCharacters[@type=\"index\"]")
                                // two aliased paths in root
                                || (path.startsWith("//ldml/units/unitLength")
                                        && (path.contains("[@type=\"energy-foodcalorie")
                                                || path.contains("[@type=\"graphics-dot")))));
    }

    /** Returns true for non-English paths that are never reported as "extra". */
    private static boolean isIgnoredExtra(String path) {
        return path.contains("[@numberSystem=\"arab\"]")
                || path.contains("[@type=\"japanese\"]")
                || path.contains("[@type=\"coptic\"]")
                || path.contains("[@type=\"hebrew\"]")
                || path.contains("[@type=\"islamic-rgsa\"]")
                || path.contains("[@type=\"islamic-umalqura\"]")
                || path.contains("/relative[@type=\"-2\"]")
                || path.contains("/relative[@type=\"2\"]");
    }

    // public void testDraftFilter() {
    // Factory cldrFactory = Factory.make(CldrUtility.MAIN_DIRECTORY, ".*",
    // DraftStatus.approved);
    // checkLocale(cldrFactory.make("root", true));
    // checkLocale(cldrFactory.make("ee", true));
    // }

    /**
     * Reports an error for every non-alias path matching {@code gregorian.*eras}, for every null
     * path, and for every path whose full XPath contains {@code @draft}.
     *
     * @param cldr the file to scan
     */
    public void checkLocale(CLDRFile cldr) {
        Matcher m = PatternCache.get("gregorian.*eras").matcher("");
        for (Iterator<String> it = cldr.iterator("", new UTF16.StringComparator());
                it.hasNext(); ) {
            String path = it.next();
            // Must be tested before the first dereference; nothing else can be checked on null.
            if (path == null) {
                errln("Null path");
                continue;
            }
            if (m.reset(path).find() && !path.contains("alias")) {
                errln(
                        cldr.getLocaleID()
                                + "\t"
                                + cldr.getStringValue(path)
                                + "\t"
                                + cldr.getFullXPath(path));
            }
            String fullPath = cldr.getFullXPath(path);
            if (fullPath.contains("@draft")) {
                errln("File can't contain draft elements");
            }
        }
    }

    // public void testTimeZonePath() {
    // Factory cldrFactory = Factory.make(CldrUtility.MAIN_DIRECTORY, ".*");
    // String tz = "Pacific/Midway";
    // CLDRFile cldrFile = cldrFactory.make("lv", true);
    // String retVal = cldrFile.getStringValue(
    // "//ldml/dates/timeZoneNames/zone[@type=\"" + tz + "\"]/exemplarCity"
    // , true).trim();
    // errln(retVal);
    // }

    /**
     * Logs timings for obtaining the English file and for value lookups, then walks every English
     * path twice and logs values that are shared by more than one non-aliased path.
     */
    public void testSimple() {
        double deltaTime = System.currentTimeMillis();
        CLDRFile english = testInfo.getEnglish();
        deltaTime = System.currentTimeMillis() - deltaTime;
        logln("Creation: Elapsed: " + deltaTime / 1000.0 + " seconds");

        deltaTime = System.currentTimeMillis();
        english.getStringValue("//ldml");
        deltaTime = System.currentTimeMillis() - deltaTime;
        logln("Creation: Elapsed: " + deltaTime / 1000.0 + " seconds");

        deltaTime = System.currentTimeMillis();
        english.getStringValue("//ldml");
        deltaTime = System.currentTimeMillis() - deltaTime;
        logln("Caching: Elapsed: " + deltaTime / 1000.0 + " seconds");

        deltaTime = System.currentTimeMillis();
        for (int j = 0; j < 2; ++j) {
            for (Iterator<String> it = english.iterator(); it.hasNext(); ) {
                String dpath = it.next();
                String value = english.getStringValue(dpath);
                Set<String> paths = english.getPathsWithValue(value, "", null, null);
                if (paths.size() == 0) {
                    continue;
                }
                if (DISABLE_TIL_WORKS && !paths.contains(dpath)) {
                    errln("Missing " + dpath + " in " + pathsWithValues(value, paths));
                }
                if (paths.size() > 1) {
                    Set<String> nonAliased = getNonAliased(paths, english);
                    if (nonAliased.size() > 1) {
                        logln(pathsWithValues(value, nonAliased));
                    }
                }
            }
        }
        deltaTime = System.currentTimeMillis() - deltaTime;
        logln("Elapsed: " + deltaTime / 1000.0 + " seconds");
    }

    /**
     * Formats a short summary: the number of paths, the shared value, and the first path.
     *
     * @param value the value the paths share
     * @param paths a non-empty set of paths
     */
    private String pathsWithValues(String value, Set<String> paths) {
        return paths.size()
                + " paths with: <"
                + value
                + ">\t\tPaths: "
                + paths.iterator().next()
                + ",...";
    }

    /** Returns, in encounter order, the paths for which {@code file.isHere(path)} is true. */
    private Set<String> getNonAliased(Set<String> paths, CLDRFile file) {
        Set<String> result = new LinkedHashSet<>();
        for (String path : paths) {
            if (file.isHere(path)) {
                result.add(path);
            }
        }
        return result;
    }

    /** Checks which locale {@code getSourceLocaleID} reports for three representative paths. */
    public void testResolution() {
        CLDRFile german = testInfo.getCLDRFile("de", true);
        CLDRFile bal = testInfo.getCLDRFile("bal", true);
        // Test direct lookup.

        String xpath = "//ldml/localeDisplayNames/localeDisplayPattern/localeSeparator";
        String id = bal.getSourceLocaleID(xpath, null);
        if (!id.equals("bal")) {
            errln("Expected bal but was " + id + " for " + xpath);
        }

        // Test aliasing.
        xpath =
                "//ldml/dates/calendars/calendar[@type=\"islamic-civil\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"yyyyMEd\"]";
        id = german.getSourceLocaleID(xpath, null);
        if (!id.equals("de")) {
            errln("Expected de but was " + id + " for " + xpath);
        }

        // Test lookup that falls to root.
        xpath =
                "//ldml/dates/calendars/calendar[@type=\"coptic\"]/months/monthContext[@type=\"stand-alone\"]/monthWidth[@type=\"narrow\"]/month[@type=\"5\"]";
        id = german.getSourceLocaleID(xpath, null);
        if (!id.equals("root")) {
            errln("Expected root but was " + id + " for " + xpath);
        }
    }

    static final NumberFormat percent = NumberFormat.getPercentInstance();

    /** Running tally of a number of values and their total length in chars. */
    static final class Size {
        int items;
        int chars;

        public void add(String topValue) {
            items++;
            chars += topValue.length();
        }

        /** Formats this tally together with its percentage of {@code base}. */
        public String over(Size base) {
            return "items: "
                    + items
                    + "("
                    + percent.format(items / (0.0 + base.items))
                    + "); "
                    + "chars: "
                    + chars
                    + "("
                    + percent.format(chars / (0.0 + base.chars))
                    + ")";
        }
    }

    /**
     * For several locales, checks that a resolved value is either the top-level (unresolved) value
     * or the Bailey value, that the Bailey locale/path agree with {@code getSourceLocaleID}, and
     * logs how many top-level values are superfluous or above MODERN coverage.
     */
    public void testGeorgeBailey() {
        PathHeader.Factory phf = PathHeader.getFactory(testInfo.getEnglish());
        for (String locale : Arrays.asList("de", "de_AT", "en", "nl")) {
            CLDRFile cldrFile = testInfo.getCLDRFile(locale, true);

            CLDRFile cldrFileUnresolved = testInfo.getCLDRFile(locale, false);
            Status status = new Status();
            Output<String> localeWhereFound = new Output<>();
            Output<String> pathWhereFound = new Output<>();

            Size countSuperfluous = new Size();
            Size countExtraLevel = new Size();
            Size countOrdinary = new Size();

            for (String path : cldrFile.fullIterable()) {
                String baileyValue =
                        cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
                String topValue = cldrFileUnresolved.getStringValue(path);
                String resolvedValue = cldrFile.getStringValue(path);
                String unresolvedConstructedValue = cldrFileUnresolved.getConstructedValue(path);
                String resolvedConstructedValue = cldrFile.getConstructedValue(path);

                // assertEquals("x≠y", "x", "y"); // expected x, got y
                if (unresolvedConstructedValue != null) {
                    assertEquals(
                            "uc≠rc\t" + locale + "\t" + phf.fromPath(path),
                            unresolvedConstructedValue,
                            resolvedConstructedValue);
                }

                // if there is a value, then either it is at the top level or it
                // is the bailey value.
                // OR it is INHERITANCE_MARKER

                if (resolvedValue != null) {
                    if (topValue != null && !CldrUtility.INHERITANCE_MARKER.equals(topValue)) {
                        if (!topValue.equals(unresolvedConstructedValue)) {
                            assertEquals(
                                    "top≠resolved\t" + locale + "\t" + phf.fromPath(path),
                                    topValue,
                                    resolvedValue);
                        }
                    } else {
                        String locale2 = cldrFile.getSourceLocaleID(path, status);
                        // The re-fetches after a failed assertion are debugging hooks: they
                        // repeat the lookups so a breakpoint can step into them.
                        if (!assertEquals(
                                "bailey value≠\t" + locale + "\t" + phf.fromPath(path),
                                resolvedValue,
                                baileyValue)) {
                            baileyValue =
                                    cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
                            topValue = cldrFileUnresolved.getStringValue(path);
                        }
                        if (!assertEquals(
                                "bailey locale≠\t" + locale + "\t" + phf.fromPath(path),
                                locale2,
                                localeWhereFound.value)) {
                            baileyValue =
                                    cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
                            topValue = cldrFileUnresolved.getStringValue(path);
                        }
                        if (!assertEquals(
                                "bailey path≠\t" + locale + "\t" + phf.fromPath(path),
                                status.pathWhereFound,
                                pathWhereFound.value)) {
                            baileyValue =
                                    cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
                            topValue = cldrFileUnresolved.getStringValue(path);
                        }
                    }
                }

                if (topValue != null) {
                    if (CldrUtility.equals(topValue, baileyValue)) {
                        countSuperfluous.add(topValue);
                    } else if (sdi.getCoverageLevel(path, locale).compareTo(Level.MODERN) > 0) {
                        countExtraLevel.add(topValue);
                    }
                    countOrdinary.add(topValue);
                }
            }
            logln("Superfluous (" + locale + "):\t" + countSuperfluous.over(countOrdinary));
            logln(">Modern (" + locale + "):\t" + countExtraLevel.over(countOrdinary));
        }
    }

    /** Checks English constructed locale names, including cases that must yield null. */
    public void TestConstructedValue() {
        CLDRFile eng = CLDRConfig.getInstance().getEnglish();

        String prefix = GlossonymConstructor.PATH_PREFIX;
        String display = eng.getConstructedValue(prefix + "zh_Hans" + "\"]");
        assertEquals("contructed value", "Chinese (Simplified)", display);
        display = eng.getConstructedValue(prefix + "es_US" + "\"]");
        assertEquals("contructed value", "Spanish (United States)", display);
        display = eng.getConstructedValue(prefix + "es_US" + "\"][@alt=\"short\"]");
        assertEquals("contructed value", "Spanish (US)", display);
        display = eng.getConstructedValue(prefix + "es" + "\"]");
        assertEquals("contructed value", null, display);
        display = eng.getConstructedValue(prefix + "missing" + "\"]");
        assertEquals("contructed value", null, display);
    }

    /**
     * Verifies that the main/collation directories exist and, unless seed is skipped, that no
     * locale other than root is present in both the common and the seed directories.
     *
     * @throws IllegalArgumentException if a required directory is missing
     */
    public void TestFileLocations() {
        File mainDir = new File(CLDRPaths.MAIN_DIRECTORY);
        if (!mainDir.isDirectory()) {
            throw new IllegalArgumentException(
                    "MAIN_DIRECTORY is not a directory: " + CLDRPaths.MAIN_DIRECTORY);
        }
        File mainCollationDir = new File(CLDRPaths.COLLATION_DIRECTORY);
        if (!mainCollationDir.isDirectory()) {
            throw new IllegalArgumentException(
                    "COLLATION_DIRECTORY is not a directory: " + CLDRPaths.COLLATION_DIRECTORY);
        }
        if (CLDRConfig.SKIP_SEED) {
            return;
        }
        File seedDir = new File(CLDRPaths.SEED_DIRECTORY);
        if (!seedDir.isDirectory()) {
            throw new IllegalArgumentException(
                    "SEED_DIRECTORY is not a directory: " + CLDRPaths.SEED_DIRECTORY);
        }
        File seedCollationDir = new File(CLDRPaths.SEED_COLLATION_DIRECTORY);
        if (!seedCollationDir.isDirectory()) {
            throw new IllegalArgumentException(
                    "SEED_COLLATION_DIRECTORY is not a directory: "
                            + CLDRPaths.SEED_COLLATION_DIRECTORY);
        }

        File[] md = {mainDir, mainCollationDir};
        File[] sd = {seedDir, seedCollationDir};
        Factory mf = SimpleFactory.make(md, ".*", DraftStatus.unconfirmed);
        Factory sf = SimpleFactory.make(sd, ".*", DraftStatus.unconfirmed);
        // Work on a copy: the set handed out by the factory is not ours to modify.
        Set<CLDRLocale> inBoth = new LinkedHashSet<>(mf.getAvailableCLDRLocales());
        inBoth.retainAll(sf.getAvailableCLDRLocales());
        inBoth.remove(CLDRLocale.getInstance("root")); // allow multiple roots
        if (!inBoth.isEmpty()) {
            errln("CLDR locale files located in both common and seed ==> " + inBoth.toString());
        }
    }

    /**
     * Reports every file in a {@code common/<dir>} LDML directory that has no counterpart of the
     * same name in the main directory.
     *
     * @throws IllegalArgumentException if one of the directories cannot be listed
     */
    public void TestForStrayFiles() {
        TreeSet<String> mainList =
                new TreeSet<>(Arrays.asList(listDirectory(new File(CLDRPaths.MAIN_DIRECTORY))));

        for (String dir : DtdType.ldml.directories) {
            Set<String> dirFiles =
                    new TreeSet<>(
                            Arrays.asList(
                                    listDirectory(
                                            new File(CLDRPaths.BASE_DIRECTORY + "common/" + dir))));
            if (!mainList.containsAll(dirFiles)) {
                dirFiles.removeAll(mainList);
                errln(dir + "/ has extra files not in main/: " + dirFiles);
            }
        }
    }

    /**
     * Lists the entry names of {@code dir}. {@link File#list()} returns null when the path is not
     * a directory or an I/O error occurs; that case is turned into a descriptive exception
     * instead of a later NullPointerException.
     *
     * @throws IllegalArgumentException if {@code dir} cannot be listed
     */
    private static String[] listDirectory(File dir) {
        String[] names = dir.list();
        if (names == null) {
            throw new IllegalArgumentException("Not a readable directory: " + dir);
        }
        return names;
    }

    /**
     * Checks that each non-root locale appears once per LDML directory, and that the parent (and
     * the simple parent, when different) of each locale file exists in the same LDML directory.
     */
    public void TestFileIds() {
        Output<Map<String, Multimap<LdmlDir, Source>>> localeToDirToSource = new Output<>();
        Map<LdmlDir, Multimap<String, Source>> dirToLocaleToSource = getFiles(localeToDirToSource);

        for (Entry<String, Multimap<LdmlDir, Source>> e : localeToDirToSource.value.entrySet()) {
            String locale = e.getKey();
            if (locale.equals("root")) {
                continue; // allow multiple root locales
            }
            Map<LdmlDir, Collection<Source>> value = e.getValue().asMap();
            for (Entry<LdmlDir, Collection<Source>> e2 : value.entrySet()) {
                LdmlDir dir = e2.getKey();
                Collection<Source> sources = e2.getValue();
                if (sources.size() != 1) {
                    errln(
                            "Can only one have 1 instance of "
                                    + locale
                                    + " in "
                                    + dir
                                    + ", but have in "
                                    + sources);
                }
            }
        }

        LikelySubtags likelySubtags = new LikelySubtags();

        for (Entry<LdmlDir, Multimap<String, Source>> dirAndLocaleToSource :
                dirToLocaleToSource.entrySet()) {
            LdmlDir ldmlDir = dirAndLocaleToSource.getKey();
            Multimap<String, Source> localesToDirs = dirAndLocaleToSource.getValue();
            for (Entry<String, Source> localeAndDir : localesToDirs.entries()) {
                String loc = localeAndDir.getKey();
                if (loc.equals("root")) {
                    continue;
                }
                Source source = localeAndDir.getValue();
                String parent = LocaleIDParser.getParent(loc);
                String parent2 = LanguageTagParser.getSimpleParent(loc);
                if (parent2.isEmpty()) {
                    parent2 = "root";
                }
                String likely = likelySubtags.minimize(loc);
                if (!localesToDirs.containsKey(parent)) {
                    errln(
                            "Missing parent ("
                                    + parent
                                    + ") for "
                                    + loc
                                    + " in "
                                    + source
                                    + "/"
                                    + ldmlDir
                                    + "; likely="
                                    + likely);
                }
                if (!Objects.equals(parent, parent2) && !localesToDirs.containsKey(parent2)) {
                    errln(
                            "Missing simple parent ("
                                    + parent2
                                    + ") for "
                                    + loc
                                    + " in "
                                    + source
                                    + "/"
                                    + ldmlDir
                                    + "; likely="
                                    + likely);
                }
            }

            // establish that the parent of locale is somewhere in the same
            // assertEquals(dir + " locale file has minimal id: ", min, loc);
            // if (!dir.endsWith("exemplars")) {
            // continue;
            // }
            // String trans = ltc.transform(loc);
            // System.out.println("\t" + min + "\t" + loc + "\t" + trans);
        }
    }

    /** Top-level directories under the CLDR base directory that are scanned for locale files. */
    enum Source {
        common,
        seed,
        exemplars
    }

    /** LDML sub-directories recognised inside a {@link Source} directory. */
    enum LdmlDir {
        main,
        annotations,
        annotationsDerived,
        casing,
        collation,
        rbnf,
        segments,
        subdivisions
    }

    /**
     * Returns a map from directory (eg main) to its parent (eg seed) and to their children (locales
     * in seed/main)
     *
     * @param localeToDirToSource receives the inverse view: locale to LDML directory to source
     * @throws IllegalArgumentException if a directory that must be scanned cannot be listed
     */
    private Map<LdmlDir, Multimap<String, Source>> getFiles(
            Output<Map<String, Multimap<LdmlDir, Source>>> localeToDirToSource) {

        Map<LdmlDir, Multimap<String, Source>> _dirToLocaleToSource = new TreeMap<>();
        Map<String, Multimap<LdmlDir, Source>> _localeToDirToSource = new TreeMap<>();

        for (String base : listDirectory(new File(CLDRPaths.BASE_DIRECTORY))) {
            Source source;
            try {
                source = Source.valueOf(base);
            } catch (IllegalArgumentException ignored) {
                continue; // not one of the directories we scan
            }
            String fullBase = CLDRPaths.BASE_DIRECTORY + base;
            File fullBaseFile = new File(fullBase);
            if (!fullBaseFile.isDirectory()) {
                continue;
            }

            for (String sub1 : listDirectory(fullBaseFile)) {
                if (!DtdType.ldml.directories.contains(sub1)) {
                    continue;
                }
                LdmlDir ldmlDir = LdmlDir.valueOf(sub1);
                String dir = fullBase + "/" + ldmlDir;
                for (String loc : listDirectory(new File(dir))) {
                    if (!loc.endsWith(".xml")) {
                        continue;
                    }
                    loc = loc.substring(0, loc.length() - 4);

                    put(_localeToDirToSource, loc, ldmlDir, source);
                    put(_dirToLocaleToSource, ldmlDir, loc, source);
                }
            }
        }
        localeToDirToSource.value =
                ImmutableMap.copyOf(_localeToDirToSource); // TODO protect subtrees
        return ImmutableMap.copyOf(_dirToLocaleToSource);
    }

    /** Adds {@code b -> c} to the multimap stored under {@code a}, creating it on first use. */
    private <A, B, C> void put(Map<A, Multimap<B, C>> aToBToC, A a, B b, C c) {
        Multimap<B, C> dirToSource = aToBToC.get(a);
        if (dirToSource == null) {
            // TreeMultimap needs Comparable keys and values, which B and C do not declare; the
            // callers in this class only pass String and enum arguments.
            @SuppressWarnings("unchecked")
            Multimap<B, C> created = (Multimap<B, C>) TreeMultimap.create();
            dirToSource = created;
            aToBToC.put(a, dirToSource);
        }
        dirToSource.put(b, c);
    }

    /** Warns about every de_CH value containing ß, except in the auxiliary exemplar set. */
    public void TestSwissHighGerman() {
        CLDRFile swissHighGerman = testInfo.getCommonSeedExemplarsFactory().make("de_CH", true);
        for (String xpath : swissHighGerman) {
            if (xpath.equals("//ldml/characters/exemplarCharacters[@type=\"auxiliary\"]")) {
                continue;
            }
            String value = swissHighGerman.getStringValue(xpath);
            if (value.indexOf('ß') >= 0) {
                warnln("«" + value + "» contains ß at " + xpath);
            }
        }
    }

    /** Asserts that iterating the resolved "af" file yields each of the listed paths. */
    public void TestExtraPaths() {
        List<String> testCases =
                Arrays.asList(
                        "//ldml/localeDisplayNames/languages/language[@type=\"ccp\"]",
                        "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]");
        CLDRFile af = testInfo.getCldrFactory().make("af", true);
        Set<String> missing = new HashSet<>(testCases);
        CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("af");
        PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(testInfo.getEnglish());
        Status status = new Status();

        for (String xpath : af) {
            if (missing.contains(xpath)) {
                String value = af.getStringValue(xpath);
                String source = af.getSourceLocaleID(xpath, status);
                Level level = coverageLevel2.getLevel(xpath);
                PathHeader ph = pathHeaderFactory.fromPath(xpath);
                getLogger()
                        .fine(
                                ""
                                        + "\nPathHeader:\t"
                                        + ph
                                        + "\nValue:\t"
                                        + value
                                        + "\nLevel:\t"
                                        + level
                                        + "\nReq. Locale:\t"
                                        + "af"
                                        + "\nSource Locale:\t"
                                        + source
                                        + "\nReq. XPath:\t"
                                        + xpath
                                        + "\nSource Path:\t"
                                        + status);
                missing.remove(xpath);
            }
        }
        assertTrue("Should be empty", missing.isEmpty());
    }

    /** Checks that the raw extra paths of es_US include all raw extra paths of es. */
    public void TestExtraPaths13954() {
        CLDRFile es = cldrFactory.make("es", true);
        CLDRFile es_US = cldrFactory.make("es_US", true);
        if (!es_US.getRawExtraPaths().containsAll(es.getRawExtraPaths())) {
            errln(
                    "Failure: "
                            + Joiner.on('\n')
                                    .join(
                                            Sets.difference(
                                                    es.getRawExtraPaths(),
                                                    es_US.getRawExtraPaths())));
        }
    }

    /**
     * Logs French unit paths that English resolves from a different path whose {@code type}
     * attribute (element index 3) differs. Informational only: nothing is asserted.
     */
    public void testEnglishSideways() {
        CLDRFile fr = cldrFactory.make("fr", true);
        CLDRFile en = cldrFactory.make("en", true);
        logln("");
        for (String path : fr.fullIterable()) {
            if (!path.startsWith("//ldml/units") || path.endsWith("/gender")) {
                continue;
            }
            Status status = new Status();
            String localeWhereFound = en.getSourceLocaleID(path, status);
            if (!Objects.equals(path, status.pathWhereFound)) {
                XPathParts pathParts = XPathParts.getFrozenInstance(path);
                String type = pathParts.getAttributeValue(3, "type");
                XPathParts foundPathParts = XPathParts.getFrozenInstance(status.pathWhereFound);
                String foundType = foundPathParts.getAttributeValue(3, "type");
                if (Objects.equals(type, foundType)) {
                    continue; // ok to go sideways within type
                }
                logln(String.format("%s\t%s\t%s", path, status.pathWhereFound, localeWhereFound));
            }
        }
    }
}