package org.unicode.cldr.unittest;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Output;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.test.CoverageLevel2;
import org.unicode.cldr.tool.LikelySubtags;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.GlossonymConstructor;
import org.unicode.cldr.util.GrammarInfo;
import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PathHeader.PageId;
import org.unicode.cldr.util.PathHeader.SectionId;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.PatternPlaceholders;
import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
import org.unicode.cldr.util.XPathParts;

/**
 * This is the original TestFmwk test case for CLDRFile.
 *
 * @see org.unicode.cldr.util.TestCLDRFile
 * @see org.unicode.cldr.util.CLDRFile
 */
public class TestCLDRFile extends TestFmwk {
    /** Gates an {@code errln} in {@link #testSimple()} that is currently switched off. */
    private static final boolean DISABLE_TIL_WORKS = false;

    static CLDRConfig testInfo = CLDRConfig.getInstance();
    static SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo();

    public static void main(String[] args) {
        new TestCLDRFile().run(args);
    }

    /** Checks the English names produced by {@code getName} for a few locale identifiers. */
    public void testFallbackNames() {
        String[][] tests = {
            {"zh-Hanb", "Chinese (Han with Bopomofo)"},
            {"aaa", "Ghotuo"},
            {"zh-RR", "Chinese (RR)"},
            {"new_Newa_NP", "Newari (Newa, Nepal)"},
        };
        CLDRFile english = testInfo.getEnglish();
        for (String[] test : tests) {
            assertEquals("", test[1], english.getName(test[0]));
        }
    }

    // verify for all paths, if there is a count="other", then there is a
    // count="x", for all x in keywords
    public void testPlurals() {
        for (String locale : new String[] {"fr", "en", "root", "ar", "ja"}) {
            checkPlurals(locale);
        }
    }

    static final Pattern COUNT_MATCHER = Pattern.compile("\\[@count=\"([^\"]+)\"]");

    static final UnicodeSet DIGITS = new UnicodeSet('0', '9').freeze();

    /**
     * Groups every {@code @count} path of the resolved file for {@code locale} by its "skeleton"
     * (the path with the count value replaced by {@code .*}) and asserts that each skeleton
     * carries exactly the plural keywords expected for the locale.
     *
     * @param locale the locale ID whose resolved file is checked
     * @throws IllegalArgumentException if a path contains {@code @count} but does not match
     *     {@link #COUNT_MATCHER}
     */
    private void checkPlurals(String locale) {
        CLDRFile cldrFile = testInfo.getCLDRFile(locale, true);
        Matcher m = COUNT_MATCHER.matcher("");
        Relation<String, String> skeletonToKeywords =
                Relation.of(
                        new TreeMap<String, Set<String>>(cldrFile.getComparator()), TreeSet.class);
        PluralInfo plurals = sdi.getPlurals(PluralType.cardinal, locale);
        for (String path : cldrFile.fullIterable()) {
            if (!path.contains("@count")) {
                continue;
            }
            if (!m.reset(path).find()) {
                throw new IllegalArgumentException(
                        "Path has @count but does not match " + COUNT_MATCHER + ": " + path);
            }
            // Explicit numeric counts (e.g. count="0") are not plural keywords.
            if (DIGITS.containsAll(m.group(1))) {
                continue;
            }
            String skeleton = path.substring(0, m.start(1)) + ".*" + path.substring(m.end(1));
            skeletonToKeywords.put(skeleton, m.group(1));
        }
        Set<String> normalKeywords = plurals.getAdjustedCountStrings();

        for (Entry<String, Set<String>> entry : skeletonToKeywords.keyValuesSet()) {
            final String abbreviatedPath = entry.getKey();
            Set<String> expected = normalKeywords;
            if (abbreviatedPath.startsWith("//ldml/numbers/minimalPairs/pluralMinimalPairs")) {
                expected = plurals.getCanonicalKeywords();
            }
            assertEquals(
                    "Incorrect keywords: " + locale + ", " + abbreviatedPath,
                    expected,
                    entry.getValue());
        }
    }

    static Factory cldrFactory = testInfo.getCldrFactory();

    /**
     * The resolved file for one locale plus the subset of its paths compared in {@link
     * #testExtraPaths()}: paths above COMPREHENSIVE coverage are dropped, and of the {@code
     * @count} paths only {@code count="other"} is kept.
     */
    static class LocaleInfo {
        final String locale;
        final CLDRFile cldrFile;
        final Set<String> paths = new HashSet<>();

        LocaleInfo(String locale) {
            this.locale = locale;
            cldrFile = testInfo.getCLDRFile(locale, true);
            for (String path : cldrFile.fullIterable()) {
                Level level = sdi.getCoverageLevel(path, locale);
                if (level.compareTo(Level.COMPREHENSIVE) > 0) {
                    continue;
                }
                if (path.contains("[@count=") && !path.contains("[@count=\"other\"]")) {
                    continue;
                }
                paths.add(path);
            }
        }
    }

    static final boolean DEBUG = false;
    static final boolean DEBUG_testExtraPaths = true;

    /**
     * Exercises extra paths in three stages: (1) path headers and placeholder status of the
     * German extra paths, (2) coverage level of grammatical extra paths for locales with nominal
     * grammar info, and (3) a comparison of the path sets of a few locales against English.
     */
    public void testExtraPaths() {
        // for debugging
        final CLDRFile german = CLDRConfig.getInstance().getCldrFactory().make("de", true);
        getLogger().fine("");
        Set<String> sorted = new TreeSet<>(german.getExtraPaths());
        PathHeader.Factory phf = PathHeader.getFactory();
        PatternPlaceholders pph = PatternPlaceholders.getInstance();

        for (String path : sorted) {
            if (DEBUG_testExtraPaths
                    && path.equals(
                            "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"acceleration-g-force\"]/unitPattern")) {
                List<String> failures = new ArrayList<>();
                phf.fromPath(path, failures);
            }
            PathHeader ph = phf.fromPath(path);
            if (ph.getPageId() != PageId.Deprecated) {
                assertNotEquals(
                        "bad placeholder: " + path + " ; " + ph,
                        SectionId.Special,
                        ph.getSectionId());
            }
            PlaceholderStatus phStatus = pph.getStatus(path);

            PlaceholderStatus expected =
                    path.contains("/metazone")
                                    || path.contains("/timeZoneNames")
                                    || path.contains("/gender")
                                    || path.startsWith("//ldml/numbers/currencies/currency")
                                    || path.startsWith("//ldml/personNames/sampleName")
                                    || path.contains("/availableFormats")
                            ? PlaceholderStatus.DISALLOWED
                            : path.contains("/compoundUnitPattern1")
                                    ? PlaceholderStatus.REQUIRED
                                    : PlaceholderStatus.LOCALE_DEPENDENT;
            assertEquals(path, expected, phStatus);

            if (DEBUG) {
                if (GrammaticalFeature.pathHasFeature(path) != null || path.endsWith("/gender")) {
                    System.out.println(path + "\t" + german.getStringValue(path));
                    String newPath = path.replace("[@case=\"accusative\"]", "");
                    if (!newPath.contentEquals(path) && !sorted.contains(newPath)) {
                        System.out.println(newPath + "\t" + german.getStringValue(newPath));
                    }
                }
            }
        }

        Set<String> badCoverage = new TreeSet<>();
        Counter<String> extraPaths = new Counter<>();
        final Factory fullCldrFactory = CLDRConfig.getInstance().getFullCldrFactory();

        for (String locale : sdi.hasGrammarInfo()) {
            if (sdi.getGrammarInfo(locale).hasInfo(GrammaticalTarget.nominal)) {
                if (!fullCldrFactory.getAvailable().contains(locale)) {
                    if (CLDRConfig.SKIP_SEED) {
                        continue; // don't check if skipping seed, for production
                    }
                }
                final CLDRFile cldrFile = fullCldrFactory.make(locale, true);
                Set<String> sorted2 = new TreeSet<>(cldrFile.getExtraPaths());
                for (String path : sorted2) {
                    if (path.contains("speed-beaufort")) {
                        continue; // special case
                    }
                    if (path.contains("/gender")
                            || path.contains("@gender")
                            || path.contains("@case")) {
                        Level level = sdi.getCoverageLevel(path, locale);
                        if (level.compareTo(Level.MODERN) > 0) {
                            badCoverage.add(path);
                        }
                        extraPaths.add(locale, 1);
                    }
                }
            }
        }
        getLogger().fine("Units with grammar info: " + GrammarInfo.getUnitsToAddGrammar().size());
        getLogger().fine("Inflection Paths");
        for (R2<Long, String> locale : extraPaths.getEntrySetSortedByCount(false, null)) {
            getLogger().fine(locale.get0() + "\t" + locale.get1());
        }
        if (!badCoverage.isEmpty()) {
            errln("Paths not at modern: " + Joiner.on("\n\t").join(badCoverage));
        }

        // Set<String> validUnits =
        // Validity.getInstance().getStatusToCodes(LstrType.unit).get(Validity.Status.regular);
        // validUnits.forEach(System.out::println);
        //
        // grammarInfo = testInfo.getSupplementalDataInfo().getGrammarInfo();
        // for (Entry<String, GrammarInfo> entry : grammarInfo.entrySet()) {
        // System.out.println(entry);
        // }

        // CLDRFile toCheck = testInfo.getCldrFactory().make("de", true); //
        // testInfo.getFullCldrFactory().make("en", false);
        // Set<String> sorted = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml));
        // sorted.addAll(toCheck.getExtraPaths());
        // for (String path : sorted) {
        // XPathParts parts = XPathParts.getFrozenInstance(path);
        // assertEquals("parts: ", parts.toString(), path);
        // System.out.println(path);
        // }
        // int debug = 0;

        Map<String, LocaleInfo> localeInfos = new LinkedHashMap<>();
        Relation<String, String> missingPathsToLocales =
                Relation.of(
                        new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)),
                        TreeSet.class);
        Relation<String, String> extraPathsToLocales =
                Relation.of(
                        new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)),
                        TreeSet.class);

        for (String locale : new String[] {"en", "root", "fr", "ar", "ja"}) {
            localeInfos.put(locale, new LocaleInfo(locale));
        }
        LocaleInfo englishInfo = localeInfos.get("en");
        for (String path : englishInfo.paths) {
            if (path.startsWith("//ldml/identity/")
                    || path.startsWith("//ldml/numbers/currencies/currency[@type=")
                    // || path.startsWith("//ldml/dates/calendars/calendar") &&
                    // !path.startsWith("//ldml/dates/calendars/calendar[@type=\"gregorian\"]")
                    // ||
                    // path.startsWith("//ldml/numbers/currencyFormats[@numberSystem=")
                    // &&
                    // !path.startsWith("//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]")
                    || (path.contains("[@count=") && !path.contains("[@count=\"other\"]"))
                    || (path.contains("[@ordinal=") && !path.contains("[@ordinal=\"other\"]"))
                    || path.contains("dayPeriod[@type=\"noon\"]")) {
                continue;
            }
            for (LocaleInfo localeInfo : localeInfos.values()) {
                if (localeInfo == englishInfo) {
                    continue;
                }
                if (!localeInfo.paths.contains(path)) {
                    if ((path.startsWith("//ldml/dates/calendars/calendar")
                                    && !(path.contains("[@type=\"generic\"]")
                                            || path.contains("[@type=\"gregorian\"]")))
                            // it is OK for just "en" to have
                            // /eras/.../era[@type=...][@alt="variant"]
                            || (path.contains("/eras/") && path.contains("[@alt=\"variant\"]"))
                            || path.contains("[@type=\"japanese\"]")
                            || path.contains("[@type=\"coptic\"]")
                            || path.contains("[@type=\"hebrew\"]")
                            || path.contains("[@type=\"islamic-rgsa\"]")
                            || path.contains("[@type=\"islamic-umalqura\"]")
                            || path.contains("/relative[@type=\"-2\"]")
                            || path.contains("/relative[@type=\"2\"]")
                            || path.startsWith("//ldml/contextTransforms/contextTransformUsage")
                            || path.contains("[@alt=\"variant\"]")
                            || path.contains("[@alt=\"formal\"]")
                            || path.contains("[@type=\"pressure-gasoline-energy-density\"]")
                            // morning1, afternoon1, ...
                            || (path.contains("dayPeriod[@type=")
                                    && (path.endsWith("1\"]")
                                            || path.endsWith("\"am\"]")
                                            || path.endsWith("\"pm\"]")
                                            || path.endsWith("\"midnight\"]")))
                            // //ldml/characters/exemplarCharacters[@type="index"][root]
                            || (localeInfo.locale.equals("root")
                                    && (path.startsWith(
                                                    "//ldml/characters/exemplarCharacters[@type=\"index\"]")
                                            // two aliased paths in root
                                            || (path.startsWith("//ldml/units/unitLength")
                                                    && (path.contains("[@type=\"energy-foodcalorie")
                                                            || path.contains(
                                                                    "[@type=\"graphics-dot")))))) {
                        continue;
                    }
                    String localeAndStatus =
                            localeInfo.locale + (englishInfo.cldrFile.isHere(path) ? "" : "*");
                    missingPathsToLocales.put(path, localeAndStatus);
                    // English contains the path, and the target locale doesn't.
                    // The * means that the value is inherited (eg from root).
                }
            }
        }

        for (LocaleInfo localeInfo : localeInfos.values()) {
            if (localeInfo == englishInfo) {
                continue;
            }
            for (String path : localeInfo.paths) {
                if (path.contains("[@numberSystem=\"arab\"]")
                        || path.contains("[@type=\"japanese\"]")
                        || path.contains("[@type=\"coptic\"]")
                        || path.contains("[@type=\"hebrew\"]")
                        || path.contains("[@type=\"islamic-rgsa\"]")
                        || path.contains("[@type=\"islamic-umalqura\"]")
                        || path.contains("/relative[@type=\"-2\"]")
                        || path.contains("/relative[@type=\"2\"]")) {
                    continue;
                }
                if (!englishInfo.paths.contains(path)) {
                    String localeAndStatus =
                            localeInfo.locale + (localeInfo.cldrFile.isHere(path) ? "" : "*");
                    extraPathsToLocales.put(path, localeAndStatus);
                    // English doesn't contain the path, and the target locale does.
                    // The * means that the value is inherited (eg from root).
                }
            }
        }

        for (Entry<String, Set<String>> entry : missingPathsToLocales.keyValuesSet()) {
            String path = entry.getKey();
            Set<String> locales = entry.getValue();
            Status status = new Status();
            String originalLocale = englishInfo.cldrFile.getSourceLocaleID(path, status);
            String engName =
                    "en"
                            + (englishInfo.cldrFile.isHere(path)
                                    ? ""
                                    : " (source_locale:"
                                            + originalLocale
                                            + (path.equals(status.pathWhereFound)
                                                    ? ""
                                                    : ", source_path: " + status)
                                            + ")");
            if (path.startsWith("//ldml/localeDisplayNames/")
                    || path.contains("[@alt=\"accounting\"]")
                    || path.contains("[@alt=\"alphaNextToNumber\"]") // CLDR-14336
                    || path.contains("[@alt=\"ascii\"]") // CLDR-16606
                    || path.contains("[@alt=\"noCurrency\"]") // CLDR-14336
                    || path.startsWith("//ldml/personNames/") // CLDR-15384
                    || path.startsWith("//ldml/typographicNames/styleName")
                    || path.startsWith("//ldml/units")) {
                logln("+" + engName + ", -" + locales + "\t" + path);
            } else {
                errln("+" + engName + ", -" + locales + "\t" + path);
            }
        }
        // Paths present in another locale but not in English are only logged. The original code
        // special-cased //ldml/localeDisplayNames/ and //ldml/numbers/otherNumberingSystems/
        // (and, commented out, [@alt="accounting"]), but both branches issued the same logln.
        for (Entry<String, Set<String>> entry : extraPathsToLocales.keyValuesSet()) {
            logln("-en, +" + entry.getValue() + "\t" + entry.getKey());
        }

        // for (String locale : new String[] { "fr", "ar", "ja" }) {
        // CLDRFile cldrFile = cldrFactory.make(locale, true);
        // Set<String> s = (Set<String>) cldrFile.getExtraPaths(new
        // TreeSet<String>());
        // System.out.println("Extras for " + locale);
        // for (String path : s) {
        // System.out.println(path + " => " + cldrFile.getStringValue(path));
        // }
        // System.out.println("Already in " + locale);
        // for (Iterator<String> it =
        // cldrFile.iterator(PatternCache.get(".*\\[@count=.*").matcher(""));
        // it.hasNext();) {
        // String path = it.next();
        // System.out.println(path + " => " + cldrFile.getStringValue(path));
        // }
        // }
    }

    // public void testDraftFilter() {
    // Factory cldrFactory = Factory.make(CldrUtility.MAIN_DIRECTORY, ".*",
    // DraftStatus.approved);
    // checkLocale(cldrFactory.make("root", true));
    // checkLocale(cldrFactory.make("ee", true));
    // }

    /**
     * Reports an error for every non-alias path of {@code cldr} matching {@code gregorian.*eras},
     * for any null path, and for any path whose full XPath carries a {@code @draft} attribute.
     *
     * @param cldr the file to scan
     */
    public void checkLocale(CLDRFile cldr) {
        Matcher m = PatternCache.get("gregorian.*eras").matcher("");
        for (Iterator<String> it = cldr.iterator("", new UTF16.StringComparator());
                it.hasNext(); ) {
            String path = it.next();
            // Check for null before the path is dereferenced below; the remaining checks cannot
            // be applied to a null path.
            if (path == null) {
                errln("Null path");
                continue;
            }
            if (m.reset(path).find() && !path.contains("alias")) {
                errln(
                        cldr.getLocaleID()
                                + "\t"
                                + cldr.getStringValue(path)
                                + "\t"
                                + cldr.getFullXPath(path));
            }
            String fullPath = cldr.getFullXPath(path);
            if (fullPath != null && fullPath.contains("@draft")) {
                errln("File can't contain draft elements");
            }
        }
    }

    // public void testTimeZonePath() {
    // Factory cldrFactory = Factory.make(CldrUtility.MAIN_DIRECTORY, ".*");
    // String tz = "Pacific/Midway";
    // CLDRFile cldrFile = cldrFactory.make("lv", true);
    // String retVal = cldrFile.getStringValue(
    // "//ldml/dates/timeZoneNames/zone[@type=\"" + tz + "\"]/exemplarCity"
    // , true).trim();
    // errln(retVal);
    // }

    /**
     * Logs elapsed times for obtaining the English file, for two consecutive lookups of {@code
     * //ldml}, and for two full passes that look up every path's value and the paths sharing that
     * value.
     */
    public void testSimple() {
        double deltaTime = System.currentTimeMillis();
        CLDRFile english = testInfo.getEnglish();
        deltaTime = System.currentTimeMillis() - deltaTime;
        logln("Creation: Elapsed: " + deltaTime / 1000.0 + " seconds");

        deltaTime = System.currentTimeMillis();
        english.getStringValue("//ldml");
        deltaTime = System.currentTimeMillis() - deltaTime;
        logln("Creation: Elapsed: " + deltaTime / 1000.0 + " seconds");

        deltaTime = System.currentTimeMillis();
        english.getStringValue("//ldml");
        deltaTime = System.currentTimeMillis() - deltaTime;
        logln("Caching: Elapsed: " + deltaTime / 1000.0 + " seconds");

        deltaTime = System.currentTimeMillis();
        for (int j = 0; j < 2; ++j) {
            for (Iterator<String> it = english.iterator(); it.hasNext(); ) {
                String dpath = it.next();
                String value = english.getStringValue(dpath);
                Set<String> paths = english.getPathsWithValue(value, "", null, null);
                if (paths.size() == 0) {
                    continue;
                }
                if (!paths.contains(dpath)) {
                    if (DISABLE_TIL_WORKS) {
                        errln("Missing " + dpath + " in " + pathsWithValues(value, paths));
                    }
                }
                if (paths.size() > 1) {
                    Set<String> nonAliased = getNonAliased(paths, english);
                    if (nonAliased.size() > 1) {
                        logln(pathsWithValues(value, nonAliased));
                    }
                }
            }
        }
        deltaTime = System.currentTimeMillis() - deltaTime;
        logln("Elapsed: " + deltaTime / 1000.0 + " seconds");
    }

    /**
     * Formats a one-line summary: the number of paths, the shared value, and the first path.
     *
     * @param value the value shared by {@code paths}
     * @param paths a non-empty set of paths
     */
    private String pathsWithValues(String value, Set<String> paths) {
        return paths.size()
                + " paths with: <"
                + value
                + ">\t\tPaths: "
                + paths.iterator().next()
                + ",...";
    }

    /** Returns, in iteration order, the subset of {@code paths} for which {@code isHere} holds. */
    private Set<String> getNonAliased(Set<String> paths, CLDRFile file) {
        Set<String> result = new LinkedHashSet<>();
        for (String path : paths) {
            if (file.isHere(path)) {
                result.add(path);
            }
        }
        return result;
    }

    /** Checks which locale {@code getSourceLocaleID} reports for three representative paths. */
    public void testResolution() {
        CLDRFile german = testInfo.getCLDRFile("de", true);
        CLDRFile bal = testInfo.getCLDRFile("bal", true);
        // Test direct lookup.

        String xpath = "//ldml/localeDisplayNames/localeDisplayPattern/localeSeparator";
        String id = bal.getSourceLocaleID(xpath, null);
        if (!"bal".equals(id)) {
            errln("Expected bal but was " + id + " for " + xpath);
        }

        // Test aliasing.
        xpath =
                "//ldml/dates/calendars/calendar[@type=\"islamic-civil\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"yyyyMEd\"]";
        id = german.getSourceLocaleID(xpath, null);
        if (!"de".equals(id)) {
            errln("Expected de but was " + id + " for " + xpath);
        }

        // Test lookup that falls to root.
        xpath =
                "//ldml/dates/calendars/calendar[@type=\"coptic\"]/months/monthContext[@type=\"stand-alone\"]/monthWidth[@type=\"narrow\"]/month[@type=\"5\"]";
        id = german.getSourceLocaleID(xpath, null);
        if (!"root".equals(id)) {
            errln("Expected root but was " + id + " for " + xpath);
        }
    }

    static final NumberFormat percent = NumberFormat.getPercentInstance();

    /** Running tally of how many values were added and their total length in chars. */
    static final class Size {
        int items;
        int chars;

        public void add(String topValue) {
            items++;
            chars += topValue.length();
        }

        /** Formats this tally with each figure also shown as a percentage of {@code base}. */
        public String over(Size base) {
            return "items: "
                    + items
                    + "("
                    + percent.format(items / (0.0 + base.items))
                    + "); "
                    + "chars: "
                    + chars
                    + "("
                    + percent.format(chars / (0.0 + base.chars))
                    + ")";
        }
    }

    /**
     * For a few locales, checks that constructed values agree between the resolved and unresolved
     * file, and that every resolved value is either the locale's own top-level value or the
     * "bailey" value (with matching source locale and source path). Also logs how many top-level
     * values equal the bailey value and how many sit above MODERN coverage.
     */
    public void testGeorgeBailey() {
        PathHeader.Factory phf = PathHeader.getFactory(testInfo.getEnglish());
        for (String locale : Arrays.asList("de", "de_AT", "en", "nl")) {
            CLDRFile cldrFile = testInfo.getCLDRFile(locale, true);

            CLDRFile cldrFileUnresolved = testInfo.getCLDRFile(locale, false);
            Status status = new Status();
            Output<String> localeWhereFound = new Output<>();
            Output<String> pathWhereFound = new Output<>();

            Size countSuperfluous = new Size();
            Size countExtraLevel = new Size();
            Size countOrdinary = new Size();

            for (String path : cldrFile.fullIterable()) {
                String baileyValue =
                        cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
                String topValue = cldrFileUnresolved.getStringValue(path);
                String resolvedValue = cldrFile.getStringValue(path);
                String unresolvedConstructedValue = cldrFileUnresolved.getConstructedValue(path);
                String resolvedConstructedValue = cldrFile.getConstructedValue(path);

                // assertEquals("x≠y", "x", "y"); // expected x, got y
                if (unresolvedConstructedValue != null) {
                    assertEquals(
                            "uc≠rc\t" + locale + "\t" + phf.fromPath(path),
                            unresolvedConstructedValue,
                            resolvedConstructedValue);
                }

                // if there is a value, then either it is at the top level or it
                // is the bailey value.
                // OR it is INHERITANCE_MARKER

                if (resolvedValue != null) {
                    if (topValue != null && !CldrUtility.INHERITANCE_MARKER.equals(topValue)) {
                        if (!topValue.equals(cldrFileUnresolved.getConstructedValue(path))) {
                            assertEquals(
                                    "top≠resolved\t" + locale + "\t" + phf.fromPath(path),
                                    topValue,
                                    resolvedValue);
                        }
                    } else {
                        String locale2 = cldrFile.getSourceLocaleID(path, status);
                        // On each failure the lookups are repeated; this looks like a hook for
                        // stepping through them in a debugger.
                        if (!assertEquals(
                                "bailey value≠\t" + locale + "\t" + phf.fromPath(path),
                                resolvedValue,
                                baileyValue)) {
                            baileyValue =
                                    cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
                            topValue = cldrFileUnresolved.getStringValue(path);
                        }
                        if (!assertEquals(
                                "bailey locale≠\t" + locale + "\t" + phf.fromPath(path),
                                locale2,
                                localeWhereFound.value)) {
                            baileyValue =
                                    cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
                            topValue = cldrFileUnresolved.getStringValue(path);
                        }
                        if (!assertEquals(
                                "bailey path≠\t" + locale + "\t" + phf.fromPath(path),
                                status.pathWhereFound,
                                pathWhereFound.value)) {
                            baileyValue =
                                    cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
                            topValue = cldrFileUnresolved.getStringValue(path);
                        }
                    }
                }

                if (topValue != null) {
                    if (CldrUtility.equals(topValue, baileyValue)) {
                        countSuperfluous.add(topValue);
                    } else if (sdi.getCoverageLevel(path, locale).compareTo(Level.MODERN) > 0) {
                        countExtraLevel.add(topValue);
                    }
                    countOrdinary.add(topValue);
                }
            }
            logln("Superfluous (" + locale + "):\t" + countSuperfluous.over(countOrdinary));
            logln(">Modern (" + locale + "):\t" + countExtraLevel.over(countOrdinary));
        }
    }

    /** Checks English constructed values for several locale display-name paths. */
    public void TestConstructedValue() {
        CLDRFile eng = CLDRConfig.getInstance().getEnglish();

        String prefix = GlossonymConstructor.PATH_PREFIX;
        String display = eng.getConstructedValue(prefix + "zh_Hans" + "\"]");
        assertEquals("contructed value", "Chinese (Simplified)", display);
        display = eng.getConstructedValue(prefix + "es_US" + "\"]");
        assertEquals("contructed value", "Spanish (United States)", display);
        display = eng.getConstructedValue(prefix + "es_US" + "\"][@alt=\"short\"]");
        assertEquals("contructed value", "Spanish (US)", display);
        display = eng.getConstructedValue(prefix + "es" + "\"]");
        assertEquals("contructed value", null, display);
        display = eng.getConstructedValue(prefix + "missing" + "\"]");
        assertEquals("contructed value", null, display);
    }

    /**
     * Verifies that the main and collation directories exist and, unless seed is skipped, that the
     * seed directories exist and that no locale other than root is present in both common and
     * seed.
     *
     * @throws IllegalArgumentException if one of the required directories is not a directory
     */
    public void TestFileLocations() {
        File mainDir = new File(CLDRPaths.MAIN_DIRECTORY);
        if (!mainDir.isDirectory()) {
            throw new IllegalArgumentException(
                    "MAIN_DIRECTORY is not a directory: " + CLDRPaths.MAIN_DIRECTORY);
        }
        File mainCollationDir = new File(CLDRPaths.COLLATION_DIRECTORY);
        if (!mainCollationDir.isDirectory()) {
            throw new IllegalArgumentException(
                    "COLLATION_DIRECTORY is not a directory: " + CLDRPaths.COLLATION_DIRECTORY);
        }
        if (CLDRConfig.SKIP_SEED) {
            return;
        }
        File seedDir = new File(CLDRPaths.SEED_DIRECTORY);
        if (!seedDir.isDirectory()) {
            throw new IllegalArgumentException(
                    "SEED_DIRECTORY is not a directory: " + CLDRPaths.SEED_DIRECTORY);
        }
        File seedCollationDir = new File(CLDRPaths.SEED_COLLATION_DIRECTORY);
        if (!seedCollationDir.isDirectory()) {
            throw new IllegalArgumentException(
                    "SEED_COLLATION_DIRECTORY is not a directory: "
                            + CLDRPaths.SEED_COLLATION_DIRECTORY);
        }

        File[] md = {mainDir, mainCollationDir};
        File[] sd = {seedDir, seedCollationDir};
        Factory mf = SimpleFactory.make(md, ".*", DraftStatus.unconfirmed);
        Factory sf = SimpleFactory.make(sd, ".*", DraftStatus.unconfirmed);
        Set<CLDRLocale> mainLocales = mf.getAvailableCLDRLocales();
        Set<CLDRLocale> seedLocales = sf.getAvailableCLDRLocales();
        mainLocales.retainAll(seedLocales);
        mainLocales.remove(CLDRLocale.getInstance("root")); // allow multiple roots
        if (!mainLocales.isEmpty()) {
            errln(
                    "CLDR locale files located in both common and seed ==> "
                            + mainLocales.toString());
        }
    }

    /**
     * Reports any file name found in a {@code common/<dir>} LDML directory that has no
     * counterpart in the main directory.
     */
    public void TestForStrayFiles() {
        TreeSet<String> mainList =
                new TreeSet<>(Arrays.asList(listEntries(new File(CLDRPaths.MAIN_DIRECTORY))));

        for (String dir : DtdType.ldml.directories) {
            Set<String> dirFiles =
                    new TreeSet<>(
                            Arrays.asList(
                                    listEntries(
                                            new File(CLDRPaths.BASE_DIRECTORY + "common/" + dir))));
            if (!mainList.containsAll(dirFiles)) {
                dirFiles.removeAll(mainList);
                errln(dir + "/ has extra files not in main/: " + dirFiles);
            }
        }
    }

    /**
     * Checks that each non-root locale occurs in only one source per LDML directory, and that the
     * parent (and simple parent) of every locale file exists in the same LDML directory.
     */
    public void TestFileIds() {
        Output<Map<String, Multimap<LdmlDir, Source>>> localeToDirToSource = new Output<>();
        Map<LdmlDir, Multimap<String, Source>> dirToLocaleToSource = getFiles(localeToDirToSource);

        for (Entry<String, Multimap<LdmlDir, Source>> e : localeToDirToSource.value.entrySet()) {
            String locale = e.getKey();
            if (locale.equals("root")) {
                continue; // allow multiple root locales
            }
            Map<LdmlDir, Collection<Source>> value = e.getValue().asMap();
            for (Entry<LdmlDir, Collection<Source>> e2 : value.entrySet()) {
                LdmlDir dir = e2.getKey();
                Collection<Source> sources = e2.getValue();
                if (sources.size() != 1) {
                    errln(
                            "Can only one have 1 instance of "
                                    + locale
                                    + " in "
                                    + dir
                                    + ", but have in "
                                    + sources);
                }
            }
        }

        LikelySubtags likelySubtags = new LikelySubtags();

        for (Entry<LdmlDir, Multimap<String, Source>> dirAndLocaleToSource :
                dirToLocaleToSource.entrySet()) {
            LdmlDir ldmlDir = dirAndLocaleToSource.getKey();
            Multimap<String, Source> localesToDirs = dirAndLocaleToSource.getValue();
            for (Entry<String, Source> localeAndDir : localesToDirs.entries()) {
                String loc = localeAndDir.getKey();
                if (loc.equals("root")) {
                    continue;
                }
                Source source = localeAndDir.getValue();
                String parent = LocaleIDParser.getParent(loc);
                String parent2 = LanguageTagParser.getSimpleParent(loc);
                if (parent2.isEmpty()) {
                    parent2 = "root";
                }
                String likely = likelySubtags.minimize(loc);
                if (!localesToDirs.containsKey(parent)) {
                    errln(
                            "Missing parent ("
                                    + parent
                                    + ") for "
                                    + loc
                                    + "  in "
                                    + source
                                    + "/"
                                    + ldmlDir
                                    + "; likely="
                                    + likely);
                }
                if (!Objects.equals(parent, parent2) && !localesToDirs.containsKey(parent2)) {
                    errln(
                            "Missing simple parent ("
                                    + parent2
                                    + ") for "
                                    + loc
                                    + "  in "
                                    + source
                                    + "/"
                                    + ldmlDir
                                    + "; likely="
                                    + likely);
                }
            }

            // establish that the parent of locale is somewhere in the same
            // assertEquals(dir + " locale file has minimal id: ", min, loc);
            // if (!dir.endsWith("exemplars")) {
            // continue;
            // }
            // String trans = ltc.transform(loc);
            // System.out.println("\t" + min + "\t" + loc + "\t" + trans);
        }
    }

    enum Source {
        common,
        seed,
        exemplars
    }

    enum LdmlDir {
        main,
        annotations,
        annotationsDerived,
        casing,
        collation,
        rbnf,
        segments,
        subdivisions
    }

    /**
     * Lists the entries of {@code dir}. {@link File#list()} returns {@code null} when the path is
     * not a directory or cannot be read; that case is reported as a test error and an empty array
     * is returned so that callers do not fail with a NullPointerException.
     */
    private String[] listEntries(File dir) {
        String[] names = dir.list();
        if (names == null) {
            errln("Cannot list directory: " + dir);
            return new String[0];
        }
        return names;
    }

    /**
     * Returns a map from directory (eg main) to its parent (eg seed) and to their children (locales
     * in seed/main)
     *
     * @param localeToDirToSource receives the inverse view: locale to LDML directory to source
     */
    private Map<LdmlDir, Multimap<String, Source>> getFiles(
            Output<Map<String, Multimap<LdmlDir, Source>>> localeToDirToSource) {

        Map<LdmlDir, Multimap<String, Source>> _dirToLocaleToSource = new TreeMap<>();
        Map<String, Multimap<LdmlDir, Source>> _localeToDirToSource = new TreeMap<>();

        for (String base : listEntries(new File(CLDRPaths.BASE_DIRECTORY))) {
            Source source;
            try {
                source = Source.valueOf(base);
            } catch (IllegalArgumentException ignored) {
                // Not one of the source directories (common, seed, exemplars); skip it.
                continue;
            }
            String fullBase = CLDRPaths.BASE_DIRECTORY + base;
            File fullBaseFile = new File(fullBase);
            if (!fullBaseFile.isDirectory()) {
                continue;
            }

            for (String sub1 : listEntries(fullBaseFile)) {
                if (!DtdType.ldml.directories.contains(sub1)) {
                    continue;
                }
                LdmlDir ldmlDir = LdmlDir.valueOf(sub1);
                String dir = fullBase + "/" + ldmlDir;
                for (String loc : listEntries(new File(dir))) {
                    if (!loc.endsWith(".xml")) {
                        continue;
                    }
                    loc = loc.substring(0, loc.length() - 4);

                    put(_localeToDirToSource, loc, ldmlDir, source);
                    put(_dirToLocaleToSource, ldmlDir, loc, source);
                }
            }
        }
        localeToDirToSource.value =
                ImmutableMap.copyOf(_localeToDirToSource); // TODO protect subtrees
        return ImmutableMap.copyOf(_dirToLocaleToSource);
    }

    /** Adds {@code (b, c)} to the multimap stored under {@code a}, creating it when absent. */
    @SuppressWarnings("unchecked") // the new TreeMultimap is empty and only ever holds B/C pairs
    private <A, B, C> void put(Map<A, Multimap<B, C>> aToBToC, A a, B b, C c) {
        Multimap<B, C> dirToSource = aToBToC.get(a);
        if (dirToSource == null) {
            dirToSource = (Multimap<B, C>) TreeMultimap.create();
            aToBToC.put(a, dirToSource);
        }
        dirToSource.put(b, c);
    }

    /** Warns about any de_CH value, outside the auxiliary exemplars, that contains ß. */
    public void TestSwissHighGerman() {
        CLDRFile swissHighGerman = testInfo.getCommonSeedExemplarsFactory().make("de_CH", true);
        for (String xpath : swissHighGerman) {
            if (xpath.equals("//ldml/characters/exemplarCharacters[@type=\"auxiliary\"]")) {
                continue;
            }
            String value = swissHighGerman.getStringValue(xpath);
            if (value != null && value.indexOf('ß') >= 0) {
                warnln("«" + value + "» contains ß at " + xpath);
            }
        }
    }

    /** Checks that iterating the resolved "af" file yields each of the listed test paths. */
    public void TestExtraPaths() {
        List<String> testCases =
                Arrays.asList(
                        "//ldml/localeDisplayNames/languages/language[@type=\"ccp\"]",
                        "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]");
        CLDRFile af = testInfo.getCldrFactory().make("af", true);
        Set<String> missing = new HashSet<>(testCases);
        CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("af");
        PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(testInfo.getEnglish());
        Status status = new Status();

        for (String xpath : af) {
            if (missing.contains(xpath)) {
                String value = af.getStringValue(xpath);
                String source = af.getSourceLocaleID(xpath, status);
                Level level = coverageLevel2.getLevel(xpath);
                PathHeader ph = pathHeaderFactory.fromPath(xpath);
                getLogger()
                        .fine(
                                ""
                                        + "\nPathHeader:\t"
                                        + ph
                                        + "\nValue:\t"
                                        + value
                                        + "\nLevel:\t"
                                        + level
                                        + "\nReq. Locale:\t"
                                        + "af"
                                        + "\nSource Locale:\t"
                                        + source
                                        + "\nReq. XPath:\t"
                                        + xpath
                                        + "\nSource Path:\t"
                                        + status);
                missing.remove(xpath);
            }
        }
        assertTrue("Should be empty", missing.isEmpty());
    }

    /** Checks that the raw extra paths of es_US include all raw extra paths of es. */
    public void TestExtraPaths13954() {
        CLDRFile es = cldrFactory.make("es", true);
        CLDRFile es_US = cldrFactory.make("es_US", true);
        if (!es_US.getRawExtraPaths().containsAll(es.getRawExtraPaths())) {
            errln(
                    "Failure: "
                            + Joiner.on('\n')
                                    .join(
                                            Sets.difference(
                                                    es.getRawExtraPaths(),
                                                    es_US.getRawExtraPaths())));
        }
    }

    /**
     * Prints every French unit path (other than gender) that English resolves from a different
     * path whose {@code type} attribute at element index 3 differs from the requested one.
     */
    public void testEnglishSideways() {
        CLDRFile fr = cldrFactory.make("fr", true);
        CLDRFile en = cldrFactory.make("en", true);
        System.out.println();
        for (String path : fr.fullIterable()) {
            if (!path.startsWith("//ldml/units") || path.endsWith("/gender")) {
                continue;
            }
            Status status = new Status();
            String localeWhereFound = en.getSourceLocaleID(path, status);
            if (!Objects.equals(path, status.pathWhereFound)) {
                XPathParts pathParts = XPathParts.getFrozenInstance(path);
                String type = pathParts.getAttributeValue(3, "type");
                XPathParts foundPathParts = XPathParts.getFrozenInstance(status.pathWhereFound);
                String foundType = foundPathParts.getAttributeValue(3, "type");
                if (Objects.equals(type, foundType)) {
                    continue; // ok to go sideways within type
                }
                System.out.println(
                        String.format("%s\t%s\t%s", path, status.pathWhereFound, localeWhereFound));
            }
        }
    }
}