1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.HashMultimap; 5 import com.google.common.collect.ImmutableSet; 6 import com.google.common.collect.LinkedListMultimap; 7 import com.google.common.collect.Multimap; 8 import com.google.common.collect.TreeMultimap; 9 import com.ibm.icu.impl.Relation; 10 import com.ibm.icu.impl.Row; 11 import com.ibm.icu.impl.Row.R2; 12 import java.io.File; 13 import java.util.ArrayList; 14 import java.util.Arrays; 15 import java.util.Collection; 16 import java.util.EnumMap; 17 import java.util.EnumSet; 18 import java.util.HashMap; 19 import java.util.HashSet; 20 import java.util.LinkedHashMap; 21 import java.util.LinkedHashSet; 22 import java.util.List; 23 import java.util.Map; 24 import java.util.Map.Entry; 25 import java.util.Set; 26 import java.util.TreeMap; 27 import java.util.TreeSet; 28 import java.util.regex.Matcher; 29 import java.util.stream.Collectors; 30 import org.unicode.cldr.test.CoverageLevel2; 31 import org.unicode.cldr.test.ExampleGenerator; 32 import org.unicode.cldr.util.CLDRConfig; 33 import org.unicode.cldr.util.CLDRFile; 34 import org.unicode.cldr.util.CLDRFile.Status; 35 import org.unicode.cldr.util.CLDRLocale; 36 import org.unicode.cldr.util.CLDRPaths; 37 import org.unicode.cldr.util.CLDRURLS; 38 import org.unicode.cldr.util.CldrUtility; 39 import org.unicode.cldr.util.Containment; 40 import org.unicode.cldr.util.Counter; 41 import org.unicode.cldr.util.DtdData; 42 import org.unicode.cldr.util.DtdType; 43 import org.unicode.cldr.util.Emoji; 44 import org.unicode.cldr.util.Factory; 45 import org.unicode.cldr.util.GrammarInfo; 46 import org.unicode.cldr.util.GrammarInfo.CaseValues; 47 import org.unicode.cldr.util.GrammarInfo.GenderValues; 48 import org.unicode.cldr.util.Iso3166Data; 49 import org.unicode.cldr.util.LanguageTagParser; 50 import org.unicode.cldr.util.Level; 51 import org.unicode.cldr.util.Organization; 52 import org.unicode.cldr.util.Pair; 53 import org.unicode.cldr.util.PathDescription; 54 import org.unicode.cldr.util.PathHeader; 55 import org.unicode.cldr.util.PathHeader.PageId; 56 import org.unicode.cldr.util.PathHeader.SectionId; 57 import org.unicode.cldr.util.PathHeader.SurveyToolStatus; 58 import org.unicode.cldr.util.PathHeader.Width; 59 import org.unicode.cldr.util.PathStarrer; 60 import org.unicode.cldr.util.PatternCache; 61 import org.unicode.cldr.util.PatternPlaceholders; 62 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo; 63 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus; 64 import org.unicode.cldr.util.StandardCodes; 65 import org.unicode.cldr.util.SupplementalDataInfo; 66 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 67 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 68 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 69 import org.unicode.cldr.util.With; 70 import org.unicode.cldr.util.XMLFileReader; 71 import org.unicode.cldr.util.XPathParts; 72 73 public class TestPathHeader extends TestFmwkPlus { 74 private static final DtdType DEBUG_DTD_TYPE = null; // DtdType.supplementalData; 75 private static final String COMMON_DIR = CLDRPaths.BASE_DIRECTORY + "common/"; 76 private static final boolean DEBUG = false; 77 main(String[] args)78 public static void main(String[] args) { 79 new TestPathHeader().run(args); 80 } 81 82 static final CLDRConfig info = CLDRConfig.getInstance(); 83 static final Factory factory = info.getCommonAndSeedAndMainAndAnnotationsFactory(); 84 static final CLDRFile english = factory.make("en", true); 85 static final SupplementalDataInfo supplemental = info.getSupplementalDataInfo(); 86 static PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(english); 87 private EnumSet<PageId> badZonePages = EnumSet.of(PageId.UnknownT); 88 tempTestAnnotation()89 public void tempTestAnnotation() { 90 // NEW: <annotation cp="">face | grin</annotation> 91 // <annotation cp="" type="tts">grinning face</annotation> 92 93 final String path1 = "//ldml/annotations/annotation[@cp=\"\"]"; 94 PathHeader ph1 = pathHeaderFactory.fromPath(path1); 95 logln(ph1.toString() + "\t" + path1); 96 final String path2 = "//ldml/annotations/annotation[@cp=\"\"][@type=\"tts\"]"; 97 PathHeader ph2 = pathHeaderFactory.fromPath(path2); 98 logln(ph2.toString() + "\t" + path2); 99 final String path3 = "//ldml/annotations/annotation[@cp=\"\"]"; 100 PathHeader ph3 = pathHeaderFactory.fromPath(path2); 101 logln(ph3.toString() + "\t" + path3); 102 103 assertNotEquals("pathheader", ph1, ph2); 104 assertNotEquals("pathheader", ph1.toString(), ph2.toString()); 105 assertRelation("pathheader", true, ph1, TestFmwkPlus.LEQ, ph3); 106 assertRelation("pathheader", true, ph3, TestFmwkPlus.LEQ, ph2); 107 } 108 109 static final String[] MIN_LOCALES = { 110 "root", "en", "de", "ru", "ko" 111 }; // choose locales with range of case/gender structures 112 tempTestCompletenessLdmlDtd()113 public void tempTestCompletenessLdmlDtd() { 114 // List<String> failures = null; 115 pathHeaderFactory.clearCache(); 116 PathChecker pathChecker = new PathChecker(); 117 for (String directory : DtdType.ldml.directories) { 118 Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory(); 119 Set<String> source = factory2.getAvailable(); 120 for (String file : getFilesToTest(source, MIN_LOCALES)) { 121 if (DEBUG) warnln(" TestCompletenessLdmlDtd: " + directory + ", " + file); 122 DtdData dtdData = null; 123 CLDRFile cldrFile = factory2.make(file, true); 124 for (String path : cldrFile.fullIterable()) { 125 pathChecker.checkPathHeader(cldrFile.getDtdData(), path); 126 } 127 } 128 } 129 Set<String> missing = pathHeaderFactory.getUnmatchedRegexes(); 130 if (missing.size() != 0) { 131 for (String e : missing) { 132 errln("Path Regex never matched:\t" + e); 133 } 134 } 135 if (!pathChecker.badHeaders.isEmpty()) { 136 System.out.println("For help with DTD updates: " + CLDRURLS.CLDR_UPDATINGDTD_URL); 137 } 138 } 139 getFilesToTest(Collection<String> source, String... doFirst)140 private Collection<String> getFilesToTest(Collection<String> source, String... doFirst) { 141 LinkedHashSet<String> files = new LinkedHashSet<>(Arrays.asList(doFirst)); 142 files.retainAll(source); // put first 143 files.addAll(new HashSet<>(source)); // now add others semi-randomly 144 int max = Math.min(30, files.size()); 145 if (getInclusion() == 10 || files.size() <= max) { 146 return files; 147 } 148 ArrayList<String> shortFiles = new ArrayList<>(files); 149 if (getInclusion() > 5) { 150 max += (files.size() - 30) * (getInclusion() - 5) / 10; // use proportional amount 151 } 152 return shortFiles.subList(0, max); 153 } 154 TestCompleteness()155 public void TestCompleteness() { 156 PathHeader.Factory pathHeaderFactory2 = PathHeader.getFactory(english); 157 // List<String> failures = null; 158 pathHeaderFactory2.clearCache(); 159 Multimap<PathHeader.PageId, PathHeader.SectionId> pageUniqueness = TreeMultimap.create(); 160 Multimap<String, Pair<PathHeader.SectionId, PathHeader.PageId>> headerUniqueness = 161 TreeMultimap.create(); 162 Set<String> toTest; 163 switch (getInclusion()) { 164 default: 165 toTest = StandardCodes.make().getLocaleCoverageLocales(Organization.cldr); 166 break; 167 case 10: 168 toTest = factory.getAvailable(); 169 break; 170 } 171 toTest = ImmutableSet.<String>builder().add("en").addAll(toTest).build(); 172 Set<String> seenPaths = new HashSet<>(); 173 Set<String> localSeenPaths = new TreeSet<>(); 174 for (String locale : toTest) { 175 localSeenPaths.clear(); 176 for (String p : factory.make(locale, true).fullIterable()) { 177 if (p.startsWith("//ldml/identity/")) { 178 continue; 179 } 180 if (seenPaths.contains(p)) { 181 continue; 182 } 183 seenPaths.add(p); 184 localSeenPaths.add(p); 185 // if (p.contains("symbol[@alt") && failures == null) { 186 // PathHeader result = pathHeaderFactory2.fromPath(p, failures = new 187 // ArrayList<String>()); 188 // logln("Matching " + p + ": " + result + "\t" + 189 // result.getSurveyToolStatus()); 190 // for (String failure : failures) { 191 // logln("\t" + failure); 192 // } 193 // } 194 PathHeader ph; 195 try { 196 ph = pathHeaderFactory2.fromPath(p); 197 } catch (Exception e1) { 198 try { 199 ph = pathHeaderFactory2.fromPath(p); 200 } catch (Exception e2) { 201 throw new IllegalArgumentException(locale + ":\t" + p, e2); 202 } 203 } 204 if (ph == null) { 205 errln("Failed to create path from: " + p); 206 continue; 207 } 208 final SectionId sectionId = ph.getSectionId(); 209 if (sectionId != SectionId.Special) { 210 pageUniqueness.put(ph.getPageId(), sectionId); 211 headerUniqueness.put(ph.getHeader(), new Pair<>(sectionId, ph.getPageId())); 212 } 213 } 214 if (!localSeenPaths.isEmpty()) { 215 logln(locale + ": checked " + localSeenPaths.size() + " new paths"); 216 } 217 } 218 Set<String> missing = pathHeaderFactory2.getUnmatchedRegexes(); 219 if (missing.size() != 0) { 220 for (String e : missing) { 221 if (e.contains("//ldml/")) { 222 if (e.contains("//ldml/rbnf/") 223 || e.contains("//ldml/segmentations/") 224 || e.contains("//ldml/collations/")) { 225 continue; 226 } 227 logln("Path Regex never matched:\t" + e); 228 } 229 } 230 } 231 232 for (Entry<PageId, Collection<SectionId>> e : pageUniqueness.asMap().entrySet()) { 233 Collection<SectionId> values = e.getValue(); 234 if (values.size() != 1) { 235 warnln("Duplicate page in section: " + CldrUtility.toString(e)); 236 } 237 } 238 239 for (Entry<String, Collection<Pair<SectionId, PageId>>> e : 240 headerUniqueness.asMap().entrySet()) { 241 Collection<Pair<SectionId, PageId>> values = e.getValue(); 242 if (values.size() != 1) { 243 warnln("Duplicate header in (section,page): " + CldrUtility.toString(e)); 244 } 245 } 246 } 247 Test6170()248 public void Test6170() { 249 String p1 = 250 "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"speed-kilometer-per-hour\"]/unitPattern[@count=\"other\"]"; 251 String p2 = 252 "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"area-square-meter\"]/unitPattern[@count=\"other\"]"; 253 PathHeader ph1 = pathHeaderFactory.fromPath(p1); 254 PathHeader ph2 = pathHeaderFactory.fromPath(p2); 255 int comp12 = ph1.compareTo(ph2); 256 int comp21 = ph2.compareTo(ph1); 257 assertEquals("comp ph", comp12, -comp21); 258 } 259 TestVariant()260 public void TestVariant() { 261 PathHeader p1 = 262 pathHeaderFactory.fromPath( 263 "//ldml/localeDisplayNames/languages/language[@type=\"ug\"][@alt=\"variant\"]"); 264 PathHeader p2 = 265 pathHeaderFactory.fromPath( 266 "//ldml/localeDisplayNames/languages/language[@type=\"ug\"]"); 267 assertNotEquals("variants", p1, p2); 268 assertNotEquals("variants", p1.toString(), p2.toString()); 269 // Code Lists Languages Arabic Script ug-variant 270 } 271 Test4587()272 public void Test4587() { 273 String test = 274 "//ldml/dates/timeZoneNames/metazone[@type=\"Pacific/Wallis\"]/short/standard"; 275 PathHeader ph = pathHeaderFactory.fromPath(test); 276 if (ph == null) { 277 errln("Failure with " + test); 278 } else { 279 logln(ph + "\t" + test); 280 } 281 } 282 TestMiscPatterns()283 public void TestMiscPatterns() { 284 String test = 285 "//ldml/numbers/miscPatterns[@numberSystem=\"arab\"]/pattern[@type=\"atLeast\"]"; 286 PathHeader ph = pathHeaderFactory.fromPath(test); 287 assertNotNull("MiscPatterns path not found", ph); 288 if (false) System.out.println(english.getStringValue(test)); 289 } 290 TestPluralOrder()291 public void TestPluralOrder() { 292 Set<PathHeader> sorted = new TreeSet<>(); 293 for (String locale : new String[] {"ru", "ar", "ja"}) { 294 sorted.clear(); 295 CLDRFile cldrFile = info.getCLDRFile(locale, true); 296 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale); 297 for (String path : cldrFile.fullIterable()) { 298 if (!path.contains("@count")) { 299 continue; 300 } 301 Level level = coverageLevel.getLevel(path); 302 if (Level.MODERN.compareTo(level) < 0) { 303 continue; 304 } 305 PathHeader p = pathHeaderFactory.fromPath(path); 306 sorted.add(p); 307 } 308 for (PathHeader p : sorted) { 309 logln(locale + "\t" + p + "\t" + p.getOriginalPath()); 310 } 311 } 312 } 313 314 static final String APPEND_TIMEZONE = 315 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]"; 316 static final String APPEND_TIMEZONE_END = 317 "/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]"; 318 static final String BEFORE_PH = 319 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"ms\"]"; 320 static final String AFTER_PH = 321 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"d\"]/greatestDifference[@id=\"d\"]"; 322 TestAppendTimezone()323 public void TestAppendTimezone() { 324 CLDRFile cldrFile = info.getEnglish(); 325 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en"); 326 assertEquals( 327 "appendItem:Timezone", Level.MODERATE, coverageLevel.getLevel(APPEND_TIMEZONE)); 328 329 PathHeader ph = pathHeaderFactory.fromPath(APPEND_TIMEZONE); 330 assertEquals("appendItem:Timezone pathheader", "Timezone", ph.getCode()); 331 // check that they are in the right place (they weren't before!) 332 PathHeader phBefore = pathHeaderFactory.fromPath(BEFORE_PH); 333 PathHeader phAfter = pathHeaderFactory.fromPath(AFTER_PH); 334 assertTrue(phBefore, LEQ, ph); 335 assertTrue(ph, LEQ, phAfter); 336 337 PathDescription pathDescription = 338 new PathDescription( 339 supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE); 340 String description = pathDescription.getDescription(APPEND_TIMEZONE, "tempvalue", null); 341 assertTrue("appendItem:Timezone pathDescription", description.contains("“Timezone”")); 342 343 PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance(); 344 PlaceholderStatus status = patternPlaceholders.getStatus(APPEND_TIMEZONE); 345 assertEquals("appendItem:Timezone placeholders", PlaceholderStatus.REQUIRED, status); 346 347 Map<String, PlaceholderInfo> placeholderInfo = patternPlaceholders.get(APPEND_TIMEZONE); 348 PlaceholderInfo placeholderInfo2 = placeholderInfo.get("{1}"); 349 if (assertNotNull("appendItem:Timezone placeholders", placeholderInfo2)) { 350 assertEquals( 351 "appendItem:Timezone placeholders", 352 "APPEND_FIELD_FORMAT", 353 placeholderInfo2.name); 354 assertEquals( 355 "appendItem:Timezone placeholders", "Pacific Time", placeholderInfo2.example); 356 } 357 ExampleGenerator eg = new ExampleGenerator(cldrFile, cldrFile); 358 String example = 359 eg.getExampleHtml(APPEND_TIMEZONE, cldrFile.getStringValue(APPEND_TIMEZONE)); 360 String result = ExampleGenerator.simplify(example, false); 361 assertEquals("", "〖❬6:25:59 PM❭ ❬GMT❭〗", result); 362 } 363 TestOptional()364 public void TestOptional() { 365 if (true) return; 366 Map<PathHeader, String> sorted = new TreeMap<>(); 367 for (String locale : new String[] {"af"}) { 368 sorted.clear(); 369 CLDRFile cldrFile = info.getCLDRFile(locale, true); 370 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale); 371 for (String path : cldrFile.fullIterable()) { 372 Level level = coverageLevel.getLevel(path); 373 if (supplemental.isDeprecated(DtdType.ldml, path)) { 374 continue; 375 } 376 377 if (Level.COMPREHENSIVE.compareTo(level) != 0) { 378 continue; 379 } 380 381 PathHeader ph = pathHeaderFactory.fromPath(path); 382 if (ph == null || ph.shouldHide()) { 383 continue; 384 } 385 final SurveyToolStatus status = ph.getSurveyToolStatus(); 386 sorted.put(ph, locale + "\t" + status + "\t" + ph + "\t" + ph.getOriginalPath()); 387 } 388 Set<String> codes = new LinkedHashSet<>(); 389 PathHeader old = null; 390 String line = null; 391 for (Entry<PathHeader, String> s : sorted.entrySet()) { 392 PathHeader p = s.getKey(); 393 String v = s.getValue(); 394 if (old == null) { 395 line = v; 396 codes.add(p.getCode()); 397 } else if (p.getSectionId() == old.getSectionId() 398 && p.getPageId() == old.getPageId() 399 && p.getHeader().equals(old.getHeader())) { 400 codes.add(p.getCode()); 401 } else { 402 logln(line + "\t" + codes.toString()); 403 codes.clear(); 404 line = v; 405 codes.add(p.getCode()); 406 } 407 old = p; 408 } 409 logln(line + "\t" + codes.toString()); 410 } 411 } 412 TestPluralCanonicals()413 public void TestPluralCanonicals() { 414 Relation<String, String> data = 415 Relation.of(new LinkedHashMap<String, Set<String>>(), TreeSet.class); 416 for (String locale : factory.getAvailable()) { 417 if (locale.contains("_")) { 418 continue; 419 } 420 PluralInfo info = supplemental.getPlurals(PluralType.cardinal, locale); 421 Set<String> keywords = info.getCanonicalKeywords(); 422 data.put(keywords.toString(), locale); 423 } 424 for (Entry<String, Set<String>> entry : data.keyValuesSet()) { 425 logln(entry.getKey() + "\t" + entry.getValue()); 426 } 427 } 428 TestPluralPaths()429 public void TestPluralPaths() { 430 // do the following line once, when the file is opened 431 Set<String> filePaths = pathHeaderFactory.pathsForFile(english); 432 433 // check that English doesn't contain few or many 434 verifyContains(PageId.Duration, filePaths, "few", false); 435 verifyContains(PageId.C_NAmerica, filePaths, "many", false); 436 verifyContains(PageId.C_SAmerica, filePaths, "many", false); 437 verifyContains(PageId.C_NWEurope, filePaths, "many", false); 438 verifyContains(PageId.C_SEEurope, filePaths, "many", false); 439 verifyContains(PageId.C_NAfrica, filePaths, "many", false); 440 verifyContains(PageId.C_WAfrica, filePaths, "many", false); 441 verifyContains(PageId.C_SAfrica, filePaths, "many", false); 442 verifyContains(PageId.C_EAfrica, filePaths, "many", false); 443 verifyContains(PageId.C_CAsia, filePaths, "many", false); 444 verifyContains(PageId.C_WAsia, filePaths, "many", false); 445 verifyContains(PageId.C_SEAsia, filePaths, "many", false); 446 verifyContains(PageId.C_Oceania, filePaths, "many", false); 447 verifyContains(PageId.C_Unknown, filePaths, "many", false); 448 449 // check that Arabic does contain few and many 450 filePaths = pathHeaderFactory.pathsForFile(info.getCLDRFile("ar", true)); 451 452 verifyContains(PageId.Duration, filePaths, "few", true); 453 verifyContains(PageId.C_NAmerica, filePaths, "many", true); 454 verifyContains(PageId.C_SAmerica, filePaths, "many", true); 455 verifyContains(PageId.C_NWEurope, filePaths, "many", true); 456 verifyContains(PageId.C_SEEurope, filePaths, "many", true); 457 verifyContains(PageId.C_NAfrica, filePaths, "many", true); 458 verifyContains(PageId.C_WAfrica, filePaths, "many", true); 459 verifyContains(PageId.C_SAfrica, filePaths, "many", true); 460 verifyContains(PageId.C_EAfrica, filePaths, "many", true); 461 verifyContains(PageId.C_CAsia, filePaths, "many", true); 462 verifyContains(PageId.C_WAsia, filePaths, "many", true); 463 verifyContains(PageId.C_SEAsia, filePaths, "many", true); 464 verifyContains(PageId.C_Oceania, filePaths, "many", true); 465 verifyContains(PageId.C_Unknown, filePaths, "many", true); 466 } 467 TestCoverage()468 public void TestCoverage() { 469 Map<Row.R2<SectionId, PageId>, Counter<Level>> data = new TreeMap<>(); 470 CLDRFile cldrFile = english; 471 for (String path : cldrFile.fullIterable()) { 472 if (supplemental.isDeprecated(DtdType.ldml, path)) { 473 errln("Deprecated path in English: " + path); 474 continue; 475 } 476 Level level = supplemental.getCoverageLevel(path, cldrFile.getLocaleID()); 477 PathHeader p = pathHeaderFactory.fromPath(path); 478 SurveyToolStatus status = p.getSurveyToolStatus(); 479 480 boolean hideCoverage = level == Level.COMPREHENSIVE; 481 boolean hidePathHeader = p.shouldHide(); 482 if (hidePathHeader != hideCoverage) { 483 String message = "PathHeader: " + status + ", Coverage: " + level + ": " + path; 484 if (hidePathHeader && !hideCoverage) { 485 errln( 486 message 487 + " - PathHeader says to HIDE this, but it visible at <comprehensive coverage. Fix PathHeader to show, or fix coverage."); 488 } else if (!hidePathHeader && hideCoverage) { 489 logln(message); 490 } 491 } 492 final R2<SectionId, PageId> key = Row.of(p.getSectionId(), p.getPageId()); 493 Counter<Level> counter = data.get(key); 494 if (counter == null) { 495 data.put(key, counter = new Counter<>()); 496 } 497 counter.add(level, 1); 498 } 499 StringBuffer b = new StringBuffer("\t"); 500 for (Level level : Level.values()) { 501 b.append("\t" + level); 502 } 503 logln(b.toString()); 504 for (Entry<R2<SectionId, PageId>, Counter<Level>> entry : data.entrySet()) { 505 b.setLength(0); 506 b.append(entry.getKey().get0() + "\t" + entry.getKey().get1()); 507 Counter<Level> counter = entry.getValue(); 508 long total = 0; 509 for (Level level : Level.values()) { 510 total += counter.getCount(level); 511 b.append("\t" + total); 512 } 513 logln(b.toString()); 514 } 515 } 516 Test00AFile()517 public void Test00AFile() { 518 final String localeId = "en"; 519 Counter<Level> counter = new Counter<>(); 520 Map<String, PathHeader> uniqueness = new HashMap<>(); 521 Set<String> alreadySeen = new HashSet<>(); 522 check(localeId, true, uniqueness, alreadySeen); 523 // check paths 524 for (Entry<SectionId, Set<PageId>> sectionAndPages : 525 PathHeader.Factory.getSectionIdsToPageIds().keyValuesSet()) { 526 final SectionId section = sectionAndPages.getKey(); 527 if (section == SectionId.Supplemental || section == SectionId.BCP47) { 528 continue; 529 } 530 logln(section.toString()); 531 for (PageId page : sectionAndPages.getValue()) { 532 final Set<String> cachedPaths = PathHeader.Factory.getCachedPaths(section, page); 533 if (cachedPaths == null) { 534 if (!badZonePages.contains(page) && page != PageId.Unknown) { 535 errln("Null pages for: " + section + "\t" + page); 536 } 537 } else if (section == SectionId.Special && page == PageId.Unknown) { 538 // skip 539 } else if (section == SectionId.Timezones && page == PageId.UnknownT) { 540 // skip 541 } else if (section == SectionId.Misc && page == PageId.Transforms) { 542 // skip 543 } else { 544 545 int count2 = cachedPaths.size(); 546 if (count2 == 0) { 547 warnln("Missing pages for: " + section + "\t" + page); 548 } else { 549 counter.clear(); 550 for (String s : cachedPaths) { 551 Level coverage = supplemental.getCoverageLevel(s, localeId); 552 counter.add(coverage, 1); 553 } 554 String countString = ""; 555 int total = 0; 556 for (Level item : Level.values()) { 557 long count = counter.get(item); 558 if (count != 0) { 559 if (!countString.isEmpty()) { 560 countString += ",\t+"; 561 } 562 total += count; 563 countString += item + "=" + total; 564 } 565 } 566 logln("\t" + page + "\t" + countString); 567 if (page.toString().startsWith("Unknown")) { 568 logln("\t\t" + cachedPaths); 569 } 570 } 571 } 572 } 573 } 574 } 575 TestMetazones()576 public void TestMetazones() { 577 578 CLDRFile nativeFile = info.getEnglish(); 579 Set<PathHeader> pathHeaders = getPathHeaders(nativeFile); 580 // String oldPage = ""; 581 String oldHeader = ""; 582 for (PathHeader entry : pathHeaders) { 583 final String page = entry.getPage(); 584 // if (!oldPage.equals(page)) { 585 // logln(page); 586 // oldPage = page; 587 // } 588 String header = entry.getHeader(); 589 if (!oldHeader.equals(header)) { 590 logln(page + "\t" + header); 591 oldHeader = header; 592 } 593 } 594 } 595 getPathHeaders(CLDRFile nativeFile)596 public Set<PathHeader> getPathHeaders(CLDRFile nativeFile) { 597 Set<PathHeader> pathHeaders = new TreeSet<>(); 598 for (String path : nativeFile.fullIterable()) { 599 PathHeader p = pathHeaderFactory.fromPath(path); 600 pathHeaders.add(p); 601 } 602 return pathHeaders; 603 } 604 verifyContains( PageId pageId, Set<String> filePaths, String substring, boolean contains)605 public void verifyContains( 606 PageId pageId, Set<String> filePaths, String substring, boolean contains) { 607 String path; 608 path = findOneContaining(allPaths(pageId, filePaths), substring); 609 if (contains) { 610 if (path == null) { 611 errln("No path contains <" + substring + ">"); 612 } 613 } else { 614 if (path != null) { 615 errln("Path contains <" + substring + ">\t" + path); 616 } 617 } 618 } 619 findOneContaining(Collection<String> allPaths, String substring)620 private String findOneContaining(Collection<String> allPaths, String substring) { 621 for (String path : allPaths) { 622 if (path.contains(substring)) { 623 return path; 624 } 625 } 626 return null; 627 } 628 allPaths(PageId pageId, Set<String> filePaths)629 public Set<String> allPaths(PageId pageId, Set<String> filePaths) { 630 Set<String> result = PathHeader.Factory.getCachedPaths(pageId.getSectionId(), pageId); 631 result.retainAll(filePaths); 632 return result; 633 } 634 TestUniqueness()635 public void TestUniqueness() { 636 Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory(); 637 Set<String> source = factory2.getAvailable(); 638 for (String file : getFilesToTest(source, MIN_LOCALES)) { 639 CLDRFile nativeFile = factory2.make(file, true); 640 Map<PathHeader, String> headerToPath = new HashMap<>(); 641 Map<String, String> headerVisibleToPath = new HashMap<>(); 642 for (String path : nativeFile.fullIterable()) { 643 PathHeader p = pathHeaderFactory.fromPath(path); 644 if (p.getSectionId() == SectionId.Special) { 645 continue; 646 } 647 String old = headerToPath.get(p); 648 if (old == null) { 649 headerToPath.put(p, path); 650 } else if (!old.equals(path)) { 651 if (true) { // for debugging 652 pathHeaderFactory.clearCache(); 653 List<String> failuresOld = new ArrayList<>(); 654 pathHeaderFactory.fromPath(old, failuresOld); 655 List<String> failuresPath = new ArrayList<>(); 656 pathHeaderFactory.fromPath(path, failuresPath); 657 } 658 errln(file + " collision with path " + p + "\t" + old + "\t" + path); 659 } 660 final String visible = p.toString(); 661 old = headerVisibleToPath.get(visible); 662 if (old == null) { 663 headerVisibleToPath.put(visible, path); 664 } else if (!old.equals(path)) { 665 errln("Collision with path " + visible + "\t" + old + "\t" + path); 666 } 667 } 668 } 669 } 670 TestStatus()671 public void TestStatus() { 672 CLDRFile nativeFile = info.getEnglish(); 673 PathStarrer starrer = new PathStarrer(); 674 EnumMap<SurveyToolStatus, Relation<String, String>> info2 = 675 new EnumMap<>(SurveyToolStatus.class); 676 Set<String> nuked = new HashSet<>(); 677 Set<String> deprecatedStar = new HashSet<>(); 678 679 for (String path : nativeFile.fullIterable()) { 680 681 PathHeader p = pathHeaderFactory.fromPath(path); 682 final SurveyToolStatus surveyToolStatus = p.getSurveyToolStatus(); 683 684 if (p.getSectionId() == SectionId.Special 685 && surveyToolStatus == SurveyToolStatus.READ_WRITE) { 686 errln("SurveyToolStatus should not be " + surveyToolStatus + ": " + p); 687 } 688 689 String starred = starrer.set(path); 690 List<String> attr = starrer.getAttributes(); 691 if (surveyToolStatus != SurveyToolStatus.READ_WRITE) { 692 nuked.add(starred); 693 } 694 695 // check against deprecated 696 boolean isDeprecated = supplemental.isDeprecated(DtdType.ldml, path); 697 if (isDeprecated != (surveyToolStatus == SurveyToolStatus.DEPRECATED)) { 698 if (!deprecatedStar.contains(starred)) { 699 errln( 700 "Different from DtdData deprecated:\t" 701 + isDeprecated 702 + "\t" 703 + surveyToolStatus 704 + "\t" 705 + path); 706 deprecatedStar.add(starred); 707 } 708 } 709 710 Relation<String, String> data = info2.get(surveyToolStatus); 711 if (data == null) { 712 info2.put( 713 surveyToolStatus, 714 data = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class)); 715 } 716 data.put(starred, Joiner.on("|").join(attr)); 717 } 718 for (Entry<SurveyToolStatus, Relation<String, String>> entry : info2.entrySet()) { 719 final SurveyToolStatus status = entry.getKey(); 720 for (Entry<String, Set<String>> item : entry.getValue().keyValuesSet()) { 721 final String starred = item.getKey(); 722 if (status == SurveyToolStatus.READ_WRITE && !nuked.contains(starred)) { 723 continue; 724 } 725 logln(status + "\t" + starred + "\t" + item.getValue()); 726 } 727 } 728 } 729 TestPathsNotInEnglish()730 public void TestPathsNotInEnglish() { 731 Set<String> englishPaths = new HashSet<>(); 732 for (String path : english.fullIterable()) { 733 englishPaths.add(path); 734 } 735 Set<String> alreadySeen = new HashSet<>(englishPaths); 736 737 for (String locale : factory.getAvailable()) { 738 CLDRFile nativeFile = info.getCLDRFile(locale, false); 739 CoverageLevel2 coverageLevel2 = null; 740 for (String path : nativeFile.fullIterable()) { 741 if (alreadySeen.contains(path) || path.contains("@count")) { 742 continue; 743 } 744 if (coverageLevel2 == null) { 745 coverageLevel2 = CoverageLevel2.getInstance(locale); 746 } 747 Level level = coverageLevel2.getLevel(path); 748 if (Level.COMPREHENSIVE.compareTo(level) < 0) { 749 continue; 750 } 751 logln("Path not in English\t" + locale + "\t" + path); 752 alreadySeen.add(path); 753 } 754 } 755 } 756 TestPathDescriptionCompleteness()757 public void TestPathDescriptionCompleteness() { 758 PathDescription pathDescription = 759 new PathDescription( 760 supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE); 761 Matcher normal = 762 PatternCache.get("https://cldr.unicode.org/translation/[-a-zA-Z0-9_]").matcher(""); 763 // https://cldr.unicode.org/translation/plurals#TOC-Minimal-Pairs 764 Set<String> alreadySeen = new HashSet<>(); 765 PathStarrer starrer = new PathStarrer(); 766 767 checkPathDescriptionCompleteness( 768 pathDescription, 769 normal, 770 "//ldml/numbers/defaultNumberingSystem", 771 alreadySeen, 772 starrer); 773 for (PathHeader pathHeader : getPathHeaders(english)) { 774 if (pathHeader.shouldHide()) { 775 continue; 776 } 777 String path = pathHeader.getOriginalPath(); 778 checkPathDescriptionCompleteness(pathDescription, normal, path, alreadySeen, starrer); 779 } 780 } 781 checkPathDescriptionCompleteness( PathDescription pathDescription, Matcher normal, String path, Set<String> alreadySeen, PathStarrer starrer)782 public void checkPathDescriptionCompleteness( 783 PathDescription pathDescription, 784 Matcher normal, 785 String path, 786 Set<String> alreadySeen, 787 PathStarrer starrer) { 788 String value = english.getStringValue(path); 789 String description = pathDescription.getDescription(path, value, null); 790 String starred = starrer.set(path); 791 if (alreadySeen.contains(starred)) { 792 return; 793 } else if (description == null) { 794 errln("Path has no description:\t" + value + "\t" + path); 795 } else if (!description.contains("https://")) { 796 errln("Description has no URL:\t" + description + "\t" + value + "\t" + path); 797 } else if (!normal.reset(description).find()) { 798 errln( 799 "Description has generic URL, fix to be specific:\t" 800 + description 801 + "\t" 802 + value 803 + "\t" 804 + path); 805 } else if (description == PathDescription.MISSING_DESCRIPTION) { 806 errln("Fallback Description:\t" + value + "\t" + path); 807 } else { 808 return; 809 } 810 // Add if we had a problem, keeping us from being overwhelmed with 811 // errors. 812 alreadySeen.add(starred); 813 } 814 TestTerritoryOrder()815 public void TestTerritoryOrder() { 816 final Set<String> goodAvailableCodes = 817 StandardCodes.make().getGoodAvailableCodes("territory"); 818 Set<String> results = showContained("001", 0, new HashSet<>(goodAvailableCodes)); 819 results.remove("ZZ"); 820 results.removeAll(Iso3166Data.getRegionCodesNotForTranslation()); 821 for (String territory : results) { 822 String sub = Containment.getSubcontinent(territory); 823 String cont = Containment.getContinent(territory); 824 errln( 825 "Missing\t" 826 + getNameAndOrder(territory) 827 + "\t" 828 + getNameAndOrder(sub) 829 + "\t" 830 + getNameAndOrder(cont)); 831 } 832 } 833 showContained(String territory, int level, Set<String> soFar)834 private Set<String> showContained(String territory, int level, Set<String> soFar) { 835 if (!soFar.contains(territory)) { 836 return soFar; 837 } 838 soFar.remove(territory); 839 Set<String> contained = supplemental.getContained(territory); 840 if (contained == null) { 841 return soFar; 842 } 843 for (String containedItem : contained) { 844 logln( 845 level 846 + "\t" 847 + getNameAndOrder(territory) 848 + "\t" 849 + getNameAndOrder(containedItem)); 850 } 851 for (String containedItem : contained) { 852 showContained(containedItem, level + 1, soFar); 853 } 854 return soFar; 855 } 856 getNameAndOrder(String territory)857 private String getNameAndOrder(String territory) { 858 return territory 859 + "\t" 860 + english.getName(CLDRFile.TERRITORY_NAME, territory) 861 + "\t" 862 + Containment.getOrder(territory); 863 } 864 TestZCompleteness()865 public void TestZCompleteness() { 866 Map<String, PathHeader> uniqueness = new HashMap<>(); 867 Set<String> alreadySeen = new HashSet<>(); 868 LanguageTagParser ltp = new LanguageTagParser(); 869 int count = 0; 870 for (String locale : factory.getAvailable()) { 871 if (!ltp.set(locale).getRegion().isEmpty()) { 872 continue; 873 } 874 check(locale, false, uniqueness, alreadySeen); 875 ++count; 876 } 877 logln("Count:\t" + count); 878 } 879 check( String localeID, boolean resolved, Map<String, PathHeader> uniqueness, Set<String> alreadySeen)880 public void check( 881 String localeID, 882 boolean resolved, 883 Map<String, PathHeader> uniqueness, 884 Set<String> alreadySeen) { 885 CLDRFile nativeFile = info.getCLDRFile(localeID, resolved); 886 int count = 0; 887 for (String path : nativeFile) { 888 if (alreadySeen.contains(path)) { 889 continue; 890 } 891 alreadySeen.add(path); 892 final PathHeader pathHeader = pathHeaderFactory.fromPath(path); 893 ++count; 894 if (pathHeader == null) { 895 errln("Null pathheader for " + path); 896 } else { 897 String visible = pathHeader.toString(); 898 PathHeader old = uniqueness.get(visible); 899 if (pathHeader.getSectionId() == SectionId.Timezones) { 900 final PageId pageId = pathHeader.getPageId(); 901 if (badZonePages.contains(pageId) && !pathHeader.getCode().equals("Unknown")) { 902 String msg = "Bad page ID:\t" + pageId + "\t" + pathHeader + "\t" + path; 903 if (!logKnownIssue( 904 "cldrbug:7802", "ICU/CLDR time zone data sync problem - " + msg)) { 905 errln("Bad page ID:\t" + pageId + "\t" + pathHeader + "\t" + path); 906 } 907 } 908 } 909 if (old == null) { 910 if (pathHeader.getSection().equals("Special")) { 911 if (pathHeader.getSection().equals("Unknown")) { 912 errln( 913 "PathHeader has fallback: " 914 + visible 915 + "\t" 916 + pathHeader.getOriginalPath()); 917 // } else { 918 // logln("Special:\t" + visible + "\t" + 919 // pathHeader.getOriginalPath()); 920 } 921 } 922 uniqueness.put(visible, pathHeader); 923 } else if (!old.equals(pathHeader)) { 924 if (pathHeader.getSectionId() == SectionId.Special) { 925 logln( 926 "Special PathHeader not unique: " 927 + visible 928 + "\t" 929 + pathHeader.getOriginalPath() 930 + "\t" 931 + old.getOriginalPath()); 932 } else { 933 errln( 934 "PathHeader not unique: " 935 + visible 936 + "\t" 937 + pathHeader.getOriginalPath() 938 + "\t" 939 + old.getOriginalPath()); 940 } 941 } 942 } 943 } 944 logln(localeID + "\t" + count); 945 } 946 TestContainment()947 public void TestContainment() { 948 Map<String, Map<String, String>> metazoneToRegionToZone = 949 supplemental.getMetazoneToRegionToZone(); 950 Map<String, String> metazoneToContinent = supplemental.getMetazoneToContinentMap(); 951 for (String metazone : metazoneToRegionToZone.keySet()) { 952 Map<String, String> regionToZone = metazoneToRegionToZone.get(metazone); 953 String worldZone = regionToZone.get("001"); 954 String territory = Containment.getRegionFromZone(worldZone); 955 if (territory == null) { 956 territory = "ZZ"; 957 } 958 String cont = Containment.getContinent(territory); 959 int order = Containment.getOrder(territory); 960 String sub = Containment.getSubcontinent(territory); 961 String revision = PathHeader.getMetazonePageTerritory(metazone); 962 String continent = metazoneToContinent.get(metazone); 963 if (continent == null) { 964 continent = "UnknownT"; 965 } 966 // Russia, Antarctica => territory 967 // in Australasia, Asia, S. America => subcontinent 968 // in N. America => N. America (grouping of 3 subcontinents) 969 // in everything else => continent 970 971 if (territory.equals("RU")) { 972 assertEquals("Russia special case", "RU", revision); 973 } else if (territory.equals("US")) { 974 assertEquals("N. America special case", "003", revision); 975 } else if (territory.equals("BR")) { 976 assertEquals("S. America special case", "005", revision); 977 } 978 if (isVerbose()) { 979 String name = english.getName(CLDRFile.TERRITORY_NAME, cont); 980 String name2 = english.getName(CLDRFile.TERRITORY_NAME, sub); 981 String name3 = english.getName(CLDRFile.TERRITORY_NAME, territory); 982 String name4 = english.getName(CLDRFile.TERRITORY_NAME, revision); 983 984 logln( 985 metazone + "\t" + continent + "\t" + name + "\t" + name2 + "\t" + name3 986 + "\t" + order + "\t" + name4); 987 } 988 } 989 } 990 TestZ()991 public void TestZ() { 992 PathStarrer pathStarrer = new PathStarrer(); 993 pathStarrer.setSubstitutionPattern("%A"); 994 995 Set<PathHeader> sorted = new TreeSet<>(); 996 Map<String, String> missing = new TreeMap<>(); 997 Map<String, String> skipped = new TreeMap<>(); 998 Map<String, String> collide = new TreeMap<>(); 999 1000 logln("Traversing Paths"); 1001 for (String path : english) { 1002 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1003 String value = english.getStringValue(path); 1004 if (pathHeader == null) { 1005 final String starred = pathStarrer.set(path); 1006 missing.put(starred, value + "\t" + path); 1007 continue; 1008 } 1009 if (pathHeader.getSection().equalsIgnoreCase("skip")) { 1010 final String starred = pathStarrer.set(path); 1011 skipped.put(starred, value + "\t" + path); 1012 continue; 1013 } 1014 sorted.add(pathHeader); 1015 } 1016 logln("\nConverted:\t" + sorted.size()); 1017 String lastHeader = ""; 1018 String lastPage = ""; 1019 String lastSection = ""; 1020 List<String> threeLevel = new ArrayList<>(); 1021 Status status = new Status(); 1022 CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("en"); 1023 1024 for (PathHeader pathHeader : sorted) { 1025 String original = pathHeader.getOriginalPath(); 1026 if (!original.equals(status.pathWhereFound)) { 1027 continue; 1028 } 1029 if (!lastSection.equals(pathHeader.getSection())) { 1030 logln(""); 1031 threeLevel.add(pathHeader.getSection()); 1032 threeLevel.add("\t" + pathHeader.getPage()); 1033 threeLevel.add("\t\t" + pathHeader.getHeader()); 1034 lastSection = pathHeader.getSection(); 1035 lastPage = pathHeader.getPage(); 1036 lastHeader = pathHeader.getHeader(); 1037 } else if (!lastPage.equals(pathHeader.getPage())) { 1038 logln(""); 1039 threeLevel.add("\t" + pathHeader.getPage()); 1040 threeLevel.add("\t\t" + pathHeader.getHeader()); 1041 lastPage = pathHeader.getPage(); 1042 lastHeader = pathHeader.getHeader(); 1043 } else if (!lastHeader.equals(pathHeader.getHeader())) { 1044 logln(""); 1045 threeLevel.add("\t\t" + pathHeader.getHeader()); 1046 lastHeader = pathHeader.getHeader(); 1047 } 1048 logln( 1049 pathHeader 1050 + "\t" 1051 + coverageLevel2.getLevel(original) 1052 + "\t" 1053 + english.getStringValue(pathHeader.getOriginalPath()) 1054 + "\t" 1055 + pathHeader.getOriginalPath()); 1056 } 1057 if (collide.size() != 0) { 1058 errln("\nCollide:\t" + collide.size()); 1059 for (Entry<String, String> item : collide.entrySet()) { 1060 errln("\t" + item); 1061 } 1062 } 1063 if (missing.size() != 0) { 1064 errln("\nMissing:\t" + missing.size()); 1065 for (Entry<String, String> item : missing.entrySet()) { 1066 errln("\t" + item.getKey() + "\tvalue:\t" + item.getValue()); 1067 } 1068 } 1069 if (skipped.size() != 0) { 1070 errln("\nSkipped:\t" + skipped.size()); 1071 for (Entry<String, String> item : skipped.entrySet()) { 1072 errln("\t" + item); 1073 } 1074 } 1075 Counter<PathHeader.Factory.CounterData> counterData = 1076 pathHeaderFactory.getInternalCounter(); 1077 logln("\nInternal Counter:\t" + counterData.size()); 1078 for (PathHeader.Factory.CounterData item : counterData.keySet()) { 1079 logln( 1080 "\t" 1081 + counterData.getCount(item) 1082 + "\t" 1083 + item.get2() // externals 1084 + "\t" 1085 + item.get3() 1086 + "\t" 1087 + item.get0() // internals 1088 + "\t" 1089 + item.get1()); 1090 } 1091 logln("\nMenus/Headers:\t" + threeLevel.size()); 1092 for (String item : threeLevel) { 1093 logln(item); 1094 } 1095 Relation<SectionId, PageId> s2p = PathHeader.Factory.getSectionIdsToPageIds(); 1096 logln("\nMenus:\t" + s2p.size()); 1097 for (Entry<SectionId, Set<PageId>> sectionAndPages : s2p.keyValuesSet()) { 1098 final SectionId section = sectionAndPages.getKey(); 1099 for (PageId page : sectionAndPages.getValue()) { 1100 logln("\t" + section + "\t" + page); 1101 int count = 0; 1102 for (String path : pathHeaderFactory.filterCldr(section, page, english)) { 1103 count += 1; // just count them. 1104 } 1105 logln("\t" + count); 1106 } 1107 } 1108 } 1109 1110 public static final Set<String> GERMAN_UNIT_ORDER = 1111 ImmutableSet.of( 1112 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]", 1113 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]", 1114 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]", 1115 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]", 1116 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]", 1117 "//ldml/units/unitLength[@type=\"narrrow\"]/unit[@type=\"volume-liter\"]", 1118 "//ldml/numbers/minimalPairs/caseMinimalPairs", 1119 "//ldml/numbers/minimalPairs/genderMinimalPairs"); 1120 TestOrder()1121 public void TestOrder() { 1122 String[] paths = { 1123 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"noon\"]", 1124 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"afternoon1\"]", 1125 }; 1126 PathHeader pathHeaderLast = null; 1127 for (String path : paths) { 1128 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1129 if (pathHeaderLast != null) { 1130 assertRelation("ordering", true, pathHeaderLast, LEQ, pathHeader); 1131 } 1132 pathHeaderLast = pathHeader; 1133 } 1134 CLDRFile german = factory.make("de", true); 1135 Multimap<PathHeader, String> pathHeaderToPaths = TreeMultimap.create(); 1136 for (String path : german.fullIterable()) { 1137 for (String prefix : GERMAN_UNIT_ORDER) { 1138 if (path.startsWith(prefix)) { 1139 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1140 pathHeaderToPaths.put(pathHeader, path); 1141 } 1142 } 1143 } 1144 String[] germanExpected = { 1145 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/gender", // Units 1146 // 1147 // Volume 1148 // liter 1149 // 1150 // long-gender 1151 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/displayName", // Units Volume liter long-displayName 1152 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/displayName", // Units Volume liter short-displayName 1153 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/perUnitPattern", // Units Volume liter long-per 1154 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/perUnitPattern", // Units Volume liter short-per 1155 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]", // Units Volume liter long-one-nominative 1156 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]", // Units Volume liter long-one-accusative 1157 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]", // Units Volume liter long-one-genitive 1158 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"dative\"]", // Units Volume liter long-one-dative 1159 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]", // Units Volume liter long-other-nominative 1160 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]", // Units Volume liter long-other-accusative 1161 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]", // Units Volume liter long-other-genitive 1162 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"dative\"]", // Units Volume liter long-other-dative 1163 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]", // Units Volume liter short-one-nominative 1164 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]", // Units Volume liter short-other-nominative 1165 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"]", // Units Compound Units power2 long-one-nominative-masculine 1166 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]", // Units Compound Units power2 long-one-nominative-feminine 1167 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units Compound Units power2 long-one-nominative-dgender 1168 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"accusative\"]", // Units Compound Units power2 long-one-accusative-masculine 1169 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"accusative\"]", // Units Compound Units power2 long-one-accusative-feminine 1170 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"accusative\"]", // Units Compound Units power2 long-one-accusative-dgender 1171 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"genitive\"]", // Units Compound Units power2 long-one-genitive-masculine 1172 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"genitive\"]", // Units Compound Units power2 long-one-genitive-feminine 1173 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"genitive\"]", // Units Compound Units power2 long-one-genitive-dgender 1174 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"dative\"]", // Units Compound Units power2 long-one-dative-masculine 1175 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"dative\"]", // Units Compound Units power2 long-one-dative-feminine 1176 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"dative\"]", // Units Compound Units power2 long-one-dative-dgender 1177 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"]", // Units Compound Units power2 long-other-nominative-masculine 1178 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]", // Units Compound Units power2 long-other-nominative-feminine 1179 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units Compound Units power2 long-other-nominative-dgender 1180 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"accusative\"]", // Units Compound Units power2 long-other-accusative-masculine 1181 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"accusative\"]", // Units Compound Units power2 long-other-accusative-feminine 1182 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"accusative\"]", // Units Compound Units power2 long-other-accusative-dgender 1183 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"genitive\"]", // Units Compound Units power2 long-other-genitive-masculine 1184 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"genitive\"]", // Units Compound Units power2 long-other-genitive-feminine 1185 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"genitive\"]", // Units Compound Units power2 long-other-genitive-dgender 1186 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"dative\"]", // Units Compound Units power2 long-other-dative-masculine 1187 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"dative\"]", // Units Compound Units power2 long-other-dative-feminine 1188 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"dative\"]", // Units Compound Units power2 long-other-dative-dgender 1189 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units Compound Units power2 short-one-nominative-dgender 1190 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units Compound Units power2 short-other-nominative-dgender 1191 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units Compound Units power2 narrow-one-nominative-dgender 1192 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units Compound Units power2 narrow-other-nominative-dgender 1193 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"nominative\"]", // Miscellaneous 1194 // Minimal Pairs 1195 // Case 1196 // nominative 1197 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"accusative\"]", // Miscellaneous 1198 // Minimal Pairs 1199 // Case 1200 // accusative 1201 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"genitive\"]", // Miscellaneous 1202 // Minimal Pairs 1203 // Case genitive 1204 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"dative\"]", // Miscellaneous 1205 // Minimal Pairs 1206 // Case dative 1207 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"masculine\"]", // Miscellaneous Minimal Pairs Gender masculine 1208 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"feminine\"]", // Miscellaneous 1209 // Minimal 1210 // Pairs 1211 // Gender 1212 // feminine 1213 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"neuter\"]", // Miscellaneous 1214 // Minimal Pairs 1215 // Gender neuter 1216 1217 // we don't care about order here. 1218 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special Suppress compound-UnitPattern1-power2 long 1219 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special Suppress compound-UnitPattern1-power2 narrow 1220 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special Suppress compound-UnitPattern1-power2 short 1221 }; 1222 1223 int germanExpectedIndex = 0; 1224 int errorCount = 0; 1225 int item = 0; 1226 for (Entry<PathHeader, Collection<String>> entry : pathHeaderToPaths.asMap().entrySet()) { 1227 PathHeader ph = entry.getKey(); 1228 Collection<String> epaths = entry.getValue(); 1229 if (!assertEquals(entry.toString(), 1, epaths.size())) { 1230 ++errorCount; 1231 } 1232 if (!assertEquals( 1233 ++item + ") PathHeader order", 1234 germanExpected[germanExpectedIndex++], 1235 epaths.iterator().next())) { 1236 ++errorCount; 1237 } 1238 } 1239 if (errorCount != 0) { 1240 for (Entry<PathHeader, Collection<String>> entry : 1241 pathHeaderToPaths.asMap().entrySet()) { 1242 PathHeader ph = entry.getKey(); 1243 Collection<String> epaths = entry.getValue(); 1244 System.out.println( 1245 "\"" + epaths.iterator().next().replace("\"", "\\\"") + "\",\t// " + ph); 1246 } 1247 } 1248 } 1249 Test8414()1250 public void Test8414() { 1251 PathDescription pathDescription = 1252 new PathDescription( 1253 supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE); 1254 1255 String prefix = 1256 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\""; 1257 String suffix = "\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]"; 1258 1259 final String path0 = prefix + "format" + suffix; 1260 final String path1 = prefix + "stand-alone" + suffix; 1261 String v0 = english.getStringValue(path0); 1262 String v1 = english.getStringValue(path1); 1263 String p0 = pathDescription.getDescription(path0, v0, null); 1264 String p1 = pathDescription.getDescription(path1, v1, null); 1265 assertTrue("Check pd for format", p0.contains("in the morning")); 1266 assertTrue("Check pd for stand-alone", !p1.contains("in the morning")); 1267 } 1268 TestCompletenessNonLdmlDtd()1269 public void TestCompletenessNonLdmlDtd() { 1270 PathChecker pathChecker = new PathChecker(); 1271 Set<String> directories = new LinkedHashSet<>(); 1272 Multimap<String, String> pathValuePairs = LinkedListMultimap.create(); 1273 // get all the directories containing non-Ldml dtd files 1274 for (DtdType dtdType : DtdType.values()) { 1275 if (dtdType.getStatus() != DtdType.DtdStatus.active) { 1276 continue; 1277 } 1278 if (dtdType == DtdType.ldml 1279 || dtdType == DtdType.ldmlICU 1280 || dtdType == DtdType.keyboard3 1281 || dtdType == DtdType.keyboardTest3) { 1282 continue; 1283 } 1284 DtdData dtdData = DtdData.getInstance(dtdType); 1285 for (String dir : dtdType.directories) { 1286 if (DEBUG_DTD_TYPE != null && !DEBUG_DTD_TYPE.directories.contains(dir)) { 1287 continue; 1288 } 1289 File dir2 = new File(COMMON_DIR + dir); 1290 logln(dir2.getName()); 1291 for (String file : dir2.list()) { 1292 // don't need to restrict with getFilesToTest(Arrays.asList(dir2.list()), 1293 // "root", "en")) { 1294 if (!file.endsWith(".xml")) { 1295 continue; 1296 } 1297 if (DEBUG) warnln(" TestCompletenessNonLdmlDtd: " + dir + ", " + file); 1298 logln(" \t" + file); 1299 for (Pair<String, String> pathValue : 1300 XMLFileReader.loadPathValues( 1301 dir2 + "/" + file, 1302 new ArrayList<Pair<String, String>>(), 1303 true)) { 1304 final String path = pathValue.getFirst(); 1305 final String value = pathValue.getSecond(); 1306 // logln("\t\t" + path); 1307 if (path.startsWith("//supplementalData/unitPreferenceData/unitPreferences") 1308 && path.contains("skeleton")) { 1309 int debug = 0; 1310 } 1311 pathChecker.checkPathHeader(dtdData, path); 1312 } 1313 } 1314 } 1315 } 1316 if (!pathChecker.badHeaders.isEmpty()) { 1317 System.out.println("For help with DTD updates: " + CLDRURLS.CLDR_UPDATINGDTD_URL); 1318 } 1319 } 1320 1321 private class PathChecker { 1322 PathHeader.Factory phf = pathHeaderFactory; 1323 PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A"); 1324 1325 Set<String> badHeaders = new TreeSet<>(); 1326 Map<PathHeader, PathHeader> goodHeaders = new HashMap<>(); 1327 Set<PathHeader> seenBad = new HashSet<>(); 1328 1329 { phf.clearCache()1330 phf.clearCache(); 1331 } 1332 checkPathHeader(DtdData dtdData, String rawPath)1333 public void checkPathHeader(DtdData dtdData, String rawPath) { 1334 XPathParts pathPlain = XPathParts.getFrozenInstance(rawPath); 1335 if (dtdData.isMetadata(pathPlain)) { 1336 return; 1337 } 1338 if (dtdData.isDeprecated(pathPlain)) { 1339 return; 1340 } 1341 Multimap<String, String> extras = HashMultimap.create(); 1342 Set<String> fixedPaths = dtdData.getRegularizedPaths(pathPlain, extras); 1343 if (fixedPaths != null) { 1344 for (String fixedPath : fixedPaths) { 1345 checkSubpath(fixedPath); 1346 } 1347 } 1348 for (String path : extras.keySet()) { 1349 checkSubpath(path); 1350 } 1351 } 1352 checkSubpath(String path)1353 public void checkSubpath(String path) { 1354 String message = ": Can't compute path header"; 1355 if (path.contentEquals( 1356 "//supplementalData/grammaticalData/grammaticalFeatures[@targets=\"nominal\"][@locales=\"it\"]/grammaticalGender/_values")) { 1357 int debug = 0; 1358 } 1359 PathHeader ph = null; 1360 try { 1361 ph = phf.fromPath(path); 1362 if (seenBad.contains(ph)) { 1363 return; 1364 } 1365 if (ph.getPageId() == PageId.Deprecated) { 1366 return; // don't care 1367 } 1368 if (ph.getPageId() != PageId.Unknown) { 1369 PathHeader old = goodHeaders.put(ph, ph); 1370 if (old != null && !path.equals(old.getOriginalPath())) { 1371 errln( 1372 "Duplicate path header for: " 1373 + ph 1374 + "\n\t\t " 1375 + path 1376 + "\n\t\t≠" 1377 + old.getOriginalPath()); 1378 seenBad.add(ph); 1379 } 1380 return; 1381 } 1382 // for debugging 1383 phf.clearCache(); 1384 List<String> failures = new ArrayList<>(); 1385 ph = phf.fromPath(path, failures); 1386 message = ": Unknown path header" + failures; 1387 } catch (Exception e) { 1388 message = ": Exception in path header: " + e.getMessage(); 1389 } 1390 String star = starrer.set(path); 1391 if (badHeaders.add(star)) { 1392 errln(star + message + ", " + ph); 1393 System.out.println( 1394 "\tNo match in PathHeader.txt for " 1395 + path 1396 + "\n\tYou get only one message for all paths matching " 1397 + star 1398 + "\n\tFor example, check to see if the field in PathHeader.txt is in PathHeader.PageId." 1399 + "\n\tIf not, either correct PathHeader.txt or add it to PageId" 1400 + "\n\tIf you have a value attribute, you will need extra _ characters. The value attribute will show at the end with prefixed _, eg [...]/_skeleton." 1401 + "If there can be a value for the path then that element will add _. "); 1402 } 1403 } 1404 } 1405 TestSupplementalItems()1406 public void TestSupplementalItems() { 1407 // <weekOfPreference ordering="weekOfYear weekOfMonth" locales="am az bs cs cy da el et 1408 // hi ky lt mk sk ta th"/> 1409 // logln(pathHeaderFactory.getRegexInfo()); 1410 CLDRFile supplementalFile = 1411 CLDRConfig.getInstance().getSupplementalFactory().make("supplementalData", false); 1412 List<String> failures = new ArrayList<>(); 1413 Multimap<String, String> pathValuePairs = LinkedListMultimap.create(); 1414 for (String test : With.in(supplementalFile.iterator("//supplementalData/weekData"))) { 1415 failures.clear(); 1416 XPathParts parts = XPathParts.getFrozenInstance(supplementalFile.getFullXPath(test)); 1417 supplementalFile.getDtdData().getRegularizedPaths(parts, pathValuePairs); 1418 for (Entry<String, Collection<String>> entry : pathValuePairs.asMap().entrySet()) { 1419 final String normalizedPath = entry.getKey(); 1420 final Collection<String> normalizedValue = entry.getValue(); 1421 PathHeader ph = pathHeaderFactory.fromPath(normalizedPath, failures); 1422 if (ph == null || ph.getSectionId() == SectionId.Special) { 1423 errln( 1424 "Failure with " 1425 + test 1426 + " => " 1427 + normalizedPath 1428 + " = " 1429 + normalizedValue); 1430 } else { 1431 logln(ph + "\t" + test + " = " + normalizedValue); 1432 } 1433 } 1434 } 1435 } 1436 test10232()1437 public void test10232() { 1438 String[][] tests = { 1439 {"MMM", "Formats - Flexible - Date Formats"}, 1440 {"dMM", "Formats - Flexible - Date Formats"}, 1441 {"h", "Formats - Flexible - 12 Hour Time Formats"}, 1442 {"hm", "Formats - Flexible - 12 Hour Time Formats"}, 1443 {"Ehm", "Formats - Flexible - 12 Hour Time Formats"}, 1444 {"H", "Formats - Flexible - 24 Hour Time Formats"}, 1445 {"Hm", "Formats - Flexible - 24 Hour Time Formats"}, 1446 {"EHm", "Formats - Flexible - 24 Hour Time Formats"}, 1447 }; 1448 for (String[] test : tests) { 1449 String path = 1450 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"" 1451 + test[0] 1452 + "\"]"; 1453 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1454 assertEquals( 1455 "flexible formats", 1456 test[1] + "|" + test[0], 1457 pathHeader.getHeader() + "|" + pathHeader.getCode()); 1458 } 1459 } 1460 1461 // Moved from TestAnnotations and generalized testPathHeaderSize()1462 public void testPathHeaderSize() { 1463 String locale = "ar"; // choose one with lots of plurals 1464 int maxSize = 1250; 1465 boolean showTable = false; // only printed if test fails or verbose 1466 1467 Factory factory = CLDRConfig.getInstance().getCommonAndSeedAndMainAndAnnotationsFactory(); 1468 CLDRFile english = factory.make(locale, true); 1469 1470 PathHeader.Factory phf = PathHeader.getFactory(CLDRConfig.getInstance().getEnglish()); 1471 Counter<PageId> counterPageId = new Counter<>(); 1472 Counter<PageId> counterPageIdAll = new Counter<>(); 1473 for (String path : english) { 1474 Level level = 1475 CLDRConfig.getInstance() 1476 .getSupplementalDataInfo() 1477 .getCoverageLevel(path, locale); 1478 PathHeader ph = phf.fromPath(path); 1479 if (level.compareTo(Level.MODERN) <= 0) { 1480 counterPageId.add(ph.getPageId(), 1); 1481 } 1482 counterPageIdAll.add(ph.getPageId(), 1); 1483 } 1484 Set<R2<Long, PageId>> entrySetSortedByCount = 1485 counterPageId.getEntrySetSortedByCount(false, null); 1486 for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) { 1487 long size = sizeAndPageId.get0(); 1488 PageId pageId = sizeAndPageId.get1(); 1489 if (!assertTrue( 1490 pageId.getSectionId() 1491 + "/" 1492 + pageId 1493 + " size (" 1494 + size 1495 + ") < " 1496 + maxSize 1497 + "?", 1498 size < maxSize)) { 1499 showTable = true; 1500 } 1501 // System.out.println(pageId + "\t" + size); 1502 } 1503 if (showTable || isVerbose()) { 1504 for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) { 1505 PageId pageId = sizeAndPageId.get1(); 1506 System.out.println( 1507 pageId.getSectionId() 1508 + "\t" 1509 + pageId 1510 + "\t" 1511 + sizeAndPageId.get0() 1512 + "\t" 1513 + counterPageIdAll.get(pageId)); 1514 } 1515 } 1516 } 1517 TestCLDR_11454()1518 public void TestCLDR_11454() { 1519 PathHeader.Factory phf = PathHeader.getFactory(); 1520 PathHeader century = 1521 phf.fromPath( 1522 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-century\"]/displayName"); 1523 PathHeader decade = 1524 phf.fromPath( 1525 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-decade\"]/displayName"); 1526 assertEquals("Section", century.getSectionId(), decade.getSectionId()); 1527 assertEquals("Page", century.getPageId(), decade.getPageId()); 1528 } 1529 TestEmojiOrder()1530 public void TestEmojiOrder() { 1531 PathHeader.Factory phf = PathHeader.getFactory(); 1532 String[] desiredOrder = { 1533 "⚕", "⚕", "⚕", 1534 "⚖", "⚖", "⚖" 1535 }; 1536 List<PathHeader> pathHeaders = new ArrayList<>(); 1537 for (String emoji : desiredOrder) { 1538 String base = "//ldml/annotations/annotation[@cp=\"" + emoji + "\"]"; 1539 pathHeaders.add(phf.fromPath(base + "[@type=\"tts\"]")); 1540 pathHeaders.add(phf.fromPath(base)); 1541 logln( 1542 emoji 1543 + ": getEmojiMinorOrder=" 1544 + Emoji.getEmojiMinorOrder(Emoji.getMinorCategory(emoji)) 1545 + ", getEmojiToOrder=" 1546 + Emoji.getEmojiToOrder(emoji)); 1547 } 1548 PathHeader lastItem = null; 1549 for (PathHeader item : pathHeaders) { 1550 if (lastItem != null) { 1551 assertEquals("Section", lastItem.getSectionId(), item.getSectionId()); 1552 assertEquals("Page", lastItem.getPageId(), item.getPageId()); 1553 assertEquals("Header", lastItem.getHeader(), item.getHeader()); 1554 if (!assertTrue(lastItem + " < " + item, lastItem.compareTo(item) < 0)) { 1555 lastItem.compareTo(item); // for debugging 1556 } 1557 } 1558 lastItem = item; 1559 } 1560 } 1561 TestQuotes()1562 public void TestQuotes() { 1563 // quotes should never appear in result 1564 PathHeader.Factory phf = PathHeader.getFactory(); 1565 String[] tests = { 1566 "//supplementalData/plurals[@type=\"ordinal\"]/pluralRules[@locales=\"ig\"]/pluralRule[@count=\"other\"]", 1567 "//supplementalData/transforms/transform[@source=\"und-Khmr\"][@target=\"und-Latn\"]" 1568 }; 1569 for (String test : tests) { 1570 PathHeader trial = phf.fromPath(test); 1571 assertEquals("No quotes in pathheader", false, trial.toString().contains("\"")); 1572 } 1573 } 1574 /** 1575 * Make sure that the PathHeader sort order is consistent with the grammatical feature orders 1576 * "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/displayName" 1577 * //ldml/units/unitLength[@type=\long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]", 1578 * //ldml/units/unitLength[@type=\long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"accusative\"]", 1579 */ TestUnitOrder()1580 public void TestUnitOrder() { 1581 PathHeader.Factory phf = PathHeader.getFactory(); 1582 List<PathHeader> expectedOrder = new ArrayList<>(); 1583 List<Width> widths = Arrays.asList(Width.LONG, Width.SHORT, Width.NARROW); 1584 List<CaseValues> cases = Arrays.asList(GrammarInfo.CaseValues.values()).subList(0, 3); 1585 List<GenderValues> genders = Arrays.asList(GrammarInfo.GenderValues.values()).subList(0, 3); 1586 1587 for (Width width : widths) { 1588 String path = 1589 "//ldml/units/unitLength[@type=\"" 1590 + width 1591 + "\"]/unit[@type=\"length-meter\"]/displayName"; 1592 expectedOrder.add(phf.fromPath(path)); 1593 } 1594 1595 for (Width width : widths) { 1596 for (Count count : Count.values()) { 1597 for (GrammarInfo.CaseValues gCase : cases) { 1598 if (width != Width.LONG && gCase != CaseValues.nominative) { 1599 break; 1600 } 1601 String path = 1602 "//ldml/units/unitLength[@type=\"" 1603 + width 1604 + "\"]/unit[@type=\"length-meter\"]/unitPattern[@count=\"" 1605 + count 1606 + (gCase == CaseValues.nominative ? "" : "\"][@case=\"" + gCase) 1607 + "\"]"; 1608 expectedOrder.add(phf.fromPath(path)); 1609 } 1610 } 1611 } 1612 for (Width width : widths) { 1613 for (Count count : Count.values()) { 1614 for (GrammarInfo.CaseValues gCase : cases) { 1615 if (width != Width.LONG && gCase != CaseValues.nominative) { 1616 break; 1617 } 1618 for (GrammarInfo.GenderValues gGender : genders) { 1619 if (width != Width.LONG && gGender != GenderValues.neuter) { 1620 break; 1621 } 1622 String path = 1623 "//ldml/units/unitLength[@type=\"" 1624 + width 1625 + "\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"" 1626 + count 1627 + (gGender == GenderValues.neuter 1628 ? "" 1629 : "\"][@gender=\"" + gGender) 1630 + (gCase == CaseValues.nominative 1631 ? "" 1632 : "\"][@case=\"" + gCase) 1633 + "\"]"; 1634 expectedOrder.add(phf.fromPath(path)); 1635 } 1636 } 1637 } 1638 } 1639 for (Count count : Count.values()) { 1640 String path = 1641 "//ldml/numbers/minimalPairs/ordinalMinimalPairs[@ordinal=\"" + count + "\"]"; 1642 expectedOrder.add(phf.fromPath(path)); 1643 } 1644 for (Count count : Count.values()) { 1645 String path = 1646 "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"" + count + "\"]"; 1647 expectedOrder.add(phf.fromPath(path)); 1648 } 1649 for (GrammarInfo.CaseValues gCase : cases) { 1650 String path = "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"" + gCase + "\"]"; 1651 expectedOrder.add(phf.fromPath(path)); 1652 } 1653 for (GrammarInfo.GenderValues gGender : genders) { 1654 String path = 1655 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"" + gGender + "\"]"; 1656 expectedOrder.add(phf.fromPath(path)); 1657 } 1658 1659 PathHeader last = null; 1660 int item = 0; 1661 int errorCount = 0; 1662 for (PathHeader pathHeader : expectedOrder) { 1663 if (last != null) { 1664 if (!assertTrue( 1665 ++item + ")\t" + last + "\t<\t" + pathHeader, 1666 last.compareTo(pathHeader) < 0)) { 1667 errorCount++; 1668 last.compareTo(pathHeader); 1669 } 1670 } 1671 last = pathHeader; 1672 } 1673 if (errorCount != 0 || isVerbose()) { 1674 for (PathHeader pathHeader : expectedOrder) { 1675 System.out.println( 1676 "\"" 1677 + pathHeader.getOriginalPath().replace("\"", "\\\"") 1678 + "\",\t// " 1679 + pathHeader); 1680 } 1681 } 1682 } 1683 testPageSize()1684 public void testPageSize() { 1685 final long minError = 946; // above this, emit error 1686 final long minLog = 700; // otherwise above this, emit warning 1687 Factory factory = CLDRConfig.getInstance().getCommonAndSeedAndMainAndAnnotationsFactory(); 1688 List<String> locales = 1689 StandardCodes.make() 1690 .getLocaleCoverageLocales(Organization.cldr, ImmutableSet.of(Level.MODERN)) 1691 .stream() 1692 .filter(x -> CLDRLocale.getInstance(x).getCountry().isEmpty()) 1693 .collect(Collectors.toUnmodifiableList()); 1694 List<Counter<PageId>> counters = new ArrayList<>(); 1695 final String thresholdExplanation = "log/error thresholds are " + minLog + "/" + minError; 1696 for (String locale : locales) { 1697 CLDRFile cldrFile = factory.make(locale, false); 1698 PathHeader.Factory phf = PathHeader.getFactory(); 1699 Counter<PageId> c = new Counter<>(); 1700 counters.add(c); 1701 for (String path : cldrFile) { 1702 PathHeader ph = phf.fromPath(path); 1703 c.add(ph.getPageId(), 1); 1704 } 1705 for (PageId entry : c.getKeysetSortedByKey()) { 1706 long count = c.getCount(entry); 1707 if (count > minLog) { 1708 final String message = 1709 String.format( 1710 "%s\t%s\t%s\thas too many entries:\t%d\t(%s)", 1711 locale, 1712 entry.getSectionId().toString(), 1713 entry, 1714 count, 1715 thresholdExplanation); 1716 if (count > minError) { 1717 errln(message); 1718 } else { 1719 warnln(message); 1720 } 1721 } 1722 } 1723 } 1724 if (isVerbose()) { 1725 System.out.println(); 1726 Set<PageId> sorted = new TreeSet<>(); 1727 for (Counter<PageId> counter : counters) { 1728 sorted.addAll(counter.keySet()); 1729 } 1730 int i = 0; 1731 System.out.print("Order" + "\t" + "Section" + "\t" + "Page"); 1732 for (String c : locales) { 1733 System.out.print("\t" + c); 1734 } 1735 System.out.println(); 1736 1737 for (PageId entry : sorted) { 1738 System.out.print(++i + "\t" + entry.getSectionId() + "\t" + entry); 1739 for (Counter<PageId> c : counters) { 1740 System.out.print("\t" + c.get(entry)); 1741 } 1742 System.out.println(); 1743 } 1744 } 1745 } 1746 } 1747