1 package org.unicode.cldr.unittest; 2 3 import java.io.File; 4 import java.util.ArrayList; 5 import java.util.Arrays; 6 import java.util.Collection; 7 import java.util.EnumMap; 8 import java.util.EnumSet; 9 import java.util.HashMap; 10 import java.util.HashSet; 11 import java.util.LinkedHashMap; 12 import java.util.LinkedHashSet; 13 import java.util.List; 14 import java.util.Map; 15 import java.util.Map.Entry; 16 import java.util.Set; 17 import java.util.TreeMap; 18 import java.util.TreeSet; 19 import java.util.regex.Matcher; 20 21 import org.unicode.cldr.test.CoverageLevel2; 22 import org.unicode.cldr.test.ExampleGenerator; 23 import org.unicode.cldr.util.CLDRConfig; 24 import org.unicode.cldr.util.CLDRFile; 25 import org.unicode.cldr.util.CLDRFile.Status; 26 import org.unicode.cldr.util.CLDRPaths; 27 import org.unicode.cldr.util.CldrUtility; 28 import org.unicode.cldr.util.Containment; 29 import org.unicode.cldr.util.Counter; 30 import org.unicode.cldr.util.DtdData; 31 import org.unicode.cldr.util.DtdType; 32 import org.unicode.cldr.util.Emoji; 33 import org.unicode.cldr.util.Factory; 34 import org.unicode.cldr.util.LanguageTagParser; 35 import org.unicode.cldr.util.Level; 36 import org.unicode.cldr.util.Organization; 37 import org.unicode.cldr.util.Pair; 38 import org.unicode.cldr.util.PathDescription; 39 import org.unicode.cldr.util.PathHeader; 40 import org.unicode.cldr.util.PathHeader.PageId; 41 import org.unicode.cldr.util.PathHeader.SectionId; 42 import org.unicode.cldr.util.PathHeader.SurveyToolStatus; 43 import org.unicode.cldr.util.PathStarrer; 44 import org.unicode.cldr.util.PatternCache; 45 import org.unicode.cldr.util.PatternPlaceholders; 46 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo; 47 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus; 48 import org.unicode.cldr.util.StandardCodes; 49 import org.unicode.cldr.util.SupplementalDataInfo; 50 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 51 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 52 import org.unicode.cldr.util.With; 53 import org.unicode.cldr.util.XMLFileReader; 54 import org.unicode.cldr.util.XPathParts; 55 56 import com.google.common.base.Joiner; 57 import com.google.common.collect.HashMultimap; 58 import com.google.common.collect.ImmutableSet; 59 import com.google.common.collect.LinkedListMultimap; 60 import com.google.common.collect.Multimap; 61 import com.google.common.collect.TreeMultimap; 62 import com.ibm.icu.impl.Relation; 63 import com.ibm.icu.impl.Row; 64 import com.ibm.icu.impl.Row.R2; 65 66 public class TestPathHeader extends TestFmwkPlus { 67 private static final DtdType DEBUG_DTD_TYPE = null; // DtdType.supplementalData; 68 private static final String COMMON_DIR = CLDRPaths.BASE_DIRECTORY + "common/"; 69 private static final boolean DEBUG = false; 70 main(String[] args)71 public static void main(String[] args) { 72 new TestPathHeader().run(args); 73 } 74 75 static final CLDRConfig info = CLDRConfig.getInstance(); 76 static final Factory factory = info.getCommonAndSeedAndMainAndAnnotationsFactory(); 77 static final CLDRFile english = factory.make("en", true); 78 static final SupplementalDataInfo supplemental = info 79 .getSupplementalDataInfo(); 80 static PathHeader.Factory pathHeaderFactory = PathHeader 81 .getFactory(english); 82 private EnumSet<PageId> badZonePages = EnumSet.of(PageId.UnknownT); 83 tempTestAnnotation()84 public void tempTestAnnotation() { 85 // NEW: <annotation cp="">face | grin</annotation> 86 // <annotation cp="" type="tts">grinning face</annotation> 87 88 final String path1 = "//ldml/annotations/annotation[@cp=\"\"]"; 89 PathHeader ph1 = pathHeaderFactory.fromPath(path1); 90 logln(ph1.toString() + "\t" + path1); 91 final String path2 = "//ldml/annotations/annotation[@cp=\"\"][@type=\"tts\"]"; 92 PathHeader ph2 = pathHeaderFactory.fromPath(path2); 93 logln(ph2.toString() + "\t" + path2); 94 final String path3 = "//ldml/annotations/annotation[@cp=\"\"]"; 95 PathHeader ph3 = pathHeaderFactory.fromPath(path2); 96 logln(ph3.toString() + "\t" + path3); 97 98 assertNotEquals("pathheader", ph1, ph2); 99 assertNotEquals("pathheader", ph1.toString(), ph2.toString()); 100 assertRelation("pathheader", true, ph1, TestFmwkPlus.LEQ, ph3); 101 assertRelation("pathheader", true, ph3, TestFmwkPlus.LEQ, ph2); 102 } 103 104 static final String[] MIN_LOCALES = {"root", "en", "de", "ru", "ko"}; // choose locales with range of case/gender structures 105 tempTestCompletenessLdmlDtd()106 public void tempTestCompletenessLdmlDtd() { 107 // List<String> failures = null; 108 pathHeaderFactory.clearCache(); 109 PathChecker pathChecker = new PathChecker(); 110 for (String directory : DtdType.ldml.directories) { 111 Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory(); 112 Set<String> source = factory2.getAvailable(); 113 for (String file : getFilesToTest(source, MIN_LOCALES)) { 114 if (DEBUG) warnln(" TestCompletenessLdmlDtd: " + directory + ", " + file); 115 DtdData dtdData = null; 116 CLDRFile cldrFile = factory2.make(file, true); 117 for (String path : cldrFile.fullIterable()) { 118 pathChecker.checkPathHeader(cldrFile.getDtdData(), path); 119 } 120 } 121 } 122 Set<String> missing = pathHeaderFactory.getUnmatchedRegexes(); 123 if (missing.size() != 0) { 124 for (String e : missing) { 125 errln("Path Regex never matched:\t" + e); 126 } 127 } 128 } 129 getFilesToTest(Collection<String> source, String... doFirst)130 private Collection<String> getFilesToTest(Collection<String> source, String... doFirst) { 131 LinkedHashSet<String> files = new LinkedHashSet<>(Arrays.asList(doFirst)); 132 files.retainAll(source); // put first 133 files.addAll(new HashSet<>(source)); // now add others semi-randomly 134 int max = Math.min(30, files.size()); 135 if (getInclusion() == 10 || files.size() <= max) { 136 return files; 137 } 138 ArrayList<String> shortFiles = new ArrayList<>(files); 139 if (getInclusion() > 5) { 140 max += (files.size() - 30) * (getInclusion() - 5) / 10; // use proportional amount 141 } 142 return shortFiles.subList(0, max); 143 } 144 TestCompleteness()145 public void TestCompleteness() { 146 PathHeader.Factory pathHeaderFactory2 = PathHeader.getFactory(english); 147 // List<String> failures = null; 148 pathHeaderFactory2.clearCache(); 149 Multimap<PathHeader.PageId, PathHeader.SectionId> pageUniqueness = TreeMultimap.create(); 150 Multimap<String, Pair<PathHeader.SectionId, PathHeader.PageId>> headerUniqueness = TreeMultimap.create(); 151 Set<String> toTest; 152 switch (getInclusion()) { 153 default: 154 toTest = StandardCodes.make().getLocaleCoverageLocales(Organization.cldr); 155 break; 156 case 10: 157 toTest = factory.getAvailable(); 158 break; 159 } 160 toTest = ImmutableSet.<String> builder().add("en").addAll(toTest).build(); 161 Set<String> seenPaths = new HashSet<>(); 162 Set<String> localSeenPaths = new TreeSet<>(); 163 for (String locale : toTest) { 164 localSeenPaths.clear(); 165 for (String p : factory.make(locale, true).fullIterable()) { 166 if (p.startsWith("//ldml/identity/")) { 167 continue; 168 } 169 if (seenPaths.contains(p)) { 170 continue; 171 } 172 seenPaths.add(p); 173 localSeenPaths.add(p); 174 // if (p.contains("symbol[@alt") && failures == null) { 175 // PathHeader result = pathHeaderFactory2.fromPath(p, failures = new 176 // ArrayList<String>()); 177 // logln("Matching " + p + ": " + result + "\t" + 178 // result.getSurveyToolStatus()); 179 // for (String failure : failures) { 180 // logln("\t" + failure); 181 // } 182 // } 183 PathHeader ph; 184 try { 185 ph = pathHeaderFactory2.fromPath(p); 186 } catch (Exception e1) { 187 try { 188 ph = pathHeaderFactory2.fromPath(p); 189 } catch (Exception e2) { 190 throw new IllegalArgumentException(locale + ":\t" + p, e2); 191 } 192 } 193 if (ph == null) { 194 errln("Failed to create path from: " + p); 195 continue; 196 } 197 final SectionId sectionId = ph.getSectionId(); 198 if (sectionId != SectionId.Special) { 199 pageUniqueness.put(ph.getPageId(), sectionId); 200 headerUniqueness.put(ph.getHeader(), new Pair<>(sectionId, ph.getPageId())); 201 } 202 } 203 if (!localSeenPaths.isEmpty()) { 204 logln(locale + ": checked " + localSeenPaths.size() + " new paths"); 205 } 206 } 207 Set<String> missing = pathHeaderFactory2.getUnmatchedRegexes(); 208 if (missing.size() != 0) { 209 for (String e : missing) { 210 if (e.contains("//ldml/")) { 211 if (e.contains("//ldml/rbnf/") || e.contains("//ldml/segmentations/") || e.contains("//ldml/collations/")) { 212 continue; 213 } 214 logln("Path Regex never matched:\t" + e); 215 } 216 } 217 } 218 219 for (Entry<PageId, Collection<SectionId>> e : pageUniqueness.asMap().entrySet()) { 220 Collection<SectionId> values = e.getValue(); 221 if (values.size() != 1) { 222 warnln("Duplicate page in section: " + CldrUtility.toString(e)); 223 } 224 } 225 226 for (Entry<String, Collection<Pair<SectionId, PageId>>> e : headerUniqueness.asMap().entrySet()) { 227 Collection<Pair<SectionId, PageId>> values = e.getValue(); 228 if (values.size() != 1) { 229 warnln("Duplicate header in (section,page): " + CldrUtility.toString(e)); 230 } 231 } 232 } 233 Test6170()234 public void Test6170() { 235 String p1 = "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"speed-kilometer-per-hour\"]/unitPattern[@count=\"other\"]"; 236 String p2 = "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"area-square-meter\"]/unitPattern[@count=\"other\"]"; 237 PathHeader ph1 = pathHeaderFactory.fromPath(p1); 238 PathHeader ph2 = pathHeaderFactory.fromPath(p2); 239 int comp12 = ph1.compareTo(ph2); 240 int comp21 = ph2.compareTo(ph1); 241 assertEquals("comp ph", comp12, -comp21); 242 } 243 TestVariant()244 public void TestVariant() { 245 PathHeader p1 = pathHeaderFactory 246 .fromPath("//ldml/localeDisplayNames/languages/language[@type=\"ug\"][@alt=\"variant\"]"); 247 PathHeader p2 = pathHeaderFactory 248 .fromPath("//ldml/localeDisplayNames/languages/language[@type=\"ug\"]"); 249 assertNotEquals("variants", p1, p2); 250 assertNotEquals("variants", p1.toString(), p2.toString()); 251 // Code Lists Languages Arabic Script ug-variant 252 } 253 Test4587()254 public void Test4587() { 255 String test = "//ldml/dates/timeZoneNames/metazone[@type=\"Pacific/Wallis\"]/short/standard"; 256 PathHeader ph = pathHeaderFactory.fromPath(test); 257 if (ph == null) { 258 errln("Failure with " + test); 259 } else { 260 logln(ph + "\t" + test); 261 } 262 } 263 TestMiscPatterns()264 public void TestMiscPatterns() { 265 String test = "//ldml/numbers/miscPatterns[@numberSystem=\"arab\"]/pattern[@type=\"atLeast\"]"; 266 PathHeader ph = pathHeaderFactory.fromPath(test); 267 assertNotNull("MiscPatterns path not found", ph); 268 if (false) 269 System.out.println(english.getStringValue(test)); 270 } 271 TestPluralOrder()272 public void TestPluralOrder() { 273 Set<PathHeader> sorted = new TreeSet<>(); 274 for (String locale : new String[] { "ru", "ar", "ja" }) { 275 sorted.clear(); 276 CLDRFile cldrFile = info.getCLDRFile(locale, true); 277 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale); 278 for (String path : cldrFile.fullIterable()) { 279 if (!path.contains("@count")) { 280 continue; 281 } 282 Level level = coverageLevel.getLevel(path); 283 if (Level.MODERN.compareTo(level) < 0) { 284 continue; 285 } 286 PathHeader p = pathHeaderFactory.fromPath(path); 287 sorted.add(p); 288 } 289 for (PathHeader p : sorted) { 290 logln(locale + "\t" + p + "\t" + p.getOriginalPath()); 291 } 292 } 293 } 294 295 static final String APPEND_TIMEZONE = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]"; 296 static final String APPEND_TIMEZONE_END = "/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]"; 297 static final String BEFORE_PH = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"ms\"]"; 298 static final String AFTER_PH = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"d\"]/greatestDifference[@id=\"d\"]"; 299 TestAppendTimezone()300 public void TestAppendTimezone() { 301 CLDRFile cldrFile = info.getEnglish(); 302 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en"); 303 assertEquals("appendItem:Timezone", Level.MODERATE, 304 coverageLevel.getLevel(APPEND_TIMEZONE)); 305 306 PathHeader ph = pathHeaderFactory.fromPath(APPEND_TIMEZONE); 307 assertEquals("appendItem:Timezone pathheader", "Timezone", ph.getCode()); 308 // check that they are in the right place (they weren't before!) 309 PathHeader phBefore = pathHeaderFactory.fromPath(BEFORE_PH); 310 PathHeader phAfter = pathHeaderFactory.fromPath(AFTER_PH); 311 assertTrue(phBefore, LEQ, ph); 312 assertTrue(ph, LEQ, phAfter); 313 314 PathDescription pathDescription = new PathDescription(supplemental, 315 english, null, null, PathDescription.ErrorHandling.CONTINUE); 316 String description = pathDescription.getDescription(APPEND_TIMEZONE, 317 "tempvalue", null, null); 318 assertTrue("appendItem:Timezone pathDescription", 319 description.contains("“Timezone”")); 320 321 PatternPlaceholders patternPlaceholders = PatternPlaceholders 322 .getInstance(); 323 PlaceholderStatus status = patternPlaceholders 324 .getStatus(APPEND_TIMEZONE); 325 assertEquals("appendItem:Timezone placeholders", 326 PlaceholderStatus.REQUIRED, status); 327 328 Map<String, PlaceholderInfo> placeholderInfo = patternPlaceholders 329 .get(APPEND_TIMEZONE); 330 PlaceholderInfo placeholderInfo2 = placeholderInfo.get("{1}"); 331 if (assertNotNull("appendItem:Timezone placeholders", placeholderInfo2)) { 332 assertEquals("appendItem:Timezone placeholders", 333 "APPEND_FIELD_FORMAT", placeholderInfo2.name); 334 assertEquals("appendItem:Timezone placeholders", "Pacific Time", 335 placeholderInfo2.example); 336 } 337 ExampleGenerator eg = new ExampleGenerator(cldrFile, cldrFile, CLDRPaths.SUPPLEMENTAL_DIRECTORY); 338 String example = eg.getExampleHtml(APPEND_TIMEZONE, cldrFile.getStringValue(APPEND_TIMEZONE)); 339 String result = ExampleGenerator.simplify(example, false); 340 assertEquals("", "〖❬6:25:59 PM❭ ❬GMT❭〗", result); 341 } 342 TestOptional()343 public void TestOptional() { 344 if (true) return; 345 Map<PathHeader, String> sorted = new TreeMap<>(); 346 for (String locale : new String[] { "af" }) { 347 sorted.clear(); 348 CLDRFile cldrFile = info.getCLDRFile(locale, true); 349 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale); 350 for (String path : cldrFile.fullIterable()) { 351 // if (!path.contains("@count")) { 352 // continue; 353 // } 354 Level level = coverageLevel.getLevel(path); 355 if (supplemental.isDeprecated(DtdType.ldml, path)) { 356 continue; 357 } 358 359 if (Level.OPTIONAL.compareTo(level) != 0) { 360 continue; 361 } 362 363 PathHeader p = pathHeaderFactory.fromPath(path); 364 final SurveyToolStatus status = p.getSurveyToolStatus(); 365 if (status == SurveyToolStatus.DEPRECATED) { 366 continue; 367 } 368 sorted.put( 369 p, 370 locale + "\t" + status + "\t" + p + "\t" 371 + p.getOriginalPath()); 372 } 373 Set<String> codes = new LinkedHashSet<>(); 374 PathHeader old = null; 375 String line = null; 376 for (Entry<PathHeader, String> s : sorted.entrySet()) { 377 PathHeader p = s.getKey(); 378 String v = s.getValue(); 379 if (old == null) { 380 line = v; 381 codes.add(p.getCode()); 382 } else if (p.getSectionId() == old.getSectionId() 383 && p.getPageId() == old.getPageId() 384 && p.getHeader().equals(old.getHeader())) { 385 codes.add(p.getCode()); 386 } else { 387 logln(line + "\t" + codes.toString()); 388 codes.clear(); 389 line = v; 390 codes.add(p.getCode()); 391 } 392 old = p; 393 } 394 logln(line + "\t" + codes.toString()); 395 } 396 } 397 TestPluralCanonicals()398 public void TestPluralCanonicals() { 399 Relation<String, String> data = Relation.of( 400 new LinkedHashMap<String, Set<String>>(), TreeSet.class); 401 for (String locale : factory.getAvailable()) { 402 if (locale.contains("_")) { 403 continue; 404 } 405 PluralInfo info = supplemental.getPlurals(PluralType.cardinal, 406 locale); 407 Set<String> keywords = info.getCanonicalKeywords(); 408 data.put(keywords.toString(), locale); 409 } 410 for (Entry<String, Set<String>> entry : data.keyValuesSet()) { 411 logln(entry.getKey() + "\t" + entry.getValue()); 412 } 413 } 414 TestPluralPaths()415 public void TestPluralPaths() { 416 // do the following line once, when the file is opened 417 Set<String> filePaths = pathHeaderFactory.pathsForFile(english); 418 419 // check that English doesn't contain few or many 420 verifyContains(PageId.Duration, filePaths, "few", false); 421 verifyContains(PageId.C_NAmerica, filePaths, "many", false); 422 verifyContains(PageId.C_SAmerica, filePaths, "many", false); 423 verifyContains(PageId.C_NWEurope, filePaths, "many", false); 424 verifyContains(PageId.C_SEEurope, filePaths, "many", false); 425 verifyContains(PageId.C_NAfrica, filePaths, "many", false); 426 verifyContains(PageId.C_WAfrica, filePaths, "many", false); 427 verifyContains(PageId.C_SAfrica, filePaths, "many", false); 428 verifyContains(PageId.C_EAfrica, filePaths, "many", false); 429 verifyContains(PageId.C_CAsia, filePaths, "many", false); 430 verifyContains(PageId.C_WAsia, filePaths, "many", false); 431 verifyContains(PageId.C_SEAsia, filePaths, "many", false); 432 verifyContains(PageId.C_Oceania, filePaths, "many", false); 433 verifyContains(PageId.C_Unknown, filePaths, "many", false); 434 435 // check that Arabic does contain few and many 436 filePaths = pathHeaderFactory.pathsForFile(info.getCLDRFile("ar", true)); 437 438 verifyContains(PageId.Duration, filePaths, "few", true); 439 verifyContains(PageId.C_NAmerica, filePaths, "many", true); 440 verifyContains(PageId.C_SAmerica, filePaths, "many", true); 441 verifyContains(PageId.C_NWEurope, filePaths, "many", true); 442 verifyContains(PageId.C_SEEurope, filePaths, "many", true); 443 verifyContains(PageId.C_NAfrica, filePaths, "many", true); 444 verifyContains(PageId.C_WAfrica, filePaths, "many", true); 445 verifyContains(PageId.C_SAfrica, filePaths, "many", true); 446 verifyContains(PageId.C_EAfrica, filePaths, "many", true); 447 verifyContains(PageId.C_CAsia, filePaths, "many", true); 448 verifyContains(PageId.C_WAsia, filePaths, "many", true); 449 verifyContains(PageId.C_SEAsia, filePaths, "many", true); 450 verifyContains(PageId.C_Oceania, filePaths, "many", true); 451 verifyContains(PageId.C_Unknown, filePaths, "many", true); 452 } 453 TestCoverage()454 public void TestCoverage() { 455 Map<Row.R2<SectionId, PageId>, Counter<Level>> data = new TreeMap<>(); 456 CLDRFile cldrFile = english; 457 for (String path : cldrFile.fullIterable()) { 458 if (supplemental.isDeprecated(DtdType.ldml, path)) { 459 errln("Deprecated path in English: " + path); 460 continue; 461 } 462 Level level = supplemental.getCoverageLevel(path, 463 cldrFile.getLocaleID()); 464 PathHeader p = pathHeaderFactory.fromPath(path); 465 SurveyToolStatus status = p.getSurveyToolStatus(); 466 467 boolean hideCoverage = level == Level.OPTIONAL; 468 boolean hidePathHeader = status == SurveyToolStatus.DEPRECATED 469 || status == SurveyToolStatus.HIDE; 470 if (hidePathHeader != hideCoverage) { 471 String message = "PathHeader: " + status + ", Coverage: " 472 + level + ": " + path; 473 if (hidePathHeader && !hideCoverage) { 474 errln(message); 475 } else if (!hidePathHeader && hideCoverage) { 476 logln(message); 477 } 478 } 479 final R2<SectionId, PageId> key = Row.of(p.getSectionId(), 480 p.getPageId()); 481 Counter<Level> counter = data.get(key); 482 if (counter == null) { 483 data.put(key, counter = new Counter<>()); 484 } 485 counter.add(level, 1); 486 } 487 StringBuffer b = new StringBuffer("\t"); 488 for (Level level : Level.values()) { 489 b.append("\t" + level); 490 } 491 logln(b.toString()); 492 for (Entry<R2<SectionId, PageId>, Counter<Level>> entry : data 493 .entrySet()) { 494 b.setLength(0); 495 b.append(entry.getKey().get0() + "\t" + entry.getKey().get1()); 496 Counter<Level> counter = entry.getValue(); 497 long total = 0; 498 for (Level level : Level.values()) { 499 total += counter.getCount(level); 500 b.append("\t" + total); 501 } 502 logln(b.toString()); 503 } 504 } 505 Test00AFile()506 public void Test00AFile() { 507 final String localeId = "en"; 508 Counter<Level> counter = new Counter<>(); 509 Map<String, PathHeader> uniqueness = new HashMap<>(); 510 Set<String> alreadySeen = new HashSet<>(); 511 check(localeId, true, uniqueness, alreadySeen); 512 // check paths 513 for (Entry<SectionId, Set<PageId>> sectionAndPages : PathHeader.Factory 514 .getSectionIdsToPageIds().keyValuesSet()) { 515 final SectionId section = sectionAndPages.getKey(); 516 if (section == SectionId.Supplemental || section == SectionId.BCP47) { 517 continue; 518 } 519 logln(section.toString()); 520 for (PageId page : sectionAndPages.getValue()) { 521 final Set<String> cachedPaths = PathHeader.Factory 522 .getCachedPaths(section, page); 523 if (cachedPaths == null) { 524 if (!badZonePages.contains(page) && page != PageId.Unknown) { 525 errln("Null pages for: " + section + "\t" + page); 526 } 527 } else if (section == SectionId.Special 528 && page == PageId.Unknown) { 529 // skip 530 } else if (section == SectionId.Timezones 531 && page == PageId.UnknownT) { 532 // skip 533 } else if (section == SectionId.Misc 534 && page == PageId.Transforms) { 535 // skip 536 } else { 537 538 int count2 = cachedPaths.size(); 539 if (count2 == 0) { 540 warnln("Missing pages for: " + section + "\t" + page); 541 } else { 542 counter.clear(); 543 for (String s : cachedPaths) { 544 Level coverage = supplemental.getCoverageLevel(s, 545 localeId); 546 counter.add(coverage, 1); 547 } 548 String countString = ""; 549 int total = 0; 550 for (Level item : Level.values()) { 551 long count = counter.get(item); 552 if (count != 0) { 553 if (!countString.isEmpty()) { 554 countString += ",\t+"; 555 } 556 total += count; 557 countString += item + "=" + total; 558 } 559 } 560 logln("\t" + page + "\t" + countString); 561 if (page.toString().startsWith("Unknown")) { 562 logln("\t\t" + cachedPaths); 563 } 564 } 565 } 566 } 567 } 568 } 569 TestMetazones()570 public void TestMetazones() { 571 572 CLDRFile nativeFile = info.getEnglish(); 573 Set<PathHeader> pathHeaders = getPathHeaders(nativeFile); 574 // String oldPage = ""; 575 String oldHeader = ""; 576 for (PathHeader entry : pathHeaders) { 577 final String page = entry.getPage(); 578 // if (!oldPage.equals(page)) { 579 // logln(page); 580 // oldPage = page; 581 // } 582 String header = entry.getHeader(); 583 if (!oldHeader.equals(header)) { 584 logln(page + "\t" + header); 585 oldHeader = header; 586 } 587 } 588 } 589 getPathHeaders(CLDRFile nativeFile)590 public Set<PathHeader> getPathHeaders(CLDRFile nativeFile) { 591 Set<PathHeader> pathHeaders = new TreeSet<>(); 592 for (String path : nativeFile.fullIterable()) { 593 PathHeader p = pathHeaderFactory.fromPath(path); 594 pathHeaders.add(p); 595 } 596 return pathHeaders; 597 } 598 verifyContains(PageId pageId, Set<String> filePaths, String substring, boolean contains)599 public void verifyContains(PageId pageId, Set<String> filePaths, 600 String substring, boolean contains) { 601 String path; 602 path = findOneContaining(allPaths(pageId, filePaths), substring); 603 if (contains) { 604 if (path == null) { 605 errln("No path contains <" + substring + ">"); 606 } 607 } else { 608 if (path != null) { 609 errln("Path contains <" + substring + ">\t" + path); 610 } 611 } 612 } 613 findOneContaining(Collection<String> allPaths, String substring)614 private String findOneContaining(Collection<String> allPaths, 615 String substring) { 616 for (String path : allPaths) { 617 if (path.contains(substring)) { 618 return path; 619 } 620 } 621 return null; 622 } 623 allPaths(PageId pageId, Set<String> filePaths)624 public Set<String> allPaths(PageId pageId, Set<String> filePaths) { 625 Set<String> result = PathHeader.Factory.getCachedPaths( 626 pageId.getSectionId(), pageId); 627 result.retainAll(filePaths); 628 return result; 629 } 630 TestUniqueness()631 public void TestUniqueness() { 632 Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory(); 633 Set<String> source = factory2.getAvailable(); 634 for (String file : getFilesToTest(source, MIN_LOCALES)) { 635 CLDRFile nativeFile = factory2.make(file,true); 636 Map<PathHeader, String> headerToPath = new HashMap<>(); 637 Map<String, String> headerVisibleToPath = new HashMap<>(); 638 for (String path : nativeFile.fullIterable()) { 639 PathHeader p = pathHeaderFactory.fromPath(path); 640 if (p.getSectionId() == SectionId.Special) { 641 continue; 642 } 643 String old = headerToPath.get(p); 644 if (old == null) { 645 headerToPath.put(p, path); 646 } else if (!old.equals(path)) { 647 if (true) { // for debugging 648 pathHeaderFactory.clearCache(); 649 List<String> failuresOld = new ArrayList<>(); 650 pathHeaderFactory.fromPath(old, failuresOld); 651 List<String> failuresPath = new ArrayList<>(); 652 pathHeaderFactory.fromPath(path, failuresPath); 653 } 654 errln(file + " collision with path " + p + "\t" + old + "\t" + path); 655 } 656 final String visible = p.toString(); 657 old = headerVisibleToPath.get(visible); 658 if (old == null) { 659 headerVisibleToPath.put(visible, path); 660 } else if (!old.equals(path)) { 661 errln("Collision with path " + visible + "\t" + old + "\t" 662 + path); 663 } 664 } 665 } 666 } 667 TestStatus()668 public void TestStatus() { 669 CLDRFile nativeFile = info.getEnglish(); 670 PathStarrer starrer = new PathStarrer(); 671 EnumMap<SurveyToolStatus, Relation<String, String>> info2 = new EnumMap<>( 672 SurveyToolStatus.class); 673 Set<String> nuked = new HashSet<>(); 674 Set<String> deprecatedStar = new HashSet<>(); 675 Set<String> differentStar = new HashSet<>(); 676 677 for (String path : nativeFile.fullIterable()) { 678 679 PathHeader p = pathHeaderFactory.fromPath(path); 680 final SurveyToolStatus surveyToolStatus = p.getSurveyToolStatus(); 681 682 if (p.getSectionId() == SectionId.Special 683 && surveyToolStatus == SurveyToolStatus.READ_WRITE) { 684 errln("SurveyToolStatus should not be " + surveyToolStatus 685 + ": " + p); 686 } 687 688 final SurveyToolStatus tempSTS = surveyToolStatus == SurveyToolStatus.DEPRECATED ? SurveyToolStatus.HIDE 689 : surveyToolStatus; 690 String starred = starrer.set(path); 691 List<String> attr = starrer.getAttributes(); 692 if (surveyToolStatus != SurveyToolStatus.READ_WRITE) { 693 nuked.add(starred); 694 } 695 696 // check against old 697 SurveyToolStatus oldStatus = SurveyToolStatus.READ_WRITE; 698 699 if (tempSTS != oldStatus 700 && oldStatus != SurveyToolStatus.READ_WRITE 701 && !path.endsWith(APPEND_TIMEZONE_END)) { 702 if (!differentStar.contains(starred)) { 703 errln("Different from old:\t" + oldStatus + "\tnew:\t" 704 + surveyToolStatus + "\t" + path); 705 differentStar.add(starred); 706 } 707 } 708 709 // check against deprecated 710 boolean isDeprecated = supplemental.isDeprecated(DtdType.ldml, path); 711 if (isDeprecated != (surveyToolStatus == SurveyToolStatus.DEPRECATED)) { 712 if (!deprecatedStar.contains(starred)) { 713 errln("Different from DtdData deprecated:\t" 714 + isDeprecated + "\t" + surveyToolStatus + "\t" 715 + path); 716 deprecatedStar.add(starred); 717 } 718 } 719 720 Relation<String, String> data = info2.get(surveyToolStatus); 721 if (data == null) { 722 info2.put( 723 surveyToolStatus, 724 data = Relation.of(new TreeMap<String, Set<String>>(), 725 TreeSet.class)); 726 } 727 data.put(starred, Joiner.on("|").join(attr)); 728 } 729 for (Entry<SurveyToolStatus, Relation<String, String>> entry : info2 730 .entrySet()) { 731 final SurveyToolStatus status = entry.getKey(); 732 for (Entry<String, Set<String>> item : entry.getValue() 733 .keyValuesSet()) { 734 final String starred = item.getKey(); 735 if (status == SurveyToolStatus.READ_WRITE 736 && !nuked.contains(starred)) { 737 continue; 738 } 739 logln(status + "\t" + starred + "\t" + item.getValue()); 740 } 741 } 742 } 743 TestPathsNotInEnglish()744 public void TestPathsNotInEnglish() { 745 Set<String> englishPaths = new HashSet<>(); 746 for (String path : english.fullIterable()) { 747 englishPaths.add(path); 748 } 749 Set<String> alreadySeen = new HashSet<>(englishPaths); 750 751 for (String locale : factory.getAvailable()) { 752 CLDRFile nativeFile = info.getCLDRFile(locale, false); 753 CoverageLevel2 coverageLevel2 = null; 754 for (String path : nativeFile.fullIterable()) { 755 if (alreadySeen.contains(path) || path.contains("@count")) { 756 continue; 757 } 758 if (coverageLevel2 == null) { 759 coverageLevel2 = CoverageLevel2.getInstance(locale); 760 } 761 Level level = coverageLevel2.getLevel(path); 762 if (Level.COMPREHENSIVE.compareTo(level) < 0) { 763 continue; 764 } 765 logln("Path not in English\t" + locale + "\t" + path); 766 alreadySeen.add(path); 767 } 768 } 769 } 770 TestPathDescriptionCompleteness()771 public void TestPathDescriptionCompleteness() { 772 PathDescription pathDescription = new PathDescription(supplemental, 773 english, null, null, PathDescription.ErrorHandling.CONTINUE); 774 Matcher normal = PatternCache.get( 775 "http://cldr.org/translation/[-a-zA-Z0-9_]").matcher(""); 776 // http://cldr.unicode.org/translation/plurals#TOC-Minimal-Pairs 777 Set<String> alreadySeen = new HashSet<>(); 778 PathStarrer starrer = new PathStarrer(); 779 780 checkPathDescriptionCompleteness(pathDescription, normal, 781 "//ldml/numbers/defaultNumberingSystem", alreadySeen, starrer); 782 for (PathHeader pathHeader : getPathHeaders(english)) { 783 final SurveyToolStatus surveyToolStatus = pathHeader 784 .getSurveyToolStatus(); 785 if (surveyToolStatus == SurveyToolStatus.DEPRECATED 786 || surveyToolStatus == SurveyToolStatus.HIDE) { 787 continue; 788 } 789 String path = pathHeader.getOriginalPath(); 790 checkPathDescriptionCompleteness(pathDescription, normal, path, 791 alreadySeen, starrer); 792 } 793 } 794 checkPathDescriptionCompleteness( PathDescription pathDescription, Matcher normal, String path, Set<String> alreadySeen, PathStarrer starrer)795 public void checkPathDescriptionCompleteness( 796 PathDescription pathDescription, Matcher normal, String path, 797 Set<String> alreadySeen, PathStarrer starrer) { 798 String value = english.getStringValue(path); 799 String description = pathDescription.getDescription(path, value, null, 800 null); 801 String starred = starrer.set(path); 802 if (alreadySeen.contains(starred)) { 803 return; 804 } else if (description == null) { 805 errln("Path has no description:\t" + value + "\t" + path); 806 } else if (!description.contains("http://")) { 807 errln("Description has no URL:\t" + description + "\t" + value 808 + "\t" + path); 809 } else if (!normal.reset(description).find()) { 810 errln("Description has generic URL, fix to be specific:\t" 811 + description + "\t" + value + "\t" + path); 812 } else if (description == PathDescription.MISSING_DESCRIPTION) { 813 errln("Fallback Description:\t" + value + "\t" + path); 814 } else { 815 return; 816 } 817 // Add if we had a problem, keeping us from being overwhelmed with 818 // errors. 819 alreadySeen.add(starred); 820 } 821 TestTerritoryOrder()822 public void TestTerritoryOrder() { 823 final Set<String> goodAvailableCodes = CLDRConfig.getInstance() 824 .getStandardCodes().getGoodAvailableCodes("territory"); 825 Set<String> results = showContained("001", 0, new HashSet<>( 826 goodAvailableCodes)); 827 results.remove("ZZ"); 828 for (String territory : results) { 829 String sub = Containment.getSubcontinent(territory); 830 String cont = Containment.getContinent(territory); 831 errln("Missing\t" + getNameAndOrder(territory) + "\t" 832 + getNameAndOrder(sub) + "\t" + getNameAndOrder(cont)); 833 } 834 } 835 showContained(String territory, int level, Set<String> soFar)836 private Set<String> showContained(String territory, int level, 837 Set<String> soFar) { 838 if (!soFar.contains(territory)) { 839 return soFar; 840 } 841 soFar.remove(territory); 842 Set<String> contained = supplemental.getContained(territory); 843 if (contained == null) { 844 return soFar; 845 } 846 for (String containedItem : contained) { 847 logln(level + "\t" + getNameAndOrder(territory) + "\t" 848 + getNameAndOrder(containedItem)); 849 } 850 for (String containedItem : contained) { 851 showContained(containedItem, level + 1, soFar); 852 } 853 return soFar; 854 } 855 getNameAndOrder(String territory)856 private String getNameAndOrder(String territory) { 857 return territory + "\t" 858 + english.getName(CLDRFile.TERRITORY_NAME, territory) + "\t" 859 + Containment.getOrder(territory); 860 } 861 TestZCompleteness()862 public void TestZCompleteness() { 863 Map<String, PathHeader> uniqueness = new HashMap<>(); 864 Set<String> alreadySeen = new HashSet<>(); 865 LanguageTagParser ltp = new LanguageTagParser(); 866 int count = 0; 867 for (String locale : factory.getAvailable()) { 868 if (!ltp.set(locale).getRegion().isEmpty()) { 869 continue; 870 } 871 check(locale, false, uniqueness, alreadySeen); 872 ++count; 873 } 874 logln("Count:\t" + count); 875 } 876 check(String localeID, boolean resolved, Map<String, PathHeader> uniqueness, Set<String> alreadySeen)877 public void check(String localeID, boolean resolved, 878 Map<String, PathHeader> uniqueness, Set<String> alreadySeen) { 879 CLDRFile nativeFile = info.getCLDRFile(localeID, resolved); 880 int count = 0; 881 for (String path : nativeFile) { 882 if (alreadySeen.contains(path)) { 883 continue; 884 } 885 alreadySeen.add(path); 886 final PathHeader pathHeader = pathHeaderFactory.fromPath(path); 887 ++count; 888 if (pathHeader == null) { 889 errln("Null pathheader for " + path); 890 } else { 891 String visible = pathHeader.toString(); 892 PathHeader old = uniqueness.get(visible); 893 if (pathHeader.getSectionId() == SectionId.Timezones) { 894 final PageId pageId = pathHeader.getPageId(); 895 if (badZonePages.contains(pageId) 896 && !pathHeader.getCode().equals("Unknown")) { 897 String msg = "Bad page ID:\t" + pageId + "\t" + pathHeader + "\t" + path; 898 if (!logKnownIssue("cldrbug:7802", "ICU/CLDR time zone data sync problem - " + msg)) { 899 errln("Bad page ID:\t" + pageId + "\t" + pathHeader 900 + "\t" + path); 901 } 902 } 903 } 904 if (old == null) { 905 if (pathHeader.getSection().equals("Special")) { 906 if (pathHeader.getSection().equals("Unknown")) { 907 errln("PathHeader has fallback: " + visible + "\t" 908 + pathHeader.getOriginalPath()); 909 // } else { 910 // logln("Special:\t" + visible + "\t" + 911 // pathHeader.getOriginalPath()); 912 } 913 } 914 uniqueness.put(visible, pathHeader); 915 } else if (!old.equals(pathHeader)) { 916 if (pathHeader.getSectionId() == SectionId.Special) { 917 logln("Special PathHeader not unique: " + visible 918 + "\t" + pathHeader.getOriginalPath() + "\t" 919 + old.getOriginalPath()); 920 } else { 921 errln("PathHeader not unique: " + visible + "\t" 922 + pathHeader.getOriginalPath() + "\t" 923 + old.getOriginalPath()); 924 } 925 } 926 } 927 } 928 logln(localeID + "\t" + count); 929 } 930 TestContainment()931 public void TestContainment() { 932 Map<String, Map<String, String>> metazoneToRegionToZone = supplemental 933 .getMetazoneToRegionToZone(); 934 Map<String, String> metazoneToContinent = supplemental 935 .getMetazoneToContinentMap(); 936 for (String metazone : metazoneToRegionToZone.keySet()) { 937 Map<String, String> regionToZone = metazoneToRegionToZone 938 .get(metazone); 939 String worldZone = regionToZone.get("001"); 940 String territory = Containment.getRegionFromZone(worldZone); 941 if (territory == null) { 942 territory = "ZZ"; 943 } 944 String cont = Containment.getContinent(territory); 945 int order = Containment.getOrder(territory); 946 String sub = Containment.getSubcontinent(territory); 947 String revision = PathHeader.getMetazonePageTerritory(metazone); 948 String continent = metazoneToContinent.get(metazone); 949 if (continent == null) { 950 continent = "UnknownT"; 951 } 952 // Russia, Antarctica => territory 953 // in Australasia, Asia, S. America => subcontinent 954 // in N. America => N. America (grouping of 3 subcontinents) 955 // in everything else => continent 956 957 if (territory.equals("RU")) { 958 assertEquals("Russia special case", "RU", revision); 959 } else if (territory.equals("US")) { 960 assertEquals("N. America special case", "003", revision); 961 } else if (territory.equals("BR")) { 962 assertEquals("S. America special case", "005", revision); 963 } 964 if (isVerbose()) { 965 String name = english.getName(CLDRFile.TERRITORY_NAME, cont); 966 String name2 = english.getName(CLDRFile.TERRITORY_NAME, sub); 967 String name3 = english.getName(CLDRFile.TERRITORY_NAME, 968 territory); 969 String name4 = english.getName(CLDRFile.TERRITORY_NAME, 970 revision); 971 972 logln(metazone + "\t" + continent + "\t" + name + "\t" + name2 973 + "\t" + name3 + "\t" + order + "\t" + name4); 974 } 975 } 976 } 977 TestZ()978 public void TestZ() { 979 PathStarrer pathStarrer = new PathStarrer(); 980 pathStarrer.setSubstitutionPattern("%A"); 981 982 Set<PathHeader> sorted = new TreeSet<>(); 983 Map<String, String> missing = new TreeMap<>(); 984 Map<String, String> skipped = new TreeMap<>(); 985 Map<String, String> collide = new TreeMap<>(); 986 987 logln("Traversing Paths"); 988 for (String path : english) { 989 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 990 String value = english.getStringValue(path); 991 if (pathHeader == null) { 992 final String starred = pathStarrer.set(path); 993 missing.put(starred, value + "\t" + path); 994 continue; 995 } 996 if (pathHeader.getSection().equalsIgnoreCase("skip")) { 997 final String starred = pathStarrer.set(path); 998 skipped.put(starred, value + "\t" + path); 999 continue; 1000 } 1001 sorted.add(pathHeader); 1002 } 1003 logln("\nConverted:\t" + sorted.size()); 1004 String lastHeader = ""; 1005 String lastPage = ""; 1006 String lastSection = ""; 1007 List<String> threeLevel = new ArrayList<>(); 1008 Status status = new Status(); 1009 CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("en"); 1010 1011 for (PathHeader pathHeader : sorted) { 1012 String original = pathHeader.getOriginalPath(); 1013 if (!original.equals(status.pathWhereFound)) { 1014 continue; 1015 } 1016 if (!lastSection.equals(pathHeader.getSection())) { 1017 logln(""); 1018 threeLevel.add(pathHeader.getSection()); 1019 threeLevel.add("\t" + pathHeader.getPage()); 1020 threeLevel.add("\t\t" + pathHeader.getHeader()); 1021 lastSection = pathHeader.getSection(); 1022 lastPage = pathHeader.getPage(); 1023 lastHeader = pathHeader.getHeader(); 1024 } else if (!lastPage.equals(pathHeader.getPage())) { 1025 logln(""); 1026 threeLevel.add("\t" + pathHeader.getPage()); 1027 threeLevel.add("\t\t" + pathHeader.getHeader()); 1028 lastPage = pathHeader.getPage(); 1029 lastHeader = pathHeader.getHeader(); 1030 } else if (!lastHeader.equals(pathHeader.getHeader())) { 1031 logln(""); 1032 threeLevel.add("\t\t" + pathHeader.getHeader()); 1033 lastHeader = pathHeader.getHeader(); 1034 } 1035 logln(pathHeader + "\t" + coverageLevel2.getLevel(original) + "\t" 1036 + english.getStringValue(pathHeader.getOriginalPath()) 1037 + "\t" + pathHeader.getOriginalPath()); 1038 } 1039 if (collide.size() != 0) { 1040 errln("\nCollide:\t" + collide.size()); 1041 for (Entry<String, String> item : collide.entrySet()) { 1042 errln("\t" + item); 1043 } 1044 } 1045 if (missing.size() != 0) { 1046 errln("\nMissing:\t" + missing.size()); 1047 for (Entry<String, String> item : missing.entrySet()) { 1048 errln("\t" + item.getKey() + "\tvalue:\t" + item.getValue()); 1049 } 1050 } 1051 if (skipped.size() != 0) { 1052 errln("\nSkipped:\t" + skipped.size()); 1053 for (Entry<String, String> item : skipped.entrySet()) { 1054 errln("\t" + item); 1055 } 1056 } 1057 Counter<PathHeader.Factory.CounterData> counterData = pathHeaderFactory 1058 .getInternalCounter(); 1059 logln("\nInternal Counter:\t" + counterData.size()); 1060 for (PathHeader.Factory.CounterData item : counterData.keySet()) { 1061 logln("\t" + counterData.getCount(item) + "\t" + item.get2() // externals 1062 + "\t" + item.get3() + "\t" + item.get0() // internals 1063 + "\t" + item.get1()); 1064 } 1065 logln("\nMenus/Headers:\t" + threeLevel.size()); 1066 for (String item : threeLevel) { 1067 logln(item); 1068 } 1069 LinkedHashMap<String, Set<String>> sectionsToPages = org.unicode.cldr.util.PathHeader.Factory 1070 .getSectionsToPages(); 1071 logln("\nMenus:\t" + sectionsToPages.size()); 1072 for (Entry<String, Set<String>> item : sectionsToPages.entrySet()) { 1073 final String section = item.getKey(); 1074 for (String page : item.getValue()) { 1075 logln("\t" + section + "\t" + page); 1076 int count = 0; 1077 for (String path : pathHeaderFactory.filterCldr(section, page, 1078 english)) { 1079 count += 1; // just count them. 1080 } 1081 logln("\t" + count); 1082 } 1083 } 1084 } 1085 1086 public static final Set<String> GERMAN_UNIT_ORDER = ImmutableSet.of( 1087 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]", 1088 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]", 1089 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]", 1090 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]", 1091 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]", 1092 "//ldml/units/unitLength[@type=\"narrrow\"]/unit[@type=\"volume-liter\"]", 1093 "//ldml/numbers/minimalPairs/caseMinimalPairs", 1094 "//ldml/numbers/minimalPairs/genderMinimalPairs" 1095 ); TestOrder()1096 public void TestOrder() { 1097 String[] paths = { 1098 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"noon\"]", 1099 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"afternoon1\"]", 1100 }; 1101 PathHeader pathHeaderLast = null; 1102 for (String path : paths) { 1103 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1104 if (pathHeaderLast != null) { 1105 assertRelation("ordering", true, pathHeaderLast, LEQ, pathHeader); 1106 } 1107 pathHeaderLast = pathHeader; 1108 } 1109 CLDRFile german = factory.make("de", true); 1110 Multimap<PathHeader, String> pathHeaderToPaths = TreeMultimap.create(); 1111 for (String path : german.fullIterable()) { 1112 for (String prefix : GERMAN_UNIT_ORDER) { 1113 if (path.startsWith(prefix)) { 1114 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1115 pathHeaderToPaths.put(pathHeader, path); 1116 } 1117 } 1118 } 1119 String[] germanExpected = { 1120 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/gender", 1121 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/displayName", 1122 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/perUnitPattern", 1123 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]", 1124 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"dative\"]", 1125 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]", 1126 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]", 1127 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]", 1128 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"dative\"]", 1129 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]", 1130 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]", 1131 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/displayName", 1132 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/perUnitPattern", 1133 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]", 1134 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]", 1135 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"accusative\"]", 1136 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"accusative\"]", 1137 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"accusative\"]", 1138 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"dative\"]", 1139 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"dative\"]", 1140 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"dative\"]", 1141 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"genitive\"]", 1142 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"genitive\"]", 1143 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"genitive\"]", 1144 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", 1145 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]", 1146 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"]", 1147 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"accusative\"]", 1148 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"accusative\"]", 1149 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"accusative\"]", 1150 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"dative\"]", 1151 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"dative\"]", 1152 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"dative\"]", 1153 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"genitive\"]", 1154 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"genitive\"]", 1155 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"genitive\"]", 1156 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", 1157 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]", 1158 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"]", 1159 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", 1160 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", 1161 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", 1162 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", 1163 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"accusative\"]", 1164 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"dative\"]", 1165 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"genitive\"]", 1166 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"nominative\"]", 1167 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"feminine\"]", 1168 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"masculine\"]", 1169 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"neuter\"]", 1170 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", 1171 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", 1172 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1"}; 1173 1174 int germanExpectedIndex = 0; 1175 int errorCount = 0; 1176 for (Entry<PathHeader, Collection<String>> entry : pathHeaderToPaths.asMap().entrySet()) { 1177 PathHeader ph = entry.getKey(); 1178 Collection<String> epaths = entry.getValue(); 1179 if (!assertEquals(entry.toString(), 1, epaths.size())) { 1180 ++errorCount; 1181 } 1182 if (!assertEquals("PathHeader order", germanExpected[germanExpectedIndex++], epaths.iterator().next())) { 1183 ++errorCount; 1184 } 1185 } 1186 if (errorCount != 0) { 1187 for (Entry<PathHeader, Collection<String>> entry : pathHeaderToPaths.asMap().entrySet()) { 1188 PathHeader ph = entry.getKey(); 1189 Collection<String> epaths = entry.getValue(); 1190 System.out.println("\"" + epaths.iterator().next().replace("\"", "\\\"") + "\",\t// " + ph); 1191 } 1192 } 1193 } 1194 Test8414()1195 public void Test8414() { 1196 PathDescription pathDescription = new PathDescription(supplemental, 1197 english, null, null, PathDescription.ErrorHandling.CONTINUE); 1198 1199 String prefix = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\""; 1200 String suffix = "\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]"; 1201 1202 final String path0 = prefix + "format" + suffix; 1203 final String path1 = prefix + "stand-alone" + suffix; 1204 String v0 = english.getStringValue(path0); 1205 String v1 = english.getStringValue(path1); 1206 String p0 = pathDescription.getDescription(path0, v0, null, null); 1207 String p1 = pathDescription.getDescription(path1, v1, null, null); 1208 assertTrue("Check pd for format", p0.contains("in the morning")); 1209 assertTrue("Check pd for stand-alone", !p1.contains("in the morning")); 1210 } 1211 TestCompletenessNonLdmlDtd()1212 public void TestCompletenessNonLdmlDtd() { 1213 PathChecker pathChecker = new PathChecker(); 1214 Set<String> directories = new LinkedHashSet<>(); 1215 Multimap<String, String> pathValuePairs = LinkedListMultimap.create(); 1216 // get all the directories containing non-Ldml dtd files 1217 for (DtdType dtdType : DtdType.values()) { 1218 if (dtdType == DtdType.ldml || dtdType == DtdType.ldmlICU) { 1219 continue; 1220 } 1221 DtdData dtdData = DtdData.getInstance(dtdType); 1222 for (String dir : dtdType.directories) { 1223 if (DEBUG_DTD_TYPE != null && !DEBUG_DTD_TYPE.directories.contains(dir)) { 1224 continue; 1225 } 1226 File dir2 = new File(COMMON_DIR + dir); 1227 logln(dir2.getName()); 1228 for (String file : dir2.list()) { 1229 // don't need to restrict with getFilesToTest(Arrays.asList(dir2.list()), "root", "en")) { 1230 if (!file.endsWith(".xml")) { 1231 continue; 1232 } 1233 if (DEBUG) warnln(" TestCompletenessNonLdmlDtd: " + dir + ", " + file); 1234 logln(" \t" + file); 1235 for (Pair<String, String> pathValue : XMLFileReader.loadPathValues( 1236 dir2 + "/" + file, new ArrayList<Pair<String, String>>(), true)) { 1237 final String path = pathValue.getFirst(); 1238 final String value = pathValue.getSecond(); 1239 // logln("\t\t" + path); 1240 if (path.startsWith("//supplementalData/unitPreferenceData/unitPreferences") 1241 && path.contains("skeleton")) { 1242 int debug = 0; 1243 } 1244 pathChecker.checkPathHeader(dtdData, path); 1245 } 1246 } 1247 } 1248 } 1249 } 1250 1251 private class PathChecker { 1252 PathHeader.Factory phf = pathHeaderFactory; 1253 PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A"); 1254 1255 Set<String> badHeaders = new TreeSet<>(); 1256 Map<PathHeader, PathHeader> goodHeaders = new HashMap<>(); 1257 Set<PathHeader> seenBad = new HashSet<>(); 1258 { phf.clearCache()1259 phf.clearCache(); 1260 } 1261 checkPathHeader(DtdData dtdData, String rawPath)1262 public void checkPathHeader(DtdData dtdData, String rawPath) { 1263 XPathParts pathPlain = XPathParts.getFrozenInstance(rawPath); 1264 if (dtdData.isMetadata(pathPlain)) { 1265 return; 1266 } 1267 if (dtdData.isDeprecated(pathPlain)) { 1268 return; 1269 } 1270 Multimap<String, String> extras = HashMultimap.create(); 1271 Set<String> fixedPaths = dtdData.getRegularizedPaths(pathPlain, extras); 1272 if (fixedPaths != null) { 1273 for (String fixedPath : fixedPaths) { 1274 checkSubpath(fixedPath); 1275 } 1276 } 1277 for (String path : extras.keySet()) { 1278 checkSubpath(path); 1279 } 1280 } 1281 checkSubpath(String path)1282 public void checkSubpath(String path) { 1283 String message = ": Can't compute path header"; 1284 if (path.contentEquals("//supplementalData/grammaticalData/grammaticalFeatures[@targets=\"nominal\"][@locales=\"it\"]/grammaticalGender/_values") ) { 1285 int debug = 0; 1286 } 1287 PathHeader ph = null; 1288 try { 1289 ph = phf.fromPath(path); 1290 if (seenBad.contains(ph)) { 1291 return; 1292 } 1293 if (ph.getPageId() == PageId.Deprecated) { 1294 return; // don't care 1295 } 1296 if (ph.getPageId() != PageId.Unknown) { 1297 PathHeader old = goodHeaders.put(ph, ph); 1298 if (old != null && !path.equals(old.getOriginalPath())) { 1299 errln("Duplicate path header for: " + ph 1300 + "\n\t\t " + path 1301 + "\n\t\t≠" + old.getOriginalPath()); 1302 seenBad.add(ph); 1303 } 1304 return; 1305 } 1306 // for debugging 1307 phf.clearCache(); 1308 List<String> failures = new ArrayList<>(); 1309 ph = phf.fromPath(path, failures); 1310 message = ": Unknown path header" + failures; 1311 } catch (Exception e) { 1312 message = ": Exception in path header: " + e.getMessage(); 1313 } 1314 String star = starrer.set(path); 1315 if (badHeaders.add(star)) { 1316 errln(star + message + ", " + ph); 1317 System.out.println("\tNo match in PathHeader.txt for " + path 1318 + "\n\tYou get only one message for all paths matching " + star 1319 + "\n\tFor example, check to see if the field in PathHeader.txt is in PathHeader.PageId." 1320 + "\n\tIf not, either correct PathHeader.txt or add it to PageId" 1321 + "\n\tIf you have a value attribute, you will need extra _ characters. The value attribute will show at the end with prefixed _, eg [...]/_skeleton." 1322 + "If there can be a value for the path then that element will add _. " 1323 ); 1324 } 1325 } 1326 } 1327 TestSupplementalItems()1328 public void TestSupplementalItems() { 1329 // <weekOfPreference ordering="weekOfYear weekOfMonth" locales="am az bs cs cy da el et hi ky lt mk sk ta th"/> 1330 // logln(pathHeaderFactory.getRegexInfo()); 1331 CLDRFile supplementalFile = CLDRConfig.getInstance().getSupplementalFactory().make("supplementalData", false); 1332 List<String> failures = new ArrayList<>(); 1333 Multimap<String, String> pathValuePairs = LinkedListMultimap.create(); 1334 for (String test : With.in(supplementalFile.iterator("//supplementalData/weekData"))) { 1335 failures.clear(); 1336 XPathParts parts = XPathParts.getFrozenInstance(supplementalFile.getFullXPath(test)); 1337 supplementalFile.getDtdData().getRegularizedPaths(parts, pathValuePairs); 1338 for (Entry<String, Collection<String>> entry : pathValuePairs.asMap().entrySet()) { 1339 final String normalizedPath = entry.getKey(); 1340 final Collection<String> normalizedValue = entry.getValue(); 1341 PathHeader ph = pathHeaderFactory.fromPath(normalizedPath, failures); 1342 if (ph == null || ph.getSectionId() == SectionId.Special) { 1343 errln("Failure with " + test + " => " + normalizedPath + " = " + normalizedValue); 1344 } else { 1345 logln(ph + "\t" + test + " = " + normalizedValue); 1346 } 1347 } 1348 } 1349 } 1350 test10232()1351 public void test10232() { 1352 String[][] tests = { 1353 { "MMM", "Formats - Flexible - Date Formats" }, 1354 { "dMM", "Formats - Flexible - Date Formats" }, 1355 { "h", "Formats - Flexible - 12 Hour Time Formats" }, 1356 { "hm", "Formats - Flexible - 12 Hour Time Formats" }, 1357 { "Ehm", "Formats - Flexible - 12 Hour Time Formats" }, 1358 { "H", "Formats - Flexible - 24 Hour Time Formats" }, 1359 { "Hm", "Formats - Flexible - 24 Hour Time Formats" }, 1360 { "EHm", "Formats - Flexible - 24 Hour Time Formats" }, 1361 }; 1362 for (String[] test : tests) { 1363 String path = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"" 1364 + test[0] + "\"]"; 1365 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1366 assertEquals("flexible formats", test[1] + "|" + test[0], pathHeader.getHeader() + "|" + pathHeader.getCode()); 1367 } 1368 } 1369 1370 // Moved from TestAnnotations and generalized testPathHeaderSize()1371 public void testPathHeaderSize() { 1372 String locale = "ar"; // choose one with lots of plurals 1373 int maxSize = 750; 1374 boolean showTable = false; // only printed if test fails or verbose 1375 1376 Factory factory = CLDRConfig.getInstance().getCommonAndSeedAndMainAndAnnotationsFactory(); 1377 CLDRFile english = factory.make(locale, true); 1378 1379 PathHeader.Factory phf = PathHeader.getFactory(CLDRConfig.getInstance().getEnglish()); 1380 Counter<PageId> counterPageId = new Counter<>(); 1381 Counter<PageId> counterPageIdAll = new Counter<>(); 1382 for (String path : english) { 1383 Level level = CLDRConfig.getInstance().getSupplementalDataInfo().getCoverageLevel(path, locale); 1384 PathHeader ph = phf.fromPath(path); 1385 if (level.compareTo(Level.MODERN) <= 0) { 1386 counterPageId.add(ph.getPageId(), 1); 1387 } 1388 counterPageIdAll.add(ph.getPageId(), 1); 1389 } 1390 Set<R2<Long, PageId>> entrySetSortedByCount = counterPageId.getEntrySetSortedByCount(false, null); 1391 for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) { 1392 long size = sizeAndPageId.get0(); 1393 PageId pageId = sizeAndPageId.get1(); 1394 if (!assertTrue(pageId.getSectionId() + "/" + pageId + " size (" + size 1395 + ") < " + maxSize + "?", size < maxSize)) { 1396 showTable = true; 1397 } 1398 // System.out.println(pageId + "\t" + size); 1399 } 1400 if (showTable || isVerbose()) { 1401 for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) { 1402 PageId pageId = sizeAndPageId.get1(); 1403 System.out.println(pageId.getSectionId() + "\t" + pageId + "\t" + sizeAndPageId.get0() + "\t" + counterPageIdAll.get(pageId)); 1404 } 1405 } 1406 } TestCLDR_11454()1407 public void TestCLDR_11454() { 1408 PathHeader.Factory phf = PathHeader.getFactory(); 1409 PathHeader century = phf.fromPath("//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-century\"]/displayName"); 1410 PathHeader decade = phf.fromPath("//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-decade\"]/displayName"); 1411 assertEquals("Section", century.getSectionId(), decade.getSectionId()); 1412 assertEquals("Page", century.getPageId(), decade.getPageId()); 1413 } 1414 TestEmojiOrder()1415 public void TestEmojiOrder() { 1416 PathHeader.Factory phf = PathHeader.getFactory(); 1417 String[] desiredOrder = { 1418 "⚕", "⚕", "⚕", 1419 "⚖", "⚖", "⚖"}; 1420 List<PathHeader> pathHeaders = new ArrayList<>(); 1421 for (String emoji : desiredOrder) { 1422 String base = "//ldml/annotations/annotation[@cp=\"" + emoji + "\"]"; 1423 pathHeaders.add(phf.fromPath(base + "[@type=\"tts\"]")); 1424 pathHeaders.add(phf.fromPath(base)); 1425 logln(emoji 1426 + ": getEmojiMinorOrder="+ Emoji.getEmojiMinorOrder(Emoji.getMinorCategory(emoji)) 1427 + ", getEmojiToOrder="+ Emoji.getEmojiToOrder(emoji) 1428 ); 1429 } 1430 PathHeader lastItem = null; 1431 for (PathHeader item : pathHeaders) { 1432 if (lastItem != null) { 1433 assertEquals("Section", lastItem.getSectionId(), item.getSectionId()); 1434 assertEquals("Page", lastItem.getPageId(), item.getPageId()); 1435 assertEquals("Header", lastItem.getHeader(), item.getHeader()); 1436 if (!assertTrue(lastItem + " < " + item, lastItem.compareTo(item) < 0)) { 1437 lastItem.compareTo(item); // for debugging 1438 } 1439 } 1440 lastItem = item; 1441 } 1442 } 1443 TestQuotes()1444 public void TestQuotes() { 1445 // quotes should never appear in result 1446 PathHeader.Factory phf = PathHeader.getFactory(); 1447 String[] tests = { 1448 "//supplementalData/plurals[@type=\"ordinal\"]/pluralRules[@locales=\"ig\"]/pluralRule[@count=\"other\"]", 1449 "//supplementalData/transforms/transform[@source=\"und-Khmr\"][@target=\"und-Latn\"]" 1450 }; 1451 for (String test : tests) { 1452 PathHeader trial = phf.fromPath(test); 1453 assertEquals("No quotes in pathheader", false, trial.toString().contains("\"")); 1454 } 1455 } 1456 } 1457