• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.HashMultimap;
5 import com.google.common.collect.ImmutableSet;
6 import com.google.common.collect.LinkedListMultimap;
7 import com.google.common.collect.Multimap;
8 import com.google.common.collect.TreeMultimap;
9 import com.ibm.icu.impl.Relation;
10 import com.ibm.icu.impl.Row;
11 import com.ibm.icu.impl.Row.R2;
12 import java.io.File;
13 import java.util.ArrayList;
14 import java.util.Arrays;
15 import java.util.Collection;
16 import java.util.EnumMap;
17 import java.util.EnumSet;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.LinkedHashMap;
21 import java.util.LinkedHashSet;
22 import java.util.List;
23 import java.util.Map;
24 import java.util.Map.Entry;
25 import java.util.Set;
26 import java.util.TreeMap;
27 import java.util.TreeSet;
28 import java.util.regex.Matcher;
29 import java.util.stream.Collectors;
30 import org.unicode.cldr.test.CoverageLevel2;
31 import org.unicode.cldr.test.ExampleGenerator;
32 import org.unicode.cldr.util.CLDRConfig;
33 import org.unicode.cldr.util.CLDRFile;
34 import org.unicode.cldr.util.CLDRFile.Status;
35 import org.unicode.cldr.util.CLDRLocale;
36 import org.unicode.cldr.util.CLDRPaths;
37 import org.unicode.cldr.util.CLDRURLS;
38 import org.unicode.cldr.util.CldrUtility;
39 import org.unicode.cldr.util.Containment;
40 import org.unicode.cldr.util.Counter;
41 import org.unicode.cldr.util.DtdData;
42 import org.unicode.cldr.util.DtdType;
43 import org.unicode.cldr.util.Emoji;
44 import org.unicode.cldr.util.Factory;
45 import org.unicode.cldr.util.GrammarInfo;
46 import org.unicode.cldr.util.GrammarInfo.CaseValues;
47 import org.unicode.cldr.util.GrammarInfo.GenderValues;
48 import org.unicode.cldr.util.Iso3166Data;
49 import org.unicode.cldr.util.LanguageTagParser;
50 import org.unicode.cldr.util.Level;
51 import org.unicode.cldr.util.Organization;
52 import org.unicode.cldr.util.Pair;
53 import org.unicode.cldr.util.PathDescription;
54 import org.unicode.cldr.util.PathHeader;
55 import org.unicode.cldr.util.PathHeader.PageId;
56 import org.unicode.cldr.util.PathHeader.SectionId;
57 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
58 import org.unicode.cldr.util.PathHeader.Width;
59 import org.unicode.cldr.util.PathStarrer;
60 import org.unicode.cldr.util.PatternCache;
61 import org.unicode.cldr.util.PatternPlaceholders;
62 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo;
63 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
64 import org.unicode.cldr.util.StandardCodes;
65 import org.unicode.cldr.util.SupplementalDataInfo;
66 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
67 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
68 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
69 import org.unicode.cldr.util.With;
70 import org.unicode.cldr.util.XMLFileReader;
71 import org.unicode.cldr.util.XPathParts;
72 
73 public class TestPathHeader extends TestFmwkPlus {
74     private static final DtdType DEBUG_DTD_TYPE = null; // DtdType.supplementalData;
75     private static final String COMMON_DIR = CLDRPaths.BASE_DIRECTORY + "common/";
76     private static final boolean DEBUG = false;
77 
main(String[] args)78     public static void main(String[] args) {
79         new TestPathHeader().run(args);
80     }
81 
82     static final CLDRConfig info = CLDRConfig.getInstance();
83     static final Factory factory = info.getCommonAndSeedAndMainAndAnnotationsFactory();
84     static final CLDRFile english = factory.make("en", true);
85     static final SupplementalDataInfo supplemental = info.getSupplementalDataInfo();
86     static PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(english);
87     private EnumSet<PageId> badZonePages = EnumSet.of(PageId.UnknownT);
88 
tempTestAnnotation()89     public void tempTestAnnotation() {
90         // NEW:     <annotation cp="��">face | grin</annotation>
91         //          <annotation cp="��" type="tts">grinning face</annotation>
92 
93         final String path1 = "//ldml/annotations/annotation[@cp=\"��\"]";
94         PathHeader ph1 = pathHeaderFactory.fromPath(path1);
95         logln(ph1.toString() + "\t" + path1);
96         final String path2 = "//ldml/annotations/annotation[@cp=\"��\"][@type=\"tts\"]";
97         PathHeader ph2 = pathHeaderFactory.fromPath(path2);
98         logln(ph2.toString() + "\t" + path2);
99         final String path3 = "//ldml/annotations/annotation[@cp=\"��\"]";
100         PathHeader ph3 = pathHeaderFactory.fromPath(path2);
101         logln(ph3.toString() + "\t" + path3);
102 
103         assertNotEquals("pathheader", ph1, ph2);
104         assertNotEquals("pathheader", ph1.toString(), ph2.toString());
105         assertRelation("pathheader", true, ph1, TestFmwkPlus.LEQ, ph3);
106         assertRelation("pathheader", true, ph3, TestFmwkPlus.LEQ, ph2);
107     }
108 
109     static final String[] MIN_LOCALES = {
110         "root", "en", "de", "ru", "ko"
111     }; // choose locales with range of case/gender structures
112 
tempTestCompletenessLdmlDtd()113     public void tempTestCompletenessLdmlDtd() {
114         // List<String> failures = null;
115         pathHeaderFactory.clearCache();
116         PathChecker pathChecker = new PathChecker();
117         for (String directory : DtdType.ldml.directories) {
118             Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory();
119             Set<String> source = factory2.getAvailable();
120             for (String file : getFilesToTest(source, MIN_LOCALES)) {
121                 if (DEBUG) warnln(" TestCompletenessLdmlDtd: " + directory + ", " + file);
122                 DtdData dtdData = null;
123                 CLDRFile cldrFile = factory2.make(file, true);
124                 for (String path : cldrFile.fullIterable()) {
125                     pathChecker.checkPathHeader(cldrFile.getDtdData(), path);
126                 }
127             }
128         }
129         Set<String> missing = pathHeaderFactory.getUnmatchedRegexes();
130         if (missing.size() != 0) {
131             for (String e : missing) {
132                 errln("Path Regex never matched:\t" + e);
133             }
134         }
135         if (!pathChecker.badHeaders.isEmpty()) {
136             System.out.println("For help with DTD updates: " + CLDRURLS.CLDR_UPDATINGDTD_URL);
137         }
138     }
139 
getFilesToTest(Collection<String> source, String... doFirst)140     private Collection<String> getFilesToTest(Collection<String> source, String... doFirst) {
141         LinkedHashSet<String> files = new LinkedHashSet<>(Arrays.asList(doFirst));
142         files.retainAll(source); // put first
143         files.addAll(new HashSet<>(source)); // now add others semi-randomly
144         int max = Math.min(30, files.size());
145         if (getInclusion() == 10 || files.size() <= max) {
146             return files;
147         }
148         ArrayList<String> shortFiles = new ArrayList<>(files);
149         if (getInclusion() > 5) {
150             max += (files.size() - 30) * (getInclusion() - 5) / 10; // use proportional amount
151         }
152         return shortFiles.subList(0, max);
153     }
154 
TestCompleteness()155     public void TestCompleteness() {
156         PathHeader.Factory pathHeaderFactory2 = PathHeader.getFactory(english);
157         // List<String> failures = null;
158         pathHeaderFactory2.clearCache();
159         Multimap<PathHeader.PageId, PathHeader.SectionId> pageUniqueness = TreeMultimap.create();
160         Multimap<String, Pair<PathHeader.SectionId, PathHeader.PageId>> headerUniqueness =
161                 TreeMultimap.create();
162         Set<String> toTest;
163         switch (getInclusion()) {
164             default:
165                 toTest = StandardCodes.make().getLocaleCoverageLocales(Organization.cldr);
166                 break;
167             case 10:
168                 toTest = factory.getAvailable();
169                 break;
170         }
171         toTest = ImmutableSet.<String>builder().add("en").addAll(toTest).build();
172         Set<String> seenPaths = new HashSet<>();
173         Set<String> localSeenPaths = new TreeSet<>();
174         for (String locale : toTest) {
175             localSeenPaths.clear();
176             for (String p : factory.make(locale, true).fullIterable()) {
177                 if (p.startsWith("//ldml/identity/")) {
178                     continue;
179                 }
180                 if (seenPaths.contains(p)) {
181                     continue;
182                 }
183                 seenPaths.add(p);
184                 localSeenPaths.add(p);
185                 // if (p.contains("symbol[@alt") && failures == null) {
186                 // PathHeader result = pathHeaderFactory2.fromPath(p, failures = new
187                 // ArrayList<String>());
188                 // logln("Matching " + p + ": " + result + "\t" +
189                 // result.getSurveyToolStatus());
190                 // for (String failure : failures) {
191                 // logln("\t" + failure);
192                 // }
193                 // }
194                 PathHeader ph;
195                 try {
196                     ph = pathHeaderFactory2.fromPath(p);
197                 } catch (Exception e1) {
198                     try {
199                         ph = pathHeaderFactory2.fromPath(p);
200                     } catch (Exception e2) {
201                         throw new IllegalArgumentException(locale + ":\t" + p, e2);
202                     }
203                 }
204                 if (ph == null) {
205                     errln("Failed to create path from: " + p);
206                     continue;
207                 }
208                 final SectionId sectionId = ph.getSectionId();
209                 if (sectionId != SectionId.Special) {
210                     pageUniqueness.put(ph.getPageId(), sectionId);
211                     headerUniqueness.put(ph.getHeader(), new Pair<>(sectionId, ph.getPageId()));
212                 }
213             }
214             if (!localSeenPaths.isEmpty()) {
215                 logln(locale + ": checked " + localSeenPaths.size() + " new paths");
216             }
217         }
218         Set<String> missing = pathHeaderFactory2.getUnmatchedRegexes();
219         if (missing.size() != 0) {
220             for (String e : missing) {
221                 if (e.contains("//ldml/")) {
222                     if (e.contains("//ldml/rbnf/")
223                             || e.contains("//ldml/segmentations/")
224                             || e.contains("//ldml/collations/")) {
225                         continue;
226                     }
227                     logln("Path Regex never matched:\t" + e);
228                 }
229             }
230         }
231 
232         for (Entry<PageId, Collection<SectionId>> e : pageUniqueness.asMap().entrySet()) {
233             Collection<SectionId> values = e.getValue();
234             if (values.size() != 1) {
235                 warnln("Duplicate page in section: " + CldrUtility.toString(e));
236             }
237         }
238 
239         for (Entry<String, Collection<Pair<SectionId, PageId>>> e :
240                 headerUniqueness.asMap().entrySet()) {
241             Collection<Pair<SectionId, PageId>> values = e.getValue();
242             if (values.size() != 1) {
243                 warnln("Duplicate header in (section,page): " + CldrUtility.toString(e));
244             }
245         }
246     }
247 
Test6170()248     public void Test6170() {
249         String p1 =
250                 "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"speed-kilometer-per-hour\"]/unitPattern[@count=\"other\"]";
251         String p2 =
252                 "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"area-square-meter\"]/unitPattern[@count=\"other\"]";
253         PathHeader ph1 = pathHeaderFactory.fromPath(p1);
254         PathHeader ph2 = pathHeaderFactory.fromPath(p2);
255         int comp12 = ph1.compareTo(ph2);
256         int comp21 = ph2.compareTo(ph1);
257         assertEquals("comp ph", comp12, -comp21);
258     }
259 
TestVariant()260     public void TestVariant() {
261         PathHeader p1 =
262                 pathHeaderFactory.fromPath(
263                         "//ldml/localeDisplayNames/languages/language[@type=\"ug\"][@alt=\"variant\"]");
264         PathHeader p2 =
265                 pathHeaderFactory.fromPath(
266                         "//ldml/localeDisplayNames/languages/language[@type=\"ug\"]");
267         assertNotEquals("variants", p1, p2);
268         assertNotEquals("variants", p1.toString(), p2.toString());
269         // Code Lists Languages Arabic Script ug-variant
270     }
271 
Test4587()272     public void Test4587() {
273         String test =
274                 "//ldml/dates/timeZoneNames/metazone[@type=\"Pacific/Wallis\"]/short/standard";
275         PathHeader ph = pathHeaderFactory.fromPath(test);
276         if (ph == null) {
277             errln("Failure with " + test);
278         } else {
279             logln(ph + "\t" + test);
280         }
281     }
282 
TestMiscPatterns()283     public void TestMiscPatterns() {
284         String test =
285                 "//ldml/numbers/miscPatterns[@numberSystem=\"arab\"]/pattern[@type=\"atLeast\"]";
286         PathHeader ph = pathHeaderFactory.fromPath(test);
287         assertNotNull("MiscPatterns path not found", ph);
288         if (false) System.out.println(english.getStringValue(test));
289     }
290 
TestPluralOrder()291     public void TestPluralOrder() {
292         Set<PathHeader> sorted = new TreeSet<>();
293         for (String locale : new String[] {"ru", "ar", "ja"}) {
294             sorted.clear();
295             CLDRFile cldrFile = info.getCLDRFile(locale, true);
296             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale);
297             for (String path : cldrFile.fullIterable()) {
298                 if (!path.contains("@count")) {
299                     continue;
300                 }
301                 Level level = coverageLevel.getLevel(path);
302                 if (Level.MODERN.compareTo(level) < 0) {
303                     continue;
304                 }
305                 PathHeader p = pathHeaderFactory.fromPath(path);
306                 sorted.add(p);
307             }
308             for (PathHeader p : sorted) {
309                 logln(locale + "\t" + p + "\t" + p.getOriginalPath());
310             }
311         }
312     }
313 
314     static final String APPEND_TIMEZONE =
315             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]";
316     static final String APPEND_TIMEZONE_END =
317             "/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]";
318     static final String BEFORE_PH =
319             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"ms\"]";
320     static final String AFTER_PH =
321             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"d\"]/greatestDifference[@id=\"d\"]";
322 
TestAppendTimezone()323     public void TestAppendTimezone() {
324         CLDRFile cldrFile = info.getEnglish();
325         CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en");
326         assertEquals(
327                 "appendItem:Timezone", Level.MODERATE, coverageLevel.getLevel(APPEND_TIMEZONE));
328 
329         PathHeader ph = pathHeaderFactory.fromPath(APPEND_TIMEZONE);
330         assertEquals("appendItem:Timezone pathheader", "Timezone", ph.getCode());
331         // check that they are in the right place (they weren't before!)
332         PathHeader phBefore = pathHeaderFactory.fromPath(BEFORE_PH);
333         PathHeader phAfter = pathHeaderFactory.fromPath(AFTER_PH);
334         assertTrue(phBefore, LEQ, ph);
335         assertTrue(ph, LEQ, phAfter);
336 
337         PathDescription pathDescription =
338                 new PathDescription(
339                         supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE);
340         String description = pathDescription.getDescription(APPEND_TIMEZONE, "tempvalue", null);
341         assertTrue("appendItem:Timezone pathDescription", description.contains("“Timezone”"));
342 
343         PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance();
344         PlaceholderStatus status = patternPlaceholders.getStatus(APPEND_TIMEZONE);
345         assertEquals("appendItem:Timezone placeholders", PlaceholderStatus.REQUIRED, status);
346 
347         Map<String, PlaceholderInfo> placeholderInfo = patternPlaceholders.get(APPEND_TIMEZONE);
348         PlaceholderInfo placeholderInfo2 = placeholderInfo.get("{1}");
349         if (assertNotNull("appendItem:Timezone placeholders", placeholderInfo2)) {
350             assertEquals(
351                     "appendItem:Timezone placeholders",
352                     "APPEND_FIELD_FORMAT",
353                     placeholderInfo2.name);
354             assertEquals(
355                     "appendItem:Timezone placeholders", "Pacific Time", placeholderInfo2.example);
356         }
357         ExampleGenerator eg = new ExampleGenerator(cldrFile, cldrFile);
358         String example =
359                 eg.getExampleHtml(APPEND_TIMEZONE, cldrFile.getStringValue(APPEND_TIMEZONE));
360         String result = ExampleGenerator.simplify(example, false);
361         assertEquals("", "〖❬6:25:59 PM❭ ❬GMT❭〗", result);
362     }
363 
TestOptional()364     public void TestOptional() {
365         if (true) return;
366         Map<PathHeader, String> sorted = new TreeMap<>();
367         for (String locale : new String[] {"af"}) {
368             sorted.clear();
369             CLDRFile cldrFile = info.getCLDRFile(locale, true);
370             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale);
371             for (String path : cldrFile.fullIterable()) {
372                 Level level = coverageLevel.getLevel(path);
373                 if (supplemental.isDeprecated(DtdType.ldml, path)) {
374                     continue;
375                 }
376 
377                 if (Level.COMPREHENSIVE.compareTo(level) != 0) {
378                     continue;
379                 }
380 
381                 PathHeader ph = pathHeaderFactory.fromPath(path);
382                 if (ph == null || ph.shouldHide()) {
383                     continue;
384                 }
385                 final SurveyToolStatus status = ph.getSurveyToolStatus();
386                 sorted.put(ph, locale + "\t" + status + "\t" + ph + "\t" + ph.getOriginalPath());
387             }
388             Set<String> codes = new LinkedHashSet<>();
389             PathHeader old = null;
390             String line = null;
391             for (Entry<PathHeader, String> s : sorted.entrySet()) {
392                 PathHeader p = s.getKey();
393                 String v = s.getValue();
394                 if (old == null) {
395                     line = v;
396                     codes.add(p.getCode());
397                 } else if (p.getSectionId() == old.getSectionId()
398                         && p.getPageId() == old.getPageId()
399                         && p.getHeader().equals(old.getHeader())) {
400                     codes.add(p.getCode());
401                 } else {
402                     logln(line + "\t" + codes.toString());
403                     codes.clear();
404                     line = v;
405                     codes.add(p.getCode());
406                 }
407                 old = p;
408             }
409             logln(line + "\t" + codes.toString());
410         }
411     }
412 
TestPluralCanonicals()413     public void TestPluralCanonicals() {
414         Relation<String, String> data =
415                 Relation.of(new LinkedHashMap<String, Set<String>>(), TreeSet.class);
416         for (String locale : factory.getAvailable()) {
417             if (locale.contains("_")) {
418                 continue;
419             }
420             PluralInfo info = supplemental.getPlurals(PluralType.cardinal, locale);
421             Set<String> keywords = info.getCanonicalKeywords();
422             data.put(keywords.toString(), locale);
423         }
424         for (Entry<String, Set<String>> entry : data.keyValuesSet()) {
425             logln(entry.getKey() + "\t" + entry.getValue());
426         }
427     }
428 
TestPluralPaths()429     public void TestPluralPaths() {
430         // do the following line once, when the file is opened
431         Set<String> filePaths = pathHeaderFactory.pathsForFile(english);
432 
433         // check that English doesn't contain few or many
434         verifyContains(PageId.Duration, filePaths, "few", false);
435         verifyContains(PageId.C_NAmerica, filePaths, "many", false);
436         verifyContains(PageId.C_SAmerica, filePaths, "many", false);
437         verifyContains(PageId.C_NWEurope, filePaths, "many", false);
438         verifyContains(PageId.C_SEEurope, filePaths, "many", false);
439         verifyContains(PageId.C_NAfrica, filePaths, "many", false);
440         verifyContains(PageId.C_WAfrica, filePaths, "many", false);
441         verifyContains(PageId.C_SAfrica, filePaths, "many", false);
442         verifyContains(PageId.C_EAfrica, filePaths, "many", false);
443         verifyContains(PageId.C_CAsia, filePaths, "many", false);
444         verifyContains(PageId.C_WAsia, filePaths, "many", false);
445         verifyContains(PageId.C_SEAsia, filePaths, "many", false);
446         verifyContains(PageId.C_Oceania, filePaths, "many", false);
447         verifyContains(PageId.C_Unknown, filePaths, "many", false);
448 
449         // check that Arabic does contain few and many
450         filePaths = pathHeaderFactory.pathsForFile(info.getCLDRFile("ar", true));
451 
452         verifyContains(PageId.Duration, filePaths, "few", true);
453         verifyContains(PageId.C_NAmerica, filePaths, "many", true);
454         verifyContains(PageId.C_SAmerica, filePaths, "many", true);
455         verifyContains(PageId.C_NWEurope, filePaths, "many", true);
456         verifyContains(PageId.C_SEEurope, filePaths, "many", true);
457         verifyContains(PageId.C_NAfrica, filePaths, "many", true);
458         verifyContains(PageId.C_WAfrica, filePaths, "many", true);
459         verifyContains(PageId.C_SAfrica, filePaths, "many", true);
460         verifyContains(PageId.C_EAfrica, filePaths, "many", true);
461         verifyContains(PageId.C_CAsia, filePaths, "many", true);
462         verifyContains(PageId.C_WAsia, filePaths, "many", true);
463         verifyContains(PageId.C_SEAsia, filePaths, "many", true);
464         verifyContains(PageId.C_Oceania, filePaths, "many", true);
465         verifyContains(PageId.C_Unknown, filePaths, "many", true);
466     }
467 
TestCoverage()468     public void TestCoverage() {
469         Map<Row.R2<SectionId, PageId>, Counter<Level>> data = new TreeMap<>();
470         CLDRFile cldrFile = english;
471         for (String path : cldrFile.fullIterable()) {
472             if (supplemental.isDeprecated(DtdType.ldml, path)) {
473                 errln("Deprecated path in English: " + path);
474                 continue;
475             }
476             Level level = supplemental.getCoverageLevel(path, cldrFile.getLocaleID());
477             PathHeader p = pathHeaderFactory.fromPath(path);
478             SurveyToolStatus status = p.getSurveyToolStatus();
479 
480             boolean hideCoverage = level == Level.COMPREHENSIVE;
481             boolean hidePathHeader = p.shouldHide();
482             if (hidePathHeader != hideCoverage) {
483                 String message = "PathHeader: " + status + ", Coverage: " + level + ": " + path;
484                 if (hidePathHeader && !hideCoverage) {
485                     errln(
486                             message
487                                     + " - PathHeader says to HIDE this, but it visible at <comprehensive coverage. Fix PathHeader to show, or fix coverage.");
488                 } else if (!hidePathHeader && hideCoverage) {
489                     logln(message);
490                 }
491             }
492             final R2<SectionId, PageId> key = Row.of(p.getSectionId(), p.getPageId());
493             Counter<Level> counter = data.get(key);
494             if (counter == null) {
495                 data.put(key, counter = new Counter<>());
496             }
497             counter.add(level, 1);
498         }
499         StringBuffer b = new StringBuffer("\t");
500         for (Level level : Level.values()) {
501             b.append("\t" + level);
502         }
503         logln(b.toString());
504         for (Entry<R2<SectionId, PageId>, Counter<Level>> entry : data.entrySet()) {
505             b.setLength(0);
506             b.append(entry.getKey().get0() + "\t" + entry.getKey().get1());
507             Counter<Level> counter = entry.getValue();
508             long total = 0;
509             for (Level level : Level.values()) {
510                 total += counter.getCount(level);
511                 b.append("\t" + total);
512             }
513             logln(b.toString());
514         }
515     }
516 
Test00AFile()517     public void Test00AFile() {
518         final String localeId = "en";
519         Counter<Level> counter = new Counter<>();
520         Map<String, PathHeader> uniqueness = new HashMap<>();
521         Set<String> alreadySeen = new HashSet<>();
522         check(localeId, true, uniqueness, alreadySeen);
523         // check paths
524         for (Entry<SectionId, Set<PageId>> sectionAndPages :
525                 PathHeader.Factory.getSectionIdsToPageIds().keyValuesSet()) {
526             final SectionId section = sectionAndPages.getKey();
527             if (section == SectionId.Supplemental || section == SectionId.BCP47) {
528                 continue;
529             }
530             logln(section.toString());
531             for (PageId page : sectionAndPages.getValue()) {
532                 final Set<String> cachedPaths = PathHeader.Factory.getCachedPaths(section, page);
533                 if (cachedPaths == null) {
534                     if (!badZonePages.contains(page) && page != PageId.Unknown) {
535                         errln("Null pages for: " + section + "\t" + page);
536                     }
537                 } else if (section == SectionId.Special && page == PageId.Unknown) {
538                     // skip
539                 } else if (section == SectionId.Timezones && page == PageId.UnknownT) {
540                     // skip
541                 } else if (section == SectionId.Misc && page == PageId.Transforms) {
542                     // skip
543                 } else {
544 
545                     int count2 = cachedPaths.size();
546                     if (count2 == 0) {
547                         warnln("Missing pages for: " + section + "\t" + page);
548                     } else {
549                         counter.clear();
550                         for (String s : cachedPaths) {
551                             Level coverage = supplemental.getCoverageLevel(s, localeId);
552                             counter.add(coverage, 1);
553                         }
554                         String countString = "";
555                         int total = 0;
556                         for (Level item : Level.values()) {
557                             long count = counter.get(item);
558                             if (count != 0) {
559                                 if (!countString.isEmpty()) {
560                                     countString += ",\t+";
561                                 }
562                                 total += count;
563                                 countString += item + "=" + total;
564                             }
565                         }
566                         logln("\t" + page + "\t" + countString);
567                         if (page.toString().startsWith("Unknown")) {
568                             logln("\t\t" + cachedPaths);
569                         }
570                     }
571                 }
572             }
573         }
574     }
575 
TestMetazones()576     public void TestMetazones() {
577 
578         CLDRFile nativeFile = info.getEnglish();
579         Set<PathHeader> pathHeaders = getPathHeaders(nativeFile);
580         // String oldPage = "";
581         String oldHeader = "";
582         for (PathHeader entry : pathHeaders) {
583             final String page = entry.getPage();
584             // if (!oldPage.equals(page)) {
585             // logln(page);
586             // oldPage = page;
587             // }
588             String header = entry.getHeader();
589             if (!oldHeader.equals(header)) {
590                 logln(page + "\t" + header);
591                 oldHeader = header;
592             }
593         }
594     }
595 
getPathHeaders(CLDRFile nativeFile)596     public Set<PathHeader> getPathHeaders(CLDRFile nativeFile) {
597         Set<PathHeader> pathHeaders = new TreeSet<>();
598         for (String path : nativeFile.fullIterable()) {
599             PathHeader p = pathHeaderFactory.fromPath(path);
600             pathHeaders.add(p);
601         }
602         return pathHeaders;
603     }
604 
verifyContains( PageId pageId, Set<String> filePaths, String substring, boolean contains)605     public void verifyContains(
606             PageId pageId, Set<String> filePaths, String substring, boolean contains) {
607         String path;
608         path = findOneContaining(allPaths(pageId, filePaths), substring);
609         if (contains) {
610             if (path == null) {
611                 errln("No path contains <" + substring + ">");
612             }
613         } else {
614             if (path != null) {
615                 errln("Path contains <" + substring + ">\t" + path);
616             }
617         }
618     }
619 
findOneContaining(Collection<String> allPaths, String substring)620     private String findOneContaining(Collection<String> allPaths, String substring) {
621         for (String path : allPaths) {
622             if (path.contains(substring)) {
623                 return path;
624             }
625         }
626         return null;
627     }
628 
allPaths(PageId pageId, Set<String> filePaths)629     public Set<String> allPaths(PageId pageId, Set<String> filePaths) {
630         Set<String> result = PathHeader.Factory.getCachedPaths(pageId.getSectionId(), pageId);
631         result.retainAll(filePaths);
632         return result;
633     }
634 
TestUniqueness()635     public void TestUniqueness() {
636         Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory();
637         Set<String> source = factory2.getAvailable();
638         for (String file : getFilesToTest(source, MIN_LOCALES)) {
639             CLDRFile nativeFile = factory2.make(file, true);
640             Map<PathHeader, String> headerToPath = new HashMap<>();
641             Map<String, String> headerVisibleToPath = new HashMap<>();
642             for (String path : nativeFile.fullIterable()) {
643                 PathHeader p = pathHeaderFactory.fromPath(path);
644                 if (p.getSectionId() == SectionId.Special) {
645                     continue;
646                 }
647                 String old = headerToPath.get(p);
648                 if (old == null) {
649                     headerToPath.put(p, path);
650                 } else if (!old.equals(path)) {
651                     if (true) { // for debugging
652                         pathHeaderFactory.clearCache();
653                         List<String> failuresOld = new ArrayList<>();
654                         pathHeaderFactory.fromPath(old, failuresOld);
655                         List<String> failuresPath = new ArrayList<>();
656                         pathHeaderFactory.fromPath(path, failuresPath);
657                     }
658                     errln(file + " collision with path " + p + "\t" + old + "\t" + path);
659                 }
660                 final String visible = p.toString();
661                 old = headerVisibleToPath.get(visible);
662                 if (old == null) {
663                     headerVisibleToPath.put(visible, path);
664                 } else if (!old.equals(path)) {
665                     errln("Collision with path " + visible + "\t" + old + "\t" + path);
666                 }
667             }
668         }
669     }
670 
TestStatus()671     public void TestStatus() {
672         CLDRFile nativeFile = info.getEnglish();
673         PathStarrer starrer = new PathStarrer();
674         EnumMap<SurveyToolStatus, Relation<String, String>> info2 =
675                 new EnumMap<>(SurveyToolStatus.class);
676         Set<String> nuked = new HashSet<>();
677         Set<String> deprecatedStar = new HashSet<>();
678 
679         for (String path : nativeFile.fullIterable()) {
680 
681             PathHeader p = pathHeaderFactory.fromPath(path);
682             final SurveyToolStatus surveyToolStatus = p.getSurveyToolStatus();
683 
684             if (p.getSectionId() == SectionId.Special
685                     && surveyToolStatus == SurveyToolStatus.READ_WRITE) {
686                 errln("SurveyToolStatus should not be " + surveyToolStatus + ": " + p);
687             }
688 
689             String starred = starrer.set(path);
690             List<String> attr = starrer.getAttributes();
691             if (surveyToolStatus != SurveyToolStatus.READ_WRITE) {
692                 nuked.add(starred);
693             }
694 
695             // check against deprecated
696             boolean isDeprecated = supplemental.isDeprecated(DtdType.ldml, path);
697             if (isDeprecated != (surveyToolStatus == SurveyToolStatus.DEPRECATED)) {
698                 if (!deprecatedStar.contains(starred)) {
699                     errln(
700                             "Different from DtdData deprecated:\t"
701                                     + isDeprecated
702                                     + "\t"
703                                     + surveyToolStatus
704                                     + "\t"
705                                     + path);
706                     deprecatedStar.add(starred);
707                 }
708             }
709 
710             Relation<String, String> data = info2.get(surveyToolStatus);
711             if (data == null) {
712                 info2.put(
713                         surveyToolStatus,
714                         data = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class));
715             }
716             data.put(starred, Joiner.on("|").join(attr));
717         }
718         for (Entry<SurveyToolStatus, Relation<String, String>> entry : info2.entrySet()) {
719             final SurveyToolStatus status = entry.getKey();
720             for (Entry<String, Set<String>> item : entry.getValue().keyValuesSet()) {
721                 final String starred = item.getKey();
722                 if (status == SurveyToolStatus.READ_WRITE && !nuked.contains(starred)) {
723                     continue;
724                 }
725                 logln(status + "\t" + starred + "\t" + item.getValue());
726             }
727         }
728     }
729 
TestPathsNotInEnglish()730     public void TestPathsNotInEnglish() {
731         Set<String> englishPaths = new HashSet<>();
732         for (String path : english.fullIterable()) {
733             englishPaths.add(path);
734         }
735         Set<String> alreadySeen = new HashSet<>(englishPaths);
736 
737         for (String locale : factory.getAvailable()) {
738             CLDRFile nativeFile = info.getCLDRFile(locale, false);
739             CoverageLevel2 coverageLevel2 = null;
740             for (String path : nativeFile.fullIterable()) {
741                 if (alreadySeen.contains(path) || path.contains("@count")) {
742                     continue;
743                 }
744                 if (coverageLevel2 == null) {
745                     coverageLevel2 = CoverageLevel2.getInstance(locale);
746                 }
747                 Level level = coverageLevel2.getLevel(path);
748                 if (Level.COMPREHENSIVE.compareTo(level) < 0) {
749                     continue;
750                 }
751                 logln("Path not in English\t" + locale + "\t" + path);
752                 alreadySeen.add(path);
753             }
754         }
755     }
756 
TestPathDescriptionCompleteness()757     public void TestPathDescriptionCompleteness() {
758         PathDescription pathDescription =
759                 new PathDescription(
760                         supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE);
761         Matcher normal =
762                 PatternCache.get("https://cldr.unicode.org/translation/[-a-zA-Z0-9_]").matcher("");
763         // https://cldr.unicode.org/translation/plurals#TOC-Minimal-Pairs
764         Set<String> alreadySeen = new HashSet<>();
765         PathStarrer starrer = new PathStarrer();
766 
767         checkPathDescriptionCompleteness(
768                 pathDescription,
769                 normal,
770                 "//ldml/numbers/defaultNumberingSystem",
771                 alreadySeen,
772                 starrer);
773         for (PathHeader pathHeader : getPathHeaders(english)) {
774             if (pathHeader.shouldHide()) {
775                 continue;
776             }
777             String path = pathHeader.getOriginalPath();
778             checkPathDescriptionCompleteness(pathDescription, normal, path, alreadySeen, starrer);
779         }
780     }
781 
checkPathDescriptionCompleteness( PathDescription pathDescription, Matcher normal, String path, Set<String> alreadySeen, PathStarrer starrer)782     public void checkPathDescriptionCompleteness(
783             PathDescription pathDescription,
784             Matcher normal,
785             String path,
786             Set<String> alreadySeen,
787             PathStarrer starrer) {
788         String value = english.getStringValue(path);
789         String description = pathDescription.getDescription(path, value, null);
790         String starred = starrer.set(path);
791         if (alreadySeen.contains(starred)) {
792             return;
793         } else if (description == null) {
794             errln("Path has no description:\t" + value + "\t" + path);
795         } else if (!description.contains("https://")) {
796             errln("Description has no URL:\t" + description + "\t" + value + "\t" + path);
797         } else if (!normal.reset(description).find()) {
798             errln(
799                     "Description has generic URL, fix to be specific:\t"
800                             + description
801                             + "\t"
802                             + value
803                             + "\t"
804                             + path);
805         } else if (description == PathDescription.MISSING_DESCRIPTION) {
806             errln("Fallback Description:\t" + value + "\t" + path);
807         } else {
808             return;
809         }
810         // Add if we had a problem, keeping us from being overwhelmed with
811         // errors.
812         alreadySeen.add(starred);
813     }
814 
TestTerritoryOrder()815     public void TestTerritoryOrder() {
816         final Set<String> goodAvailableCodes =
817                 StandardCodes.make().getGoodAvailableCodes("territory");
818         Set<String> results = showContained("001", 0, new HashSet<>(goodAvailableCodes));
819         results.remove("ZZ");
820         results.removeAll(Iso3166Data.getRegionCodesNotForTranslation());
821         for (String territory : results) {
822             String sub = Containment.getSubcontinent(territory);
823             String cont = Containment.getContinent(territory);
824             errln(
825                     "Missing\t"
826                             + getNameAndOrder(territory)
827                             + "\t"
828                             + getNameAndOrder(sub)
829                             + "\t"
830                             + getNameAndOrder(cont));
831         }
832     }
833 
showContained(String territory, int level, Set<String> soFar)834     private Set<String> showContained(String territory, int level, Set<String> soFar) {
835         if (!soFar.contains(territory)) {
836             return soFar;
837         }
838         soFar.remove(territory);
839         Set<String> contained = supplemental.getContained(territory);
840         if (contained == null) {
841             return soFar;
842         }
843         for (String containedItem : contained) {
844             logln(
845                     level
846                             + "\t"
847                             + getNameAndOrder(territory)
848                             + "\t"
849                             + getNameAndOrder(containedItem));
850         }
851         for (String containedItem : contained) {
852             showContained(containedItem, level + 1, soFar);
853         }
854         return soFar;
855     }
856 
getNameAndOrder(String territory)857     private String getNameAndOrder(String territory) {
858         return territory
859                 + "\t"
860                 + english.getName(CLDRFile.TERRITORY_NAME, territory)
861                 + "\t"
862                 + Containment.getOrder(territory);
863     }
864 
TestZCompleteness()865     public void TestZCompleteness() {
866         Map<String, PathHeader> uniqueness = new HashMap<>();
867         Set<String> alreadySeen = new HashSet<>();
868         LanguageTagParser ltp = new LanguageTagParser();
869         int count = 0;
870         for (String locale : factory.getAvailable()) {
871             if (!ltp.set(locale).getRegion().isEmpty()) {
872                 continue;
873             }
874             check(locale, false, uniqueness, alreadySeen);
875             ++count;
876         }
877         logln("Count:\t" + count);
878     }
879 
check( String localeID, boolean resolved, Map<String, PathHeader> uniqueness, Set<String> alreadySeen)880     public void check(
881             String localeID,
882             boolean resolved,
883             Map<String, PathHeader> uniqueness,
884             Set<String> alreadySeen) {
885         CLDRFile nativeFile = info.getCLDRFile(localeID, resolved);
886         int count = 0;
887         for (String path : nativeFile) {
888             if (alreadySeen.contains(path)) {
889                 continue;
890             }
891             alreadySeen.add(path);
892             final PathHeader pathHeader = pathHeaderFactory.fromPath(path);
893             ++count;
894             if (pathHeader == null) {
895                 errln("Null pathheader for " + path);
896             } else {
897                 String visible = pathHeader.toString();
898                 PathHeader old = uniqueness.get(visible);
899                 if (pathHeader.getSectionId() == SectionId.Timezones) {
900                     final PageId pageId = pathHeader.getPageId();
901                     if (badZonePages.contains(pageId) && !pathHeader.getCode().equals("Unknown")) {
902                         String msg = "Bad page ID:\t" + pageId + "\t" + pathHeader + "\t" + path;
903                         if (!logKnownIssue(
904                                 "cldrbug:7802", "ICU/CLDR time zone data sync problem - " + msg)) {
905                             errln("Bad page ID:\t" + pageId + "\t" + pathHeader + "\t" + path);
906                         }
907                     }
908                 }
909                 if (old == null) {
910                     if (pathHeader.getSection().equals("Special")) {
911                         if (pathHeader.getSection().equals("Unknown")) {
912                             errln(
913                                     "PathHeader has fallback: "
914                                             + visible
915                                             + "\t"
916                                             + pathHeader.getOriginalPath());
917                             // } else {
918                             // logln("Special:\t" + visible + "\t" +
919                             // pathHeader.getOriginalPath());
920                         }
921                     }
922                     uniqueness.put(visible, pathHeader);
923                 } else if (!old.equals(pathHeader)) {
924                     if (pathHeader.getSectionId() == SectionId.Special) {
925                         logln(
926                                 "Special PathHeader not unique: "
927                                         + visible
928                                         + "\t"
929                                         + pathHeader.getOriginalPath()
930                                         + "\t"
931                                         + old.getOriginalPath());
932                     } else {
933                         errln(
934                                 "PathHeader not unique: "
935                                         + visible
936                                         + "\t"
937                                         + pathHeader.getOriginalPath()
938                                         + "\t"
939                                         + old.getOriginalPath());
940                     }
941                 }
942             }
943         }
944         logln(localeID + "\t" + count);
945     }
946 
TestContainment()947     public void TestContainment() {
948         Map<String, Map<String, String>> metazoneToRegionToZone =
949                 supplemental.getMetazoneToRegionToZone();
950         Map<String, String> metazoneToContinent = supplemental.getMetazoneToContinentMap();
951         for (String metazone : metazoneToRegionToZone.keySet()) {
952             Map<String, String> regionToZone = metazoneToRegionToZone.get(metazone);
953             String worldZone = regionToZone.get("001");
954             String territory = Containment.getRegionFromZone(worldZone);
955             if (territory == null) {
956                 territory = "ZZ";
957             }
958             String cont = Containment.getContinent(territory);
959             int order = Containment.getOrder(territory);
960             String sub = Containment.getSubcontinent(territory);
961             String revision = PathHeader.getMetazonePageTerritory(metazone);
962             String continent = metazoneToContinent.get(metazone);
963             if (continent == null) {
964                 continent = "UnknownT";
965             }
966             // Russia, Antarctica => territory
967             // in Australasia, Asia, S. America => subcontinent
968             // in N. America => N. America (grouping of 3 subcontinents)
969             // in everything else => continent
970 
971             if (territory.equals("RU")) {
972                 assertEquals("Russia special case", "RU", revision);
973             } else if (territory.equals("US")) {
974                 assertEquals("N. America special case", "003", revision);
975             } else if (territory.equals("BR")) {
976                 assertEquals("S. America special case", "005", revision);
977             }
978             if (isVerbose()) {
979                 String name = english.getName(CLDRFile.TERRITORY_NAME, cont);
980                 String name2 = english.getName(CLDRFile.TERRITORY_NAME, sub);
981                 String name3 = english.getName(CLDRFile.TERRITORY_NAME, territory);
982                 String name4 = english.getName(CLDRFile.TERRITORY_NAME, revision);
983 
984                 logln(
985                         metazone + "\t" + continent + "\t" + name + "\t" + name2 + "\t" + name3
986                                 + "\t" + order + "\t" + name4);
987             }
988         }
989     }
990 
TestZ()991     public void TestZ() {
992         PathStarrer pathStarrer = new PathStarrer();
993         pathStarrer.setSubstitutionPattern("%A");
994 
995         Set<PathHeader> sorted = new TreeSet<>();
996         Map<String, String> missing = new TreeMap<>();
997         Map<String, String> skipped = new TreeMap<>();
998         Map<String, String> collide = new TreeMap<>();
999 
1000         logln("Traversing Paths");
1001         for (String path : english) {
1002             PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1003             String value = english.getStringValue(path);
1004             if (pathHeader == null) {
1005                 final String starred = pathStarrer.set(path);
1006                 missing.put(starred, value + "\t" + path);
1007                 continue;
1008             }
1009             if (pathHeader.getSection().equalsIgnoreCase("skip")) {
1010                 final String starred = pathStarrer.set(path);
1011                 skipped.put(starred, value + "\t" + path);
1012                 continue;
1013             }
1014             sorted.add(pathHeader);
1015         }
1016         logln("\nConverted:\t" + sorted.size());
1017         String lastHeader = "";
1018         String lastPage = "";
1019         String lastSection = "";
1020         List<String> threeLevel = new ArrayList<>();
1021         Status status = new Status();
1022         CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("en");
1023 
1024         for (PathHeader pathHeader : sorted) {
1025             String original = pathHeader.getOriginalPath();
1026             if (!original.equals(status.pathWhereFound)) {
1027                 continue;
1028             }
1029             if (!lastSection.equals(pathHeader.getSection())) {
1030                 logln("");
1031                 threeLevel.add(pathHeader.getSection());
1032                 threeLevel.add("\t" + pathHeader.getPage());
1033                 threeLevel.add("\t\t" + pathHeader.getHeader());
1034                 lastSection = pathHeader.getSection();
1035                 lastPage = pathHeader.getPage();
1036                 lastHeader = pathHeader.getHeader();
1037             } else if (!lastPage.equals(pathHeader.getPage())) {
1038                 logln("");
1039                 threeLevel.add("\t" + pathHeader.getPage());
1040                 threeLevel.add("\t\t" + pathHeader.getHeader());
1041                 lastPage = pathHeader.getPage();
1042                 lastHeader = pathHeader.getHeader();
1043             } else if (!lastHeader.equals(pathHeader.getHeader())) {
1044                 logln("");
1045                 threeLevel.add("\t\t" + pathHeader.getHeader());
1046                 lastHeader = pathHeader.getHeader();
1047             }
1048             logln(
1049                     pathHeader
1050                             + "\t"
1051                             + coverageLevel2.getLevel(original)
1052                             + "\t"
1053                             + english.getStringValue(pathHeader.getOriginalPath())
1054                             + "\t"
1055                             + pathHeader.getOriginalPath());
1056         }
1057         if (collide.size() != 0) {
1058             errln("\nCollide:\t" + collide.size());
1059             for (Entry<String, String> item : collide.entrySet()) {
1060                 errln("\t" + item);
1061             }
1062         }
1063         if (missing.size() != 0) {
1064             errln("\nMissing:\t" + missing.size());
1065             for (Entry<String, String> item : missing.entrySet()) {
1066                 errln("\t" + item.getKey() + "\tvalue:\t" + item.getValue());
1067             }
1068         }
1069         if (skipped.size() != 0) {
1070             errln("\nSkipped:\t" + skipped.size());
1071             for (Entry<String, String> item : skipped.entrySet()) {
1072                 errln("\t" + item);
1073             }
1074         }
1075         Counter<PathHeader.Factory.CounterData> counterData =
1076                 pathHeaderFactory.getInternalCounter();
1077         logln("\nInternal Counter:\t" + counterData.size());
1078         for (PathHeader.Factory.CounterData item : counterData.keySet()) {
1079             logln(
1080                     "\t"
1081                             + counterData.getCount(item)
1082                             + "\t"
1083                             + item.get2() // externals
1084                             + "\t"
1085                             + item.get3()
1086                             + "\t"
1087                             + item.get0() // internals
1088                             + "\t"
1089                             + item.get1());
1090         }
1091         logln("\nMenus/Headers:\t" + threeLevel.size());
1092         for (String item : threeLevel) {
1093             logln(item);
1094         }
1095         Relation<SectionId, PageId> s2p = PathHeader.Factory.getSectionIdsToPageIds();
1096         logln("\nMenus:\t" + s2p.size());
1097         for (Entry<SectionId, Set<PageId>> sectionAndPages : s2p.keyValuesSet()) {
1098             final SectionId section = sectionAndPages.getKey();
1099             for (PageId page : sectionAndPages.getValue()) {
1100                 logln("\t" + section + "\t" + page);
1101                 int count = 0;
1102                 for (String path : pathHeaderFactory.filterCldr(section, page, english)) {
1103                     count += 1; // just count them.
1104                 }
1105                 logln("\t" + count);
1106             }
1107         }
1108     }
1109 
1110     public static final Set<String> GERMAN_UNIT_ORDER =
1111             ImmutableSet.of(
1112                     "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]",
1113                     "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]",
1114                     "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]",
1115                     "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]",
1116                     "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]",
1117                     "//ldml/units/unitLength[@type=\"narrrow\"]/unit[@type=\"volume-liter\"]",
1118                     "//ldml/numbers/minimalPairs/caseMinimalPairs",
1119                     "//ldml/numbers/minimalPairs/genderMinimalPairs");
1120 
TestOrder()1121     public void TestOrder() {
1122         String[] paths = {
1123             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"noon\"]",
1124             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"afternoon1\"]",
1125         };
1126         PathHeader pathHeaderLast = null;
1127         for (String path : paths) {
1128             PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1129             if (pathHeaderLast != null) {
1130                 assertRelation("ordering", true, pathHeaderLast, LEQ, pathHeader);
1131             }
1132             pathHeaderLast = pathHeader;
1133         }
1134         CLDRFile german = factory.make("de", true);
1135         Multimap<PathHeader, String> pathHeaderToPaths = TreeMultimap.create();
1136         for (String path : german.fullIterable()) {
1137             for (String prefix : GERMAN_UNIT_ORDER) {
1138                 if (path.startsWith(prefix)) {
1139                     PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1140                     pathHeaderToPaths.put(pathHeader, path);
1141                 }
1142             }
1143         }
1144         String[] germanExpected = {
1145             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/gender", // Units
1146             //
1147             // Volume
1148             //  liter
1149             //
1150             // long-gender
1151             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/displayName", // Units    Volume  liter   long-displayName
1152             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/displayName", // Units    Volume  liter   short-displayName
1153             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/perUnitPattern", // Units    Volume  liter   long-per
1154             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/perUnitPattern", // Units    Volume  liter   short-per
1155             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]", // Units    Volume  liter   long-one-nominative
1156             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]", // Units    Volume  liter   long-one-accusative
1157             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]", // Units    Volume  liter   long-one-genitive
1158             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"dative\"]", // Units    Volume  liter   long-one-dative
1159             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]", // Units    Volume  liter   long-other-nominative
1160             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]", // Units    Volume  liter   long-other-accusative
1161             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]", // Units    Volume  liter   long-other-genitive
1162             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"dative\"]", // Units    Volume  liter   long-other-dative
1163             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]", // Units    Volume  liter   short-one-nominative
1164             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]", // Units    Volume  liter   short-other-nominative
1165             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"]", // Units    Compound Units  power2  long-one-nominative-masculine
1166             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]", // Units    Compound Units  power2  long-one-nominative-feminine
1167             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units    Compound Units  power2  long-one-nominative-dgender
1168             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-one-accusative-masculine
1169             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-one-accusative-feminine
1170             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-one-accusative-dgender
1171             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-one-genitive-masculine
1172             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-one-genitive-feminine
1173             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-one-genitive-dgender
1174             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"dative\"]", // Units    Compound Units  power2  long-one-dative-masculine
1175             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"dative\"]", // Units    Compound Units  power2  long-one-dative-feminine
1176             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"dative\"]", // Units    Compound Units  power2  long-one-dative-dgender
1177             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"]", // Units    Compound Units  power2  long-other-nominative-masculine
1178             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]", // Units    Compound Units  power2  long-other-nominative-feminine
1179             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units    Compound Units  power2  long-other-nominative-dgender
1180             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-other-accusative-masculine
1181             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-other-accusative-feminine
1182             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-other-accusative-dgender
1183             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-other-genitive-masculine
1184             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-other-genitive-feminine
1185             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-other-genitive-dgender
1186             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"dative\"]", // Units    Compound Units  power2  long-other-dative-masculine
1187             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"dative\"]", // Units    Compound Units  power2  long-other-dative-feminine
1188             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"dative\"]", // Units    Compound Units  power2  long-other-dative-dgender
1189             "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units    Compound Units  power2  short-one-nominative-dgender
1190             "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units    Compound Units  power2  short-other-nominative-dgender
1191             "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units    Compound Units  power2  narrow-one-nominative-dgender
1192             "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units    Compound Units  power2  narrow-other-nominative-dgender
1193             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"nominative\"]", // Miscellaneous
1194             //  Minimal Pairs
1195             //  Case
1196             // nominative
1197             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"accusative\"]", // Miscellaneous
1198             //  Minimal Pairs
1199             //  Case
1200             // accusative
1201             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"genitive\"]", // Miscellaneous
1202             // Minimal Pairs
1203             // Case    genitive
1204             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"dative\"]", // Miscellaneous
1205             // Minimal Pairs
1206             // Case    dative
1207             "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"masculine\"]", // Miscellaneous    Minimal Pairs   Gender  masculine
1208             "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"feminine\"]", // Miscellaneous
1209             //    Minimal
1210             // Pairs
1211             // Gender
1212             // feminine
1213             "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"neuter\"]", // Miscellaneous
1214             //  Minimal Pairs
1215             //  Gender  neuter
1216 
1217             // we don't care about order here.
1218             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special  Suppress    compound-UnitPattern1-power2    long
1219             "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special  Suppress    compound-UnitPattern1-power2    narrow
1220             "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special  Suppress    compound-UnitPattern1-power2    short
1221         };
1222 
1223         int germanExpectedIndex = 0;
1224         int errorCount = 0;
1225         int item = 0;
1226         for (Entry<PathHeader, Collection<String>> entry : pathHeaderToPaths.asMap().entrySet()) {
1227             PathHeader ph = entry.getKey();
1228             Collection<String> epaths = entry.getValue();
1229             if (!assertEquals(entry.toString(), 1, epaths.size())) {
1230                 ++errorCount;
1231             }
1232             if (!assertEquals(
1233                     ++item + ") PathHeader order",
1234                     germanExpected[germanExpectedIndex++],
1235                     epaths.iterator().next())) {
1236                 ++errorCount;
1237             }
1238         }
1239         if (errorCount != 0) {
1240             for (Entry<PathHeader, Collection<String>> entry :
1241                     pathHeaderToPaths.asMap().entrySet()) {
1242                 PathHeader ph = entry.getKey();
1243                 Collection<String> epaths = entry.getValue();
1244                 System.out.println(
1245                         "\"" + epaths.iterator().next().replace("\"", "\\\"") + "\",\t// " + ph);
1246             }
1247         }
1248     }
1249 
Test8414()1250     public void Test8414() {
1251         PathDescription pathDescription =
1252                 new PathDescription(
1253                         supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE);
1254 
1255         String prefix =
1256                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"";
1257         String suffix = "\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]";
1258 
1259         final String path0 = prefix + "format" + suffix;
1260         final String path1 = prefix + "stand-alone" + suffix;
1261         String v0 = english.getStringValue(path0);
1262         String v1 = english.getStringValue(path1);
1263         String p0 = pathDescription.getDescription(path0, v0, null);
1264         String p1 = pathDescription.getDescription(path1, v1, null);
1265         assertTrue("Check pd for format", p0.contains("in the morning"));
1266         assertTrue("Check pd for stand-alone", !p1.contains("in the morning"));
1267     }
1268 
TestCompletenessNonLdmlDtd()1269     public void TestCompletenessNonLdmlDtd() {
1270         PathChecker pathChecker = new PathChecker();
1271         Set<String> directories = new LinkedHashSet<>();
1272         Multimap<String, String> pathValuePairs = LinkedListMultimap.create();
1273         // get all the directories containing non-Ldml dtd files
1274         for (DtdType dtdType : DtdType.values()) {
1275             if (dtdType.getStatus() != DtdType.DtdStatus.active) {
1276                 continue;
1277             }
1278             if (dtdType == DtdType.ldml
1279                     || dtdType == DtdType.ldmlICU
1280                     || dtdType == DtdType.keyboard3
1281                     || dtdType == DtdType.keyboardTest3) {
1282                 continue;
1283             }
1284             DtdData dtdData = DtdData.getInstance(dtdType);
1285             for (String dir : dtdType.directories) {
1286                 if (DEBUG_DTD_TYPE != null && !DEBUG_DTD_TYPE.directories.contains(dir)) {
1287                     continue;
1288                 }
1289                 File dir2 = new File(COMMON_DIR + dir);
1290                 logln(dir2.getName());
1291                 for (String file : dir2.list()) {
1292                     // don't need to restrict with getFilesToTest(Arrays.asList(dir2.list()),
1293                     // "root", "en")) {
1294                     if (!file.endsWith(".xml")) {
1295                         continue;
1296                     }
1297                     if (DEBUG) warnln(" TestCompletenessNonLdmlDtd: " + dir + ", " + file);
1298                     logln(" \t" + file);
1299                     for (Pair<String, String> pathValue :
1300                             XMLFileReader.loadPathValues(
1301                                     dir2 + "/" + file,
1302                                     new ArrayList<Pair<String, String>>(),
1303                                     true)) {
1304                         final String path = pathValue.getFirst();
1305                         final String value = pathValue.getSecond();
1306                         //                        logln("\t\t" + path);
1307                         if (path.startsWith("//supplementalData/unitPreferenceData/unitPreferences")
1308                                 && path.contains("skeleton")) {
1309                             int debug = 0;
1310                         }
1311                         pathChecker.checkPathHeader(dtdData, path);
1312                     }
1313                 }
1314             }
1315         }
1316         if (!pathChecker.badHeaders.isEmpty()) {
1317             System.out.println("For help with DTD updates: " + CLDRURLS.CLDR_UPDATINGDTD_URL);
1318         }
1319     }
1320 
1321     private class PathChecker {
1322         PathHeader.Factory phf = pathHeaderFactory;
1323         PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A");
1324 
1325         Set<String> badHeaders = new TreeSet<>();
1326         Map<PathHeader, PathHeader> goodHeaders = new HashMap<>();
1327         Set<PathHeader> seenBad = new HashSet<>();
1328 
1329         {
phf.clearCache()1330             phf.clearCache();
1331         }
1332 
checkPathHeader(DtdData dtdData, String rawPath)1333         public void checkPathHeader(DtdData dtdData, String rawPath) {
1334             XPathParts pathPlain = XPathParts.getFrozenInstance(rawPath);
1335             if (dtdData.isMetadata(pathPlain)) {
1336                 return;
1337             }
1338             if (dtdData.isDeprecated(pathPlain)) {
1339                 return;
1340             }
1341             Multimap<String, String> extras = HashMultimap.create();
1342             Set<String> fixedPaths = dtdData.getRegularizedPaths(pathPlain, extras);
1343             if (fixedPaths != null) {
1344                 for (String fixedPath : fixedPaths) {
1345                     checkSubpath(fixedPath);
1346                 }
1347             }
1348             for (String path : extras.keySet()) {
1349                 checkSubpath(path);
1350             }
1351         }
1352 
checkSubpath(String path)1353         public void checkSubpath(String path) {
1354             String message = ": Can't compute path header";
1355             if (path.contentEquals(
1356                     "//supplementalData/grammaticalData/grammaticalFeatures[@targets=\"nominal\"][@locales=\"it\"]/grammaticalGender/_values")) {
1357                 int debug = 0;
1358             }
1359             PathHeader ph = null;
1360             try {
1361                 ph = phf.fromPath(path);
1362                 if (seenBad.contains(ph)) {
1363                     return;
1364                 }
1365                 if (ph.getPageId() == PageId.Deprecated) {
1366                     return; // don't care
1367                 }
1368                 if (ph.getPageId() != PageId.Unknown) {
1369                     PathHeader old = goodHeaders.put(ph, ph);
1370                     if (old != null && !path.equals(old.getOriginalPath())) {
1371                         errln(
1372                                 "Duplicate path header for: "
1373                                         + ph
1374                                         + "\n\t\t "
1375                                         + path
1376                                         + "\n\t\t≠"
1377                                         + old.getOriginalPath());
1378                         seenBad.add(ph);
1379                     }
1380                     return;
1381                 }
1382                 // for debugging
1383                 phf.clearCache();
1384                 List<String> failures = new ArrayList<>();
1385                 ph = phf.fromPath(path, failures);
1386                 message = ": Unknown path header" + failures;
1387             } catch (Exception e) {
1388                 message = ": Exception in path header: " + e.getMessage();
1389             }
1390             String star = starrer.set(path);
1391             if (badHeaders.add(star)) {
1392                 errln(star + message + ", " + ph);
1393                 System.out.println(
1394                         "\tNo match in PathHeader.txt for "
1395                                 + path
1396                                 + "\n\tYou get only one message for all paths matching "
1397                                 + star
1398                                 + "\n\tFor example, check to see if the field in PathHeader.txt is in PathHeader.PageId."
1399                                 + "\n\tIf not, either correct PathHeader.txt or add it to PageId"
1400                                 + "\n\tIf you have a value attribute, you will need extra _ characters. The value attribute will show at the end with prefixed _, eg [...]/_skeleton."
1401                                 + "If there can be a value for the path then that element will add _. ");
1402             }
1403         }
1404     }
1405 
TestSupplementalItems()1406     public void TestSupplementalItems() {
1407         //      <weekOfPreference ordering="weekOfYear weekOfMonth" locales="am az bs cs cy da el et
1408         // hi ky lt mk sk ta th"/>
1409         // logln(pathHeaderFactory.getRegexInfo());
1410         CLDRFile supplementalFile =
1411                 CLDRConfig.getInstance().getSupplementalFactory().make("supplementalData", false);
1412         List<String> failures = new ArrayList<>();
1413         Multimap<String, String> pathValuePairs = LinkedListMultimap.create();
1414         for (String test : With.in(supplementalFile.iterator("//supplementalData/weekData"))) {
1415             failures.clear();
1416             XPathParts parts = XPathParts.getFrozenInstance(supplementalFile.getFullXPath(test));
1417             supplementalFile.getDtdData().getRegularizedPaths(parts, pathValuePairs);
1418             for (Entry<String, Collection<String>> entry : pathValuePairs.asMap().entrySet()) {
1419                 final String normalizedPath = entry.getKey();
1420                 final Collection<String> normalizedValue = entry.getValue();
1421                 PathHeader ph = pathHeaderFactory.fromPath(normalizedPath, failures);
1422                 if (ph == null || ph.getSectionId() == SectionId.Special) {
1423                     errln(
1424                             "Failure with "
1425                                     + test
1426                                     + " => "
1427                                     + normalizedPath
1428                                     + " = "
1429                                     + normalizedValue);
1430                 } else {
1431                     logln(ph + "\t" + test + " = " + normalizedValue);
1432                 }
1433             }
1434         }
1435     }
1436 
test10232()1437     public void test10232() {
1438         String[][] tests = {
1439             {"MMM", "Formats - Flexible - Date Formats"},
1440             {"dMM", "Formats - Flexible - Date Formats"},
1441             {"h", "Formats - Flexible - 12 Hour Time Formats"},
1442             {"hm", "Formats - Flexible - 12 Hour Time Formats"},
1443             {"Ehm", "Formats - Flexible - 12 Hour Time Formats"},
1444             {"H", "Formats - Flexible - 24 Hour Time Formats"},
1445             {"Hm", "Formats - Flexible - 24 Hour Time Formats"},
1446             {"EHm", "Formats - Flexible - 24 Hour Time Formats"},
1447         };
1448         for (String[] test : tests) {
1449             String path =
1450                     "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\""
1451                             + test[0]
1452                             + "\"]";
1453             PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1454             assertEquals(
1455                     "flexible formats",
1456                     test[1] + "|" + test[0],
1457                     pathHeader.getHeader() + "|" + pathHeader.getCode());
1458         }
1459     }
1460 
1461     // Moved from TestAnnotations and generalized
testPathHeaderSize()1462     public void testPathHeaderSize() {
1463         String locale = "ar"; // choose one with lots of plurals
1464         int maxSize = 1250;
1465         boolean showTable = false; // only printed if test fails or verbose
1466 
1467         Factory factory = CLDRConfig.getInstance().getCommonAndSeedAndMainAndAnnotationsFactory();
1468         CLDRFile english = factory.make(locale, true);
1469 
1470         PathHeader.Factory phf = PathHeader.getFactory(CLDRConfig.getInstance().getEnglish());
1471         Counter<PageId> counterPageId = new Counter<>();
1472         Counter<PageId> counterPageIdAll = new Counter<>();
1473         for (String path : english) {
1474             Level level =
1475                     CLDRConfig.getInstance()
1476                             .getSupplementalDataInfo()
1477                             .getCoverageLevel(path, locale);
1478             PathHeader ph = phf.fromPath(path);
1479             if (level.compareTo(Level.MODERN) <= 0) {
1480                 counterPageId.add(ph.getPageId(), 1);
1481             }
1482             counterPageIdAll.add(ph.getPageId(), 1);
1483         }
1484         Set<R2<Long, PageId>> entrySetSortedByCount =
1485                 counterPageId.getEntrySetSortedByCount(false, null);
1486         for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) {
1487             long size = sizeAndPageId.get0();
1488             PageId pageId = sizeAndPageId.get1();
1489             if (!assertTrue(
1490                     pageId.getSectionId()
1491                             + "/"
1492                             + pageId
1493                             + " size ("
1494                             + size
1495                             + ") < "
1496                             + maxSize
1497                             + "?",
1498                     size < maxSize)) {
1499                 showTable = true;
1500             }
1501             // System.out.println(pageId + "\t" + size);
1502         }
1503         if (showTable || isVerbose()) {
1504             for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) {
1505                 PageId pageId = sizeAndPageId.get1();
1506                 System.out.println(
1507                         pageId.getSectionId()
1508                                 + "\t"
1509                                 + pageId
1510                                 + "\t"
1511                                 + sizeAndPageId.get0()
1512                                 + "\t"
1513                                 + counterPageIdAll.get(pageId));
1514             }
1515         }
1516     }
1517 
TestCLDR_11454()1518     public void TestCLDR_11454() {
1519         PathHeader.Factory phf = PathHeader.getFactory();
1520         PathHeader century =
1521                 phf.fromPath(
1522                         "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-century\"]/displayName");
1523         PathHeader decade =
1524                 phf.fromPath(
1525                         "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-decade\"]/displayName");
1526         assertEquals("Section", century.getSectionId(), decade.getSectionId());
1527         assertEquals("Page", century.getPageId(), decade.getPageId());
1528     }
1529 
TestEmojiOrder()1530     public void TestEmojiOrder() {
1531         PathHeader.Factory phf = PathHeader.getFactory();
1532         String[] desiredOrder = {
1533             "��‍⚕", "��‍⚕", "��‍⚕",
1534             "��‍⚖", "��‍⚖", "��‍⚖"
1535         };
1536         List<PathHeader> pathHeaders = new ArrayList<>();
1537         for (String emoji : desiredOrder) {
1538             String base = "//ldml/annotations/annotation[@cp=\"" + emoji + "\"]";
1539             pathHeaders.add(phf.fromPath(base + "[@type=\"tts\"]"));
1540             pathHeaders.add(phf.fromPath(base));
1541             logln(
1542                     emoji
1543                             + ": getEmojiMinorOrder="
1544                             + Emoji.getEmojiMinorOrder(Emoji.getMinorCategory(emoji))
1545                             + ", getEmojiToOrder="
1546                             + Emoji.getEmojiToOrder(emoji));
1547         }
1548         PathHeader lastItem = null;
1549         for (PathHeader item : pathHeaders) {
1550             if (lastItem != null) {
1551                 assertEquals("Section", lastItem.getSectionId(), item.getSectionId());
1552                 assertEquals("Page", lastItem.getPageId(), item.getPageId());
1553                 assertEquals("Header", lastItem.getHeader(), item.getHeader());
1554                 if (!assertTrue(lastItem + " < " + item, lastItem.compareTo(item) < 0)) {
1555                     lastItem.compareTo(item); // for debugging
1556                 }
1557             }
1558             lastItem = item;
1559         }
1560     }
1561 
TestQuotes()1562     public void TestQuotes() {
1563         // quotes should never appear in result
1564         PathHeader.Factory phf = PathHeader.getFactory();
1565         String[] tests = {
1566             "//supplementalData/plurals[@type=\"ordinal\"]/pluralRules[@locales=\"ig\"]/pluralRule[@count=\"other\"]",
1567             "//supplementalData/transforms/transform[@source=\"und-Khmr\"][@target=\"und-Latn\"]"
1568         };
1569         for (String test : tests) {
1570             PathHeader trial = phf.fromPath(test);
1571             assertEquals("No quotes in pathheader", false, trial.toString().contains("\""));
1572         }
1573     }
1574     /**
1575      * Make sure that the PathHeader sort order is consistent with the grammatical feature orders
1576      * "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/displayName"
1577      * //ldml/units/unitLength[@type=\long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]",
1578      * //ldml/units/unitLength[@type=\long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"accusative\"]",
1579      */
TestUnitOrder()1580     public void TestUnitOrder() {
1581         PathHeader.Factory phf = PathHeader.getFactory();
1582         List<PathHeader> expectedOrder = new ArrayList<>();
1583         List<Width> widths = Arrays.asList(Width.LONG, Width.SHORT, Width.NARROW);
1584         List<CaseValues> cases = Arrays.asList(GrammarInfo.CaseValues.values()).subList(0, 3);
1585         List<GenderValues> genders = Arrays.asList(GrammarInfo.GenderValues.values()).subList(0, 3);
1586 
1587         for (Width width : widths) {
1588             String path =
1589                     "//ldml/units/unitLength[@type=\""
1590                             + width
1591                             + "\"]/unit[@type=\"length-meter\"]/displayName";
1592             expectedOrder.add(phf.fromPath(path));
1593         }
1594 
1595         for (Width width : widths) {
1596             for (Count count : Count.values()) {
1597                 for (GrammarInfo.CaseValues gCase : cases) {
1598                     if (width != Width.LONG && gCase != CaseValues.nominative) {
1599                         break;
1600                     }
1601                     String path =
1602                             "//ldml/units/unitLength[@type=\""
1603                                     + width
1604                                     + "\"]/unit[@type=\"length-meter\"]/unitPattern[@count=\""
1605                                     + count
1606                                     + (gCase == CaseValues.nominative ? "" : "\"][@case=\"" + gCase)
1607                                     + "\"]";
1608                     expectedOrder.add(phf.fromPath(path));
1609                 }
1610             }
1611         }
1612         for (Width width : widths) {
1613             for (Count count : Count.values()) {
1614                 for (GrammarInfo.CaseValues gCase : cases) {
1615                     if (width != Width.LONG && gCase != CaseValues.nominative) {
1616                         break;
1617                     }
1618                     for (GrammarInfo.GenderValues gGender : genders) {
1619                         if (width != Width.LONG && gGender != GenderValues.neuter) {
1620                             break;
1621                         }
1622                         String path =
1623                                 "//ldml/units/unitLength[@type=\""
1624                                         + width
1625                                         + "\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\""
1626                                         + count
1627                                         + (gGender == GenderValues.neuter
1628                                                 ? ""
1629                                                 : "\"][@gender=\"" + gGender)
1630                                         + (gCase == CaseValues.nominative
1631                                                 ? ""
1632                                                 : "\"][@case=\"" + gCase)
1633                                         + "\"]";
1634                         expectedOrder.add(phf.fromPath(path));
1635                     }
1636                 }
1637             }
1638         }
1639         for (Count count : Count.values()) {
1640             String path =
1641                     "//ldml/numbers/minimalPairs/ordinalMinimalPairs[@ordinal=\"" + count + "\"]";
1642             expectedOrder.add(phf.fromPath(path));
1643         }
1644         for (Count count : Count.values()) {
1645             String path =
1646                     "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"" + count + "\"]";
1647             expectedOrder.add(phf.fromPath(path));
1648         }
1649         for (GrammarInfo.CaseValues gCase : cases) {
1650             String path = "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"" + gCase + "\"]";
1651             expectedOrder.add(phf.fromPath(path));
1652         }
1653         for (GrammarInfo.GenderValues gGender : genders) {
1654             String path =
1655                     "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"" + gGender + "\"]";
1656             expectedOrder.add(phf.fromPath(path));
1657         }
1658 
1659         PathHeader last = null;
1660         int item = 0;
1661         int errorCount = 0;
1662         for (PathHeader pathHeader : expectedOrder) {
1663             if (last != null) {
1664                 if (!assertTrue(
1665                         ++item + ")\t" + last + "\t<\t" + pathHeader,
1666                         last.compareTo(pathHeader) < 0)) {
1667                     errorCount++;
1668                     last.compareTo(pathHeader);
1669                 }
1670             }
1671             last = pathHeader;
1672         }
1673         if (errorCount != 0 || isVerbose()) {
1674             for (PathHeader pathHeader : expectedOrder) {
1675                 System.out.println(
1676                         "\""
1677                                 + pathHeader.getOriginalPath().replace("\"", "\\\"")
1678                                 + "\",\t// "
1679                                 + pathHeader);
1680             }
1681         }
1682     }
1683 
testPageSize()1684     public void testPageSize() {
1685         final long minError = 946; // above this, emit error
1686         final long minLog = 700; // otherwise above this, emit warning
1687         Factory factory = CLDRConfig.getInstance().getCommonAndSeedAndMainAndAnnotationsFactory();
1688         List<String> locales =
1689                 StandardCodes.make()
1690                         .getLocaleCoverageLocales(Organization.cldr, ImmutableSet.of(Level.MODERN))
1691                         .stream()
1692                         .filter(x -> CLDRLocale.getInstance(x).getCountry().isEmpty())
1693                         .collect(Collectors.toUnmodifiableList());
1694         List<Counter<PageId>> counters = new ArrayList<>();
1695         final String thresholdExplanation = "log/error thresholds are " + minLog + "/" + minError;
1696         for (String locale : locales) {
1697             CLDRFile cldrFile = factory.make(locale, false);
1698             PathHeader.Factory phf = PathHeader.getFactory();
1699             Counter<PageId> c = new Counter<>();
1700             counters.add(c);
1701             for (String path : cldrFile) {
1702                 PathHeader ph = phf.fromPath(path);
1703                 c.add(ph.getPageId(), 1);
1704             }
1705             for (PageId entry : c.getKeysetSortedByKey()) {
1706                 long count = c.getCount(entry);
1707                 if (count > minLog) {
1708                     final String message =
1709                             String.format(
1710                                     "%s\t%s\t%s\thas too many entries:\t%d\t(%s)",
1711                                     locale,
1712                                     entry.getSectionId().toString(),
1713                                     entry,
1714                                     count,
1715                                     thresholdExplanation);
1716                     if (count > minError) {
1717                         errln(message);
1718                     } else {
1719                         warnln(message);
1720                     }
1721                 }
1722             }
1723         }
1724         if (isVerbose()) {
1725             System.out.println();
1726             Set<PageId> sorted = new TreeSet<>();
1727             for (Counter<PageId> counter : counters) {
1728                 sorted.addAll(counter.keySet());
1729             }
1730             int i = 0;
1731             System.out.print("Order" + "\t" + "Section" + "\t" + "Page");
1732             for (String c : locales) {
1733                 System.out.print("\t" + c);
1734             }
1735             System.out.println();
1736 
1737             for (PageId entry : sorted) {
1738                 System.out.print(++i + "\t" + entry.getSectionId() + "\t" + entry);
1739                 for (Counter<PageId> c : counters) {
1740                     System.out.print("\t" + c.get(entry));
1741                 }
1742                 System.out.println();
1743             }
1744         }
1745     }
1746 }
1747