• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.Collection;
7 import java.util.EnumMap;
8 import java.util.EnumSet;
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.LinkedHashMap;
12 import java.util.LinkedHashSet;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Map.Entry;
16 import java.util.Set;
17 import java.util.TreeMap;
18 import java.util.TreeSet;
19 import java.util.regex.Matcher;
20 
21 import org.unicode.cldr.test.CoverageLevel2;
22 import org.unicode.cldr.test.ExampleGenerator;
23 import org.unicode.cldr.util.CLDRConfig;
24 import org.unicode.cldr.util.CLDRFile;
25 import org.unicode.cldr.util.CLDRFile.Status;
26 import org.unicode.cldr.util.CLDRPaths;
27 import org.unicode.cldr.util.CldrUtility;
28 import org.unicode.cldr.util.Containment;
29 import org.unicode.cldr.util.Counter;
30 import org.unicode.cldr.util.DtdData;
31 import org.unicode.cldr.util.DtdType;
32 import org.unicode.cldr.util.Emoji;
33 import org.unicode.cldr.util.Factory;
34 import org.unicode.cldr.util.LanguageTagParser;
35 import org.unicode.cldr.util.Level;
36 import org.unicode.cldr.util.Organization;
37 import org.unicode.cldr.util.Pair;
38 import org.unicode.cldr.util.PathDescription;
39 import org.unicode.cldr.util.PathHeader;
40 import org.unicode.cldr.util.PathHeader.PageId;
41 import org.unicode.cldr.util.PathHeader.SectionId;
42 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
43 import org.unicode.cldr.util.PathStarrer;
44 import org.unicode.cldr.util.PatternCache;
45 import org.unicode.cldr.util.PatternPlaceholders;
46 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo;
47 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
48 import org.unicode.cldr.util.StandardCodes;
49 import org.unicode.cldr.util.SupplementalDataInfo;
50 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
51 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
52 import org.unicode.cldr.util.With;
53 import org.unicode.cldr.util.XMLFileReader;
54 import org.unicode.cldr.util.XPathParts;
55 
56 import com.google.common.base.Joiner;
57 import com.google.common.collect.HashMultimap;
58 import com.google.common.collect.ImmutableSet;
59 import com.google.common.collect.LinkedListMultimap;
60 import com.google.common.collect.Multimap;
61 import com.google.common.collect.TreeMultimap;
62 import com.ibm.icu.impl.Relation;
63 import com.ibm.icu.impl.Row;
64 import com.ibm.icu.impl.Row.R2;
65 
66 public class TestPathHeader extends TestFmwkPlus {
67     private static final DtdType DEBUG_DTD_TYPE = null; // DtdType.supplementalData;
68     private static final String COMMON_DIR = CLDRPaths.BASE_DIRECTORY + "common/";
69     private static final boolean DEBUG = false;
70 
main(String[] args)71     public static void main(String[] args) {
72         new TestPathHeader().run(args);
73     }
74 
75     static final CLDRConfig info = CLDRConfig.getInstance();
76     static final Factory factory = info.getCommonAndSeedAndMainAndAnnotationsFactory();
77     static final CLDRFile english = factory.make("en", true);
78     static final SupplementalDataInfo supplemental = info
79         .getSupplementalDataInfo();
80     static PathHeader.Factory pathHeaderFactory = PathHeader
81         .getFactory(english);
82     private EnumSet<PageId> badZonePages = EnumSet.of(PageId.UnknownT);
83 
tempTestAnnotation()84     public void tempTestAnnotation() {
85         // NEW:     <annotation cp="��">face | grin</annotation>
86         //          <annotation cp="��" type="tts">grinning face</annotation>
87 
88         final String path1 = "//ldml/annotations/annotation[@cp=\"��\"]";
89         PathHeader ph1 = pathHeaderFactory.fromPath(path1);
90         logln(ph1.toString() + "\t" + path1);
91         final String path2 = "//ldml/annotations/annotation[@cp=\"��\"][@type=\"tts\"]";
92         PathHeader ph2 = pathHeaderFactory.fromPath(path2);
93         logln(ph2.toString() + "\t" + path2);
94         final String path3 = "//ldml/annotations/annotation[@cp=\"��\"]";
95         PathHeader ph3 = pathHeaderFactory.fromPath(path2);
96         logln(ph3.toString() + "\t" + path3);
97 
98         assertNotEquals("pathheader", ph1, ph2);
99         assertNotEquals("pathheader", ph1.toString(), ph2.toString());
100         assertRelation("pathheader", true, ph1, TestFmwkPlus.LEQ, ph3);
101         assertRelation("pathheader", true, ph3, TestFmwkPlus.LEQ, ph2);
102     }
103 
104     static final String[] MIN_LOCALES = {"root", "en", "de", "ru", "ko"}; // choose locales with range of case/gender structures
105 
tempTestCompletenessLdmlDtd()106     public void tempTestCompletenessLdmlDtd() {
107         // List<String> failures = null;
108         pathHeaderFactory.clearCache();
109         PathChecker pathChecker = new PathChecker();
110         for (String directory : DtdType.ldml.directories) {
111             Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory();
112             Set<String> source = factory2.getAvailable();
113             for (String file : getFilesToTest(source, MIN_LOCALES)) {
114                 if (DEBUG) warnln(" TestCompletenessLdmlDtd: " + directory + ", " + file);
115                 DtdData dtdData = null;
116                 CLDRFile cldrFile = factory2.make(file, true);
117                 for (String path : cldrFile.fullIterable()) {
118                     pathChecker.checkPathHeader(cldrFile.getDtdData(), path);
119                 }
120             }
121         }
122         Set<String> missing = pathHeaderFactory.getUnmatchedRegexes();
123         if (missing.size() != 0) {
124             for (String e : missing) {
125                 errln("Path Regex never matched:\t" + e);
126             }
127         }
128     }
129 
getFilesToTest(Collection<String> source, String... doFirst)130     private Collection<String> getFilesToTest(Collection<String> source, String... doFirst) {
131         LinkedHashSet<String> files = new LinkedHashSet<>(Arrays.asList(doFirst));
132         files.retainAll(source); // put first
133         files.addAll(new HashSet<>(source)); // now add others semi-randomly
134         int max = Math.min(30, files.size());
135         if (getInclusion() == 10 || files.size() <= max) {
136             return files;
137         }
138         ArrayList<String> shortFiles = new ArrayList<>(files);
139         if (getInclusion() > 5) {
140             max += (files.size() - 30) * (getInclusion() - 5) / 10; // use proportional amount
141         }
142         return shortFiles.subList(0, max);
143     }
144 
TestCompleteness()145     public void TestCompleteness() {
146         PathHeader.Factory pathHeaderFactory2 = PathHeader.getFactory(english);
147         // List<String> failures = null;
148         pathHeaderFactory2.clearCache();
149         Multimap<PathHeader.PageId, PathHeader.SectionId> pageUniqueness = TreeMultimap.create();
150         Multimap<String, Pair<PathHeader.SectionId, PathHeader.PageId>> headerUniqueness = TreeMultimap.create();
151         Set<String> toTest;
152         switch (getInclusion()) {
153         default:
154             toTest = StandardCodes.make().getLocaleCoverageLocales(Organization.cldr);
155             break;
156         case 10:
157             toTest = factory.getAvailable();
158             break;
159         }
160         toTest = ImmutableSet.<String> builder().add("en").addAll(toTest).build();
161         Set<String> seenPaths = new HashSet<>();
162         Set<String> localSeenPaths = new TreeSet<>();
163         for (String locale : toTest) {
164             localSeenPaths.clear();
165             for (String p : factory.make(locale, true).fullIterable()) {
166                 if (p.startsWith("//ldml/identity/")) {
167                     continue;
168                 }
169                 if (seenPaths.contains(p)) {
170                     continue;
171                 }
172                 seenPaths.add(p);
173                 localSeenPaths.add(p);
174                 // if (p.contains("symbol[@alt") && failures == null) {
175                 // PathHeader result = pathHeaderFactory2.fromPath(p, failures = new
176                 // ArrayList<String>());
177                 // logln("Matching " + p + ": " + result + "\t" +
178                 // result.getSurveyToolStatus());
179                 // for (String failure : failures) {
180                 // logln("\t" + failure);
181                 // }
182                 // }
183                 PathHeader ph;
184                 try {
185                     ph = pathHeaderFactory2.fromPath(p);
186                 } catch (Exception e1) {
187                     try {
188                         ph = pathHeaderFactory2.fromPath(p);
189                     } catch (Exception e2) {
190                         throw new IllegalArgumentException(locale + ":\t" + p, e2);
191                     }
192                 }
193                 if (ph == null) {
194                     errln("Failed to create path from: " + p);
195                     continue;
196                 }
197                 final SectionId sectionId = ph.getSectionId();
198                 if (sectionId != SectionId.Special) {
199                     pageUniqueness.put(ph.getPageId(), sectionId);
200                     headerUniqueness.put(ph.getHeader(), new Pair<>(sectionId, ph.getPageId()));
201                 }
202             }
203             if (!localSeenPaths.isEmpty()) {
204                 logln(locale + ": checked " + localSeenPaths.size() + " new paths");
205             }
206         }
207         Set<String> missing = pathHeaderFactory2.getUnmatchedRegexes();
208         if (missing.size() != 0) {
209             for (String e : missing) {
210                 if (e.contains("//ldml/")) {
211                     if (e.contains("//ldml/rbnf/") || e.contains("//ldml/segmentations/") || e.contains("//ldml/collations/")) {
212                         continue;
213                     }
214                     logln("Path Regex never matched:\t" + e);
215                 }
216             }
217         }
218 
219         for (Entry<PageId, Collection<SectionId>> e : pageUniqueness.asMap().entrySet()) {
220             Collection<SectionId> values = e.getValue();
221             if (values.size() != 1) {
222                 warnln("Duplicate page in section: " + CldrUtility.toString(e));
223             }
224         }
225 
226         for (Entry<String, Collection<Pair<SectionId, PageId>>> e : headerUniqueness.asMap().entrySet()) {
227             Collection<Pair<SectionId, PageId>> values = e.getValue();
228             if (values.size() != 1) {
229                 warnln("Duplicate header in (section,page): " + CldrUtility.toString(e));
230             }
231         }
232     }
233 
Test6170()234     public void Test6170() {
235         String p1 = "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"speed-kilometer-per-hour\"]/unitPattern[@count=\"other\"]";
236         String p2 = "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"area-square-meter\"]/unitPattern[@count=\"other\"]";
237         PathHeader ph1 = pathHeaderFactory.fromPath(p1);
238         PathHeader ph2 = pathHeaderFactory.fromPath(p2);
239         int comp12 = ph1.compareTo(ph2);
240         int comp21 = ph2.compareTo(ph1);
241         assertEquals("comp ph", comp12, -comp21);
242     }
243 
TestVariant()244     public void TestVariant() {
245         PathHeader p1 = pathHeaderFactory
246             .fromPath("//ldml/localeDisplayNames/languages/language[@type=\"ug\"][@alt=\"variant\"]");
247         PathHeader p2 = pathHeaderFactory
248             .fromPath("//ldml/localeDisplayNames/languages/language[@type=\"ug\"]");
249         assertNotEquals("variants", p1, p2);
250         assertNotEquals("variants", p1.toString(), p2.toString());
251         // Code Lists Languages Arabic Script ug-variant
252     }
253 
Test4587()254     public void Test4587() {
255         String test = "//ldml/dates/timeZoneNames/metazone[@type=\"Pacific/Wallis\"]/short/standard";
256         PathHeader ph = pathHeaderFactory.fromPath(test);
257         if (ph == null) {
258             errln("Failure with " + test);
259         } else {
260             logln(ph + "\t" + test);
261         }
262     }
263 
TestMiscPatterns()264     public void TestMiscPatterns() {
265         String test = "//ldml/numbers/miscPatterns[@numberSystem=\"arab\"]/pattern[@type=\"atLeast\"]";
266         PathHeader ph = pathHeaderFactory.fromPath(test);
267         assertNotNull("MiscPatterns path not found", ph);
268         if (false)
269             System.out.println(english.getStringValue(test));
270     }
271 
TestPluralOrder()272     public void TestPluralOrder() {
273         Set<PathHeader> sorted = new TreeSet<>();
274         for (String locale : new String[] { "ru", "ar", "ja" }) {
275             sorted.clear();
276             CLDRFile cldrFile = info.getCLDRFile(locale, true);
277             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale);
278             for (String path : cldrFile.fullIterable()) {
279                 if (!path.contains("@count")) {
280                     continue;
281                 }
282                 Level level = coverageLevel.getLevel(path);
283                 if (Level.MODERN.compareTo(level) < 0) {
284                     continue;
285                 }
286                 PathHeader p = pathHeaderFactory.fromPath(path);
287                 sorted.add(p);
288             }
289             for (PathHeader p : sorted) {
290                 logln(locale + "\t" + p + "\t" + p.getOriginalPath());
291             }
292         }
293     }
294 
295     static final String APPEND_TIMEZONE = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]";
296     static final String APPEND_TIMEZONE_END = "/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]";
297     static final String BEFORE_PH = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"ms\"]";
298     static final String AFTER_PH = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"d\"]/greatestDifference[@id=\"d\"]";
299 
TestAppendTimezone()300     public void TestAppendTimezone() {
301         CLDRFile cldrFile = info.getEnglish();
302         CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en");
303         assertEquals("appendItem:Timezone", Level.MODERATE,
304             coverageLevel.getLevel(APPEND_TIMEZONE));
305 
306         PathHeader ph = pathHeaderFactory.fromPath(APPEND_TIMEZONE);
307         assertEquals("appendItem:Timezone pathheader", "Timezone", ph.getCode());
308         // check that they are in the right place (they weren't before!)
309         PathHeader phBefore = pathHeaderFactory.fromPath(BEFORE_PH);
310         PathHeader phAfter = pathHeaderFactory.fromPath(AFTER_PH);
311         assertTrue(phBefore, LEQ, ph);
312         assertTrue(ph, LEQ, phAfter);
313 
314         PathDescription pathDescription = new PathDescription(supplemental,
315             english, null, null, PathDescription.ErrorHandling.CONTINUE);
316         String description = pathDescription.getDescription(APPEND_TIMEZONE,
317             "tempvalue", null, null);
318         assertTrue("appendItem:Timezone pathDescription",
319             description.contains("“Timezone”"));
320 
321         PatternPlaceholders patternPlaceholders = PatternPlaceholders
322             .getInstance();
323         PlaceholderStatus status = patternPlaceholders
324             .getStatus(APPEND_TIMEZONE);
325         assertEquals("appendItem:Timezone placeholders",
326             PlaceholderStatus.REQUIRED, status);
327 
328         Map<String, PlaceholderInfo> placeholderInfo = patternPlaceholders
329             .get(APPEND_TIMEZONE);
330         PlaceholderInfo placeholderInfo2 = placeholderInfo.get("{1}");
331         if (assertNotNull("appendItem:Timezone placeholders", placeholderInfo2)) {
332             assertEquals("appendItem:Timezone placeholders",
333                 "APPEND_FIELD_FORMAT", placeholderInfo2.name);
334             assertEquals("appendItem:Timezone placeholders", "Pacific Time",
335                 placeholderInfo2.example);
336         }
337         ExampleGenerator eg = new ExampleGenerator(cldrFile, cldrFile, CLDRPaths.SUPPLEMENTAL_DIRECTORY);
338         String example = eg.getExampleHtml(APPEND_TIMEZONE, cldrFile.getStringValue(APPEND_TIMEZONE));
339         String result = ExampleGenerator.simplify(example, false);
340         assertEquals("", "〖❬6:25:59 PM❭ ❬GMT❭〗", result);
341     }
342 
TestOptional()343     public void TestOptional() {
344         if (true) return;
345         Map<PathHeader, String> sorted = new TreeMap<>();
346         for (String locale : new String[] { "af" }) {
347             sorted.clear();
348             CLDRFile cldrFile = info.getCLDRFile(locale, true);
349             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale);
350             for (String path : cldrFile.fullIterable()) {
351                 // if (!path.contains("@count")) {
352                 // continue;
353                 // }
354                 Level level = coverageLevel.getLevel(path);
355                 if (supplemental.isDeprecated(DtdType.ldml, path)) {
356                     continue;
357                 }
358 
359                 if (Level.OPTIONAL.compareTo(level) != 0) {
360                     continue;
361                 }
362 
363                 PathHeader p = pathHeaderFactory.fromPath(path);
364                 final SurveyToolStatus status = p.getSurveyToolStatus();
365                 if (status == SurveyToolStatus.DEPRECATED) {
366                     continue;
367                 }
368                 sorted.put(
369                     p,
370                     locale + "\t" + status + "\t" + p + "\t"
371                         + p.getOriginalPath());
372             }
373             Set<String> codes = new LinkedHashSet<>();
374             PathHeader old = null;
375             String line = null;
376             for (Entry<PathHeader, String> s : sorted.entrySet()) {
377                 PathHeader p = s.getKey();
378                 String v = s.getValue();
379                 if (old == null) {
380                     line = v;
381                     codes.add(p.getCode());
382                 } else if (p.getSectionId() == old.getSectionId()
383                     && p.getPageId() == old.getPageId()
384                     && p.getHeader().equals(old.getHeader())) {
385                     codes.add(p.getCode());
386                 } else {
387                     logln(line + "\t" + codes.toString());
388                     codes.clear();
389                     line = v;
390                     codes.add(p.getCode());
391                 }
392                 old = p;
393             }
394             logln(line + "\t" + codes.toString());
395         }
396     }
397 
TestPluralCanonicals()398     public void TestPluralCanonicals() {
399         Relation<String, String> data = Relation.of(
400             new LinkedHashMap<String, Set<String>>(), TreeSet.class);
401         for (String locale : factory.getAvailable()) {
402             if (locale.contains("_")) {
403                 continue;
404             }
405             PluralInfo info = supplemental.getPlurals(PluralType.cardinal,
406                 locale);
407             Set<String> keywords = info.getCanonicalKeywords();
408             data.put(keywords.toString(), locale);
409         }
410         for (Entry<String, Set<String>> entry : data.keyValuesSet()) {
411             logln(entry.getKey() + "\t" + entry.getValue());
412         }
413     }
414 
TestPluralPaths()415     public void TestPluralPaths() {
416         // do the following line once, when the file is opened
417         Set<String> filePaths = pathHeaderFactory.pathsForFile(english);
418 
419         // check that English doesn't contain few or many
420         verifyContains(PageId.Duration, filePaths, "few", false);
421         verifyContains(PageId.C_NAmerica, filePaths, "many", false);
422         verifyContains(PageId.C_SAmerica, filePaths, "many", false);
423         verifyContains(PageId.C_NWEurope, filePaths, "many", false);
424         verifyContains(PageId.C_SEEurope, filePaths, "many", false);
425         verifyContains(PageId.C_NAfrica, filePaths, "many", false);
426         verifyContains(PageId.C_WAfrica, filePaths, "many", false);
427         verifyContains(PageId.C_SAfrica, filePaths, "many", false);
428         verifyContains(PageId.C_EAfrica, filePaths, "many", false);
429         verifyContains(PageId.C_CAsia, filePaths, "many", false);
430         verifyContains(PageId.C_WAsia, filePaths, "many", false);
431         verifyContains(PageId.C_SEAsia, filePaths, "many", false);
432         verifyContains(PageId.C_Oceania, filePaths, "many", false);
433         verifyContains(PageId.C_Unknown, filePaths, "many", false);
434 
435         // check that Arabic does contain few and many
436         filePaths = pathHeaderFactory.pathsForFile(info.getCLDRFile("ar", true));
437 
438         verifyContains(PageId.Duration, filePaths, "few", true);
439         verifyContains(PageId.C_NAmerica, filePaths, "many", true);
440         verifyContains(PageId.C_SAmerica, filePaths, "many", true);
441         verifyContains(PageId.C_NWEurope, filePaths, "many", true);
442         verifyContains(PageId.C_SEEurope, filePaths, "many", true);
443         verifyContains(PageId.C_NAfrica, filePaths, "many", true);
444         verifyContains(PageId.C_WAfrica, filePaths, "many", true);
445         verifyContains(PageId.C_SAfrica, filePaths, "many", true);
446         verifyContains(PageId.C_EAfrica, filePaths, "many", true);
447         verifyContains(PageId.C_CAsia, filePaths, "many", true);
448         verifyContains(PageId.C_WAsia, filePaths, "many", true);
449         verifyContains(PageId.C_SEAsia, filePaths, "many", true);
450         verifyContains(PageId.C_Oceania, filePaths, "many", true);
451         verifyContains(PageId.C_Unknown, filePaths, "many", true);
452     }
453 
TestCoverage()454     public void TestCoverage() {
455         Map<Row.R2<SectionId, PageId>, Counter<Level>> data = new TreeMap<>();
456         CLDRFile cldrFile = english;
457         for (String path : cldrFile.fullIterable()) {
458             if (supplemental.isDeprecated(DtdType.ldml, path)) {
459                 errln("Deprecated path in English: " + path);
460                 continue;
461             }
462             Level level = supplemental.getCoverageLevel(path,
463                 cldrFile.getLocaleID());
464             PathHeader p = pathHeaderFactory.fromPath(path);
465             SurveyToolStatus status = p.getSurveyToolStatus();
466 
467             boolean hideCoverage = level == Level.OPTIONAL;
468             boolean hidePathHeader = status == SurveyToolStatus.DEPRECATED
469                 || status == SurveyToolStatus.HIDE;
470             if (hidePathHeader != hideCoverage) {
471                 String message = "PathHeader: " + status + ", Coverage: "
472                     + level + ": " + path;
473                 if (hidePathHeader && !hideCoverage) {
474                     errln(message);
475                 } else if (!hidePathHeader && hideCoverage) {
476                     logln(message);
477                 }
478             }
479             final R2<SectionId, PageId> key = Row.of(p.getSectionId(),
480                 p.getPageId());
481             Counter<Level> counter = data.get(key);
482             if (counter == null) {
483                 data.put(key, counter = new Counter<>());
484             }
485             counter.add(level, 1);
486         }
487         StringBuffer b = new StringBuffer("\t");
488         for (Level level : Level.values()) {
489             b.append("\t" + level);
490         }
491         logln(b.toString());
492         for (Entry<R2<SectionId, PageId>, Counter<Level>> entry : data
493             .entrySet()) {
494             b.setLength(0);
495             b.append(entry.getKey().get0() + "\t" + entry.getKey().get1());
496             Counter<Level> counter = entry.getValue();
497             long total = 0;
498             for (Level level : Level.values()) {
499                 total += counter.getCount(level);
500                 b.append("\t" + total);
501             }
502             logln(b.toString());
503         }
504     }
505 
Test00AFile()506     public void Test00AFile() {
507         final String localeId = "en";
508         Counter<Level> counter = new Counter<>();
509         Map<String, PathHeader> uniqueness = new HashMap<>();
510         Set<String> alreadySeen = new HashSet<>();
511         check(localeId, true, uniqueness, alreadySeen);
512         // check paths
513         for (Entry<SectionId, Set<PageId>> sectionAndPages : PathHeader.Factory
514             .getSectionIdsToPageIds().keyValuesSet()) {
515             final SectionId section = sectionAndPages.getKey();
516             if (section == SectionId.Supplemental || section == SectionId.BCP47) {
517                 continue;
518             }
519             logln(section.toString());
520             for (PageId page : sectionAndPages.getValue()) {
521                 final Set<String> cachedPaths = PathHeader.Factory
522                     .getCachedPaths(section, page);
523                 if (cachedPaths == null) {
524                     if (!badZonePages.contains(page) && page != PageId.Unknown) {
525                         errln("Null pages for: " + section + "\t" + page);
526                     }
527                 } else if (section == SectionId.Special
528                     && page == PageId.Unknown) {
529                     // skip
530                 } else if (section == SectionId.Timezones
531                     && page == PageId.UnknownT) {
532                     // skip
533                 } else if (section == SectionId.Misc
534                     && page == PageId.Transforms) {
535                     // skip
536                 } else {
537 
538                     int count2 = cachedPaths.size();
539                     if (count2 == 0) {
540                         warnln("Missing pages for: " + section + "\t" + page);
541                     } else {
542                         counter.clear();
543                         for (String s : cachedPaths) {
544                             Level coverage = supplemental.getCoverageLevel(s,
545                                 localeId);
546                             counter.add(coverage, 1);
547                         }
548                         String countString = "";
549                         int total = 0;
550                         for (Level item : Level.values()) {
551                             long count = counter.get(item);
552                             if (count != 0) {
553                                 if (!countString.isEmpty()) {
554                                     countString += ",\t+";
555                                 }
556                                 total += count;
557                                 countString += item + "=" + total;
558                             }
559                         }
560                         logln("\t" + page + "\t" + countString);
561                         if (page.toString().startsWith("Unknown")) {
562                             logln("\t\t" + cachedPaths);
563                         }
564                     }
565                 }
566             }
567         }
568     }
569 
TestMetazones()570     public void TestMetazones() {
571 
572         CLDRFile nativeFile = info.getEnglish();
573         Set<PathHeader> pathHeaders = getPathHeaders(nativeFile);
574         // String oldPage = "";
575         String oldHeader = "";
576         for (PathHeader entry : pathHeaders) {
577             final String page = entry.getPage();
578             // if (!oldPage.equals(page)) {
579             // logln(page);
580             // oldPage = page;
581             // }
582             String header = entry.getHeader();
583             if (!oldHeader.equals(header)) {
584                 logln(page + "\t" + header);
585                 oldHeader = header;
586             }
587         }
588     }
589 
getPathHeaders(CLDRFile nativeFile)590     public Set<PathHeader> getPathHeaders(CLDRFile nativeFile) {
591         Set<PathHeader> pathHeaders = new TreeSet<>();
592         for (String path : nativeFile.fullIterable()) {
593             PathHeader p = pathHeaderFactory.fromPath(path);
594             pathHeaders.add(p);
595         }
596         return pathHeaders;
597     }
598 
verifyContains(PageId pageId, Set<String> filePaths, String substring, boolean contains)599     public void verifyContains(PageId pageId, Set<String> filePaths,
600         String substring, boolean contains) {
601         String path;
602         path = findOneContaining(allPaths(pageId, filePaths), substring);
603         if (contains) {
604             if (path == null) {
605                 errln("No path contains <" + substring + ">");
606             }
607         } else {
608             if (path != null) {
609                 errln("Path contains <" + substring + ">\t" + path);
610             }
611         }
612     }
613 
findOneContaining(Collection<String> allPaths, String substring)614     private String findOneContaining(Collection<String> allPaths,
615         String substring) {
616         for (String path : allPaths) {
617             if (path.contains(substring)) {
618                 return path;
619             }
620         }
621         return null;
622     }
623 
allPaths(PageId pageId, Set<String> filePaths)624     public Set<String> allPaths(PageId pageId, Set<String> filePaths) {
625         Set<String> result = PathHeader.Factory.getCachedPaths(
626             pageId.getSectionId(), pageId);
627         result.retainAll(filePaths);
628         return result;
629     }
630 
TestUniqueness()631     public void TestUniqueness() {
632         Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory();
633         Set<String> source = factory2.getAvailable();
634         for (String file : getFilesToTest(source, MIN_LOCALES)) {
635             CLDRFile nativeFile = factory2.make(file,true);
636             Map<PathHeader, String> headerToPath = new HashMap<>();
637             Map<String, String> headerVisibleToPath = new HashMap<>();
638             for (String path : nativeFile.fullIterable()) {
639                 PathHeader p = pathHeaderFactory.fromPath(path);
640                 if (p.getSectionId() == SectionId.Special) {
641                     continue;
642                 }
643                 String old = headerToPath.get(p);
644                 if (old == null) {
645                     headerToPath.put(p, path);
646                 } else if (!old.equals(path)) {
647                     if (true) { // for debugging
648                         pathHeaderFactory.clearCache();
649                         List<String> failuresOld = new ArrayList<>();
650                         pathHeaderFactory.fromPath(old, failuresOld);
651                         List<String> failuresPath = new ArrayList<>();
652                         pathHeaderFactory.fromPath(path, failuresPath);
653                     }
654                     errln(file + " collision with path " + p + "\t" + old + "\t" + path);
655                 }
656                 final String visible = p.toString();
657                 old = headerVisibleToPath.get(visible);
658                 if (old == null) {
659                     headerVisibleToPath.put(visible, path);
660                 } else if (!old.equals(path)) {
661                     errln("Collision with path " + visible + "\t" + old + "\t"
662                         + path);
663                 }
664             }
665         }
666     }
667 
TestStatus()668     public void TestStatus() {
669         CLDRFile nativeFile = info.getEnglish();
670         PathStarrer starrer = new PathStarrer();
671         EnumMap<SurveyToolStatus, Relation<String, String>> info2 = new EnumMap<>(
672             SurveyToolStatus.class);
673         Set<String> nuked = new HashSet<>();
674         Set<String> deprecatedStar = new HashSet<>();
675         Set<String> differentStar = new HashSet<>();
676 
677         for (String path : nativeFile.fullIterable()) {
678 
679             PathHeader p = pathHeaderFactory.fromPath(path);
680             final SurveyToolStatus surveyToolStatus = p.getSurveyToolStatus();
681 
682             if (p.getSectionId() == SectionId.Special
683                 && surveyToolStatus == SurveyToolStatus.READ_WRITE) {
684                 errln("SurveyToolStatus should not be " + surveyToolStatus
685                     + ": " + p);
686             }
687 
688             final SurveyToolStatus tempSTS = surveyToolStatus == SurveyToolStatus.DEPRECATED ? SurveyToolStatus.HIDE
689                 : surveyToolStatus;
690             String starred = starrer.set(path);
691             List<String> attr = starrer.getAttributes();
692             if (surveyToolStatus != SurveyToolStatus.READ_WRITE) {
693                 nuked.add(starred);
694             }
695 
696             // check against old
697             SurveyToolStatus oldStatus = SurveyToolStatus.READ_WRITE;
698 
699             if (tempSTS != oldStatus
700                 && oldStatus != SurveyToolStatus.READ_WRITE
701                 && !path.endsWith(APPEND_TIMEZONE_END)) {
702                 if (!differentStar.contains(starred)) {
703                     errln("Different from old:\t" + oldStatus + "\tnew:\t"
704                         + surveyToolStatus + "\t" + path);
705                     differentStar.add(starred);
706                 }
707             }
708 
709             // check against deprecated
710             boolean isDeprecated = supplemental.isDeprecated(DtdType.ldml, path);
711             if (isDeprecated != (surveyToolStatus == SurveyToolStatus.DEPRECATED)) {
712                 if (!deprecatedStar.contains(starred)) {
713                     errln("Different from DtdData deprecated:\t"
714                         + isDeprecated + "\t" + surveyToolStatus + "\t"
715                         + path);
716                     deprecatedStar.add(starred);
717                 }
718             }
719 
720             Relation<String, String> data = info2.get(surveyToolStatus);
721             if (data == null) {
722                 info2.put(
723                     surveyToolStatus,
724                     data = Relation.of(new TreeMap<String, Set<String>>(),
725                         TreeSet.class));
726             }
727             data.put(starred, Joiner.on("|").join(attr));
728         }
729         for (Entry<SurveyToolStatus, Relation<String, String>> entry : info2
730             .entrySet()) {
731             final SurveyToolStatus status = entry.getKey();
732             for (Entry<String, Set<String>> item : entry.getValue()
733                 .keyValuesSet()) {
734                 final String starred = item.getKey();
735                 if (status == SurveyToolStatus.READ_WRITE
736                     && !nuked.contains(starred)) {
737                     continue;
738                 }
739                 logln(status + "\t" + starred + "\t" + item.getValue());
740             }
741         }
742     }
743 
TestPathsNotInEnglish()744     public void TestPathsNotInEnglish() {
745         Set<String> englishPaths = new HashSet<>();
746         for (String path : english.fullIterable()) {
747             englishPaths.add(path);
748         }
749         Set<String> alreadySeen = new HashSet<>(englishPaths);
750 
751         for (String locale : factory.getAvailable()) {
752             CLDRFile nativeFile = info.getCLDRFile(locale, false);
753             CoverageLevel2 coverageLevel2 = null;
754             for (String path : nativeFile.fullIterable()) {
755                 if (alreadySeen.contains(path) || path.contains("@count")) {
756                     continue;
757                 }
758                 if (coverageLevel2 == null) {
759                     coverageLevel2 = CoverageLevel2.getInstance(locale);
760                 }
761                 Level level = coverageLevel2.getLevel(path);
762                 if (Level.COMPREHENSIVE.compareTo(level) < 0) {
763                     continue;
764                 }
765                 logln("Path not in English\t" + locale + "\t" + path);
766                 alreadySeen.add(path);
767             }
768         }
769     }
770 
TestPathDescriptionCompleteness()771     public void TestPathDescriptionCompleteness() {
772         PathDescription pathDescription = new PathDescription(supplemental,
773             english, null, null, PathDescription.ErrorHandling.CONTINUE);
774         Matcher normal = PatternCache.get(
775             "http://cldr.org/translation/[-a-zA-Z0-9_]").matcher("");
776         // http://cldr.unicode.org/translation/plurals#TOC-Minimal-Pairs
777         Set<String> alreadySeen = new HashSet<>();
778         PathStarrer starrer = new PathStarrer();
779 
780         checkPathDescriptionCompleteness(pathDescription, normal,
781             "//ldml/numbers/defaultNumberingSystem", alreadySeen, starrer);
782         for (PathHeader pathHeader : getPathHeaders(english)) {
783             final SurveyToolStatus surveyToolStatus = pathHeader
784                 .getSurveyToolStatus();
785             if (surveyToolStatus == SurveyToolStatus.DEPRECATED
786                 || surveyToolStatus == SurveyToolStatus.HIDE) {
787                 continue;
788             }
789             String path = pathHeader.getOriginalPath();
790             checkPathDescriptionCompleteness(pathDescription, normal, path,
791                 alreadySeen, starrer);
792         }
793     }
794 
checkPathDescriptionCompleteness( PathDescription pathDescription, Matcher normal, String path, Set<String> alreadySeen, PathStarrer starrer)795     public void checkPathDescriptionCompleteness(
796         PathDescription pathDescription, Matcher normal, String path,
797         Set<String> alreadySeen, PathStarrer starrer) {
798         String value = english.getStringValue(path);
799         String description = pathDescription.getDescription(path, value, null,
800             null);
801         String starred = starrer.set(path);
802         if (alreadySeen.contains(starred)) {
803             return;
804         } else if (description == null) {
805             errln("Path has no description:\t" + value + "\t" + path);
806         } else if (!description.contains("http://")) {
807             errln("Description has no URL:\t" + description + "\t" + value
808                 + "\t" + path);
809         } else if (!normal.reset(description).find()) {
810             errln("Description has generic URL, fix to be specific:\t"
811                 + description + "\t" + value + "\t" + path);
812         } else if (description == PathDescription.MISSING_DESCRIPTION) {
813             errln("Fallback Description:\t" + value + "\t" + path);
814         } else {
815             return;
816         }
817         // Add if we had a problem, keeping us from being overwhelmed with
818         // errors.
819         alreadySeen.add(starred);
820     }
821 
TestTerritoryOrder()822     public void TestTerritoryOrder() {
823         final Set<String> goodAvailableCodes = CLDRConfig.getInstance()
824             .getStandardCodes().getGoodAvailableCodes("territory");
825         Set<String> results = showContained("001", 0, new HashSet<>(
826             goodAvailableCodes));
827         results.remove("ZZ");
828         for (String territory : results) {
829             String sub = Containment.getSubcontinent(territory);
830             String cont = Containment.getContinent(territory);
831             errln("Missing\t" + getNameAndOrder(territory) + "\t"
832                 + getNameAndOrder(sub) + "\t" + getNameAndOrder(cont));
833         }
834     }
835 
showContained(String territory, int level, Set<String> soFar)836     private Set<String> showContained(String territory, int level,
837         Set<String> soFar) {
838         if (!soFar.contains(territory)) {
839             return soFar;
840         }
841         soFar.remove(territory);
842         Set<String> contained = supplemental.getContained(territory);
843         if (contained == null) {
844             return soFar;
845         }
846         for (String containedItem : contained) {
847             logln(level + "\t" + getNameAndOrder(territory) + "\t"
848                 + getNameAndOrder(containedItem));
849         }
850         for (String containedItem : contained) {
851             showContained(containedItem, level + 1, soFar);
852         }
853         return soFar;
854     }
855 
getNameAndOrder(String territory)856     private String getNameAndOrder(String territory) {
857         return territory + "\t"
858             + english.getName(CLDRFile.TERRITORY_NAME, territory) + "\t"
859             + Containment.getOrder(territory);
860     }
861 
TestZCompleteness()862     public void TestZCompleteness() {
863         Map<String, PathHeader> uniqueness = new HashMap<>();
864         Set<String> alreadySeen = new HashSet<>();
865         LanguageTagParser ltp = new LanguageTagParser();
866         int count = 0;
867         for (String locale : factory.getAvailable()) {
868             if (!ltp.set(locale).getRegion().isEmpty()) {
869                 continue;
870             }
871             check(locale, false, uniqueness, alreadySeen);
872             ++count;
873         }
874         logln("Count:\t" + count);
875     }
876 
check(String localeID, boolean resolved, Map<String, PathHeader> uniqueness, Set<String> alreadySeen)877     public void check(String localeID, boolean resolved,
878         Map<String, PathHeader> uniqueness, Set<String> alreadySeen) {
879         CLDRFile nativeFile = info.getCLDRFile(localeID, resolved);
880         int count = 0;
881         for (String path : nativeFile) {
882             if (alreadySeen.contains(path)) {
883                 continue;
884             }
885             alreadySeen.add(path);
886             final PathHeader pathHeader = pathHeaderFactory.fromPath(path);
887             ++count;
888             if (pathHeader == null) {
889                 errln("Null pathheader for " + path);
890             } else {
891                 String visible = pathHeader.toString();
892                 PathHeader old = uniqueness.get(visible);
893                 if (pathHeader.getSectionId() == SectionId.Timezones) {
894                     final PageId pageId = pathHeader.getPageId();
895                     if (badZonePages.contains(pageId)
896                         && !pathHeader.getCode().equals("Unknown")) {
897                         String msg = "Bad page ID:\t" + pageId + "\t" + pathHeader + "\t" + path;
898                         if (!logKnownIssue("cldrbug:7802", "ICU/CLDR time zone data sync problem - " + msg)) {
899                             errln("Bad page ID:\t" + pageId + "\t" + pathHeader
900                                 + "\t" + path);
901                         }
902                     }
903                 }
904                 if (old == null) {
905                     if (pathHeader.getSection().equals("Special")) {
906                         if (pathHeader.getSection().equals("Unknown")) {
907                             errln("PathHeader has fallback: " + visible + "\t"
908                                 + pathHeader.getOriginalPath());
909                             // } else {
910                             // logln("Special:\t" + visible + "\t" +
911                             // pathHeader.getOriginalPath());
912                         }
913                     }
914                     uniqueness.put(visible, pathHeader);
915                 } else if (!old.equals(pathHeader)) {
916                     if (pathHeader.getSectionId() == SectionId.Special) {
917                         logln("Special PathHeader not unique: " + visible
918                             + "\t" + pathHeader.getOriginalPath() + "\t"
919                             + old.getOriginalPath());
920                     } else {
921                         errln("PathHeader not unique: " + visible + "\t"
922                             + pathHeader.getOriginalPath() + "\t"
923                             + old.getOriginalPath());
924                     }
925                 }
926             }
927         }
928         logln(localeID + "\t" + count);
929     }
930 
TestContainment()931     public void TestContainment() {
932         Map<String, Map<String, String>> metazoneToRegionToZone = supplemental
933             .getMetazoneToRegionToZone();
934         Map<String, String> metazoneToContinent = supplemental
935             .getMetazoneToContinentMap();
936         for (String metazone : metazoneToRegionToZone.keySet()) {
937             Map<String, String> regionToZone = metazoneToRegionToZone
938                 .get(metazone);
939             String worldZone = regionToZone.get("001");
940             String territory = Containment.getRegionFromZone(worldZone);
941             if (territory == null) {
942                 territory = "ZZ";
943             }
944             String cont = Containment.getContinent(territory);
945             int order = Containment.getOrder(territory);
946             String sub = Containment.getSubcontinent(territory);
947             String revision = PathHeader.getMetazonePageTerritory(metazone);
948             String continent = metazoneToContinent.get(metazone);
949             if (continent == null) {
950                 continent = "UnknownT";
951             }
952             // Russia, Antarctica => territory
953             // in Australasia, Asia, S. America => subcontinent
954             // in N. America => N. America (grouping of 3 subcontinents)
955             // in everything else => continent
956 
957             if (territory.equals("RU")) {
958                 assertEquals("Russia special case", "RU", revision);
959             } else if (territory.equals("US")) {
960                 assertEquals("N. America special case", "003", revision);
961             } else if (territory.equals("BR")) {
962                 assertEquals("S. America special case", "005", revision);
963             }
964             if (isVerbose()) {
965                 String name = english.getName(CLDRFile.TERRITORY_NAME, cont);
966                 String name2 = english.getName(CLDRFile.TERRITORY_NAME, sub);
967                 String name3 = english.getName(CLDRFile.TERRITORY_NAME,
968                     territory);
969                 String name4 = english.getName(CLDRFile.TERRITORY_NAME,
970                     revision);
971 
972                 logln(metazone + "\t" + continent + "\t" + name + "\t" + name2
973                     + "\t" + name3 + "\t" + order + "\t" + name4);
974             }
975         }
976     }
977 
TestZ()978     public void TestZ() {
979         PathStarrer pathStarrer = new PathStarrer();
980         pathStarrer.setSubstitutionPattern("%A");
981 
982         Set<PathHeader> sorted = new TreeSet<>();
983         Map<String, String> missing = new TreeMap<>();
984         Map<String, String> skipped = new TreeMap<>();
985         Map<String, String> collide = new TreeMap<>();
986 
987         logln("Traversing Paths");
988         for (String path : english) {
989             PathHeader pathHeader = pathHeaderFactory.fromPath(path);
990             String value = english.getStringValue(path);
991             if (pathHeader == null) {
992                 final String starred = pathStarrer.set(path);
993                 missing.put(starred, value + "\t" + path);
994                 continue;
995             }
996             if (pathHeader.getSection().equalsIgnoreCase("skip")) {
997                 final String starred = pathStarrer.set(path);
998                 skipped.put(starred, value + "\t" + path);
999                 continue;
1000             }
1001             sorted.add(pathHeader);
1002         }
1003         logln("\nConverted:\t" + sorted.size());
1004         String lastHeader = "";
1005         String lastPage = "";
1006         String lastSection = "";
1007         List<String> threeLevel = new ArrayList<>();
1008         Status status = new Status();
1009         CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("en");
1010 
1011         for (PathHeader pathHeader : sorted) {
1012             String original = pathHeader.getOriginalPath();
1013             if (!original.equals(status.pathWhereFound)) {
1014                 continue;
1015             }
1016             if (!lastSection.equals(pathHeader.getSection())) {
1017                 logln("");
1018                 threeLevel.add(pathHeader.getSection());
1019                 threeLevel.add("\t" + pathHeader.getPage());
1020                 threeLevel.add("\t\t" + pathHeader.getHeader());
1021                 lastSection = pathHeader.getSection();
1022                 lastPage = pathHeader.getPage();
1023                 lastHeader = pathHeader.getHeader();
1024             } else if (!lastPage.equals(pathHeader.getPage())) {
1025                 logln("");
1026                 threeLevel.add("\t" + pathHeader.getPage());
1027                 threeLevel.add("\t\t" + pathHeader.getHeader());
1028                 lastPage = pathHeader.getPage();
1029                 lastHeader = pathHeader.getHeader();
1030             } else if (!lastHeader.equals(pathHeader.getHeader())) {
1031                 logln("");
1032                 threeLevel.add("\t\t" + pathHeader.getHeader());
1033                 lastHeader = pathHeader.getHeader();
1034             }
1035             logln(pathHeader + "\t" + coverageLevel2.getLevel(original) + "\t"
1036                 + english.getStringValue(pathHeader.getOriginalPath())
1037                 + "\t" + pathHeader.getOriginalPath());
1038         }
1039         if (collide.size() != 0) {
1040             errln("\nCollide:\t" + collide.size());
1041             for (Entry<String, String> item : collide.entrySet()) {
1042                 errln("\t" + item);
1043             }
1044         }
1045         if (missing.size() != 0) {
1046             errln("\nMissing:\t" + missing.size());
1047             for (Entry<String, String> item : missing.entrySet()) {
1048                 errln("\t" + item.getKey() + "\tvalue:\t" + item.getValue());
1049             }
1050         }
1051         if (skipped.size() != 0) {
1052             errln("\nSkipped:\t" + skipped.size());
1053             for (Entry<String, String> item : skipped.entrySet()) {
1054                 errln("\t" + item);
1055             }
1056         }
1057         Counter<PathHeader.Factory.CounterData> counterData = pathHeaderFactory
1058             .getInternalCounter();
1059         logln("\nInternal Counter:\t" + counterData.size());
1060         for (PathHeader.Factory.CounterData item : counterData.keySet()) {
1061             logln("\t" + counterData.getCount(item) + "\t" + item.get2() // externals
1062             + "\t" + item.get3() + "\t" + item.get0() // internals
1063             + "\t" + item.get1());
1064         }
1065         logln("\nMenus/Headers:\t" + threeLevel.size());
1066         for (String item : threeLevel) {
1067             logln(item);
1068         }
1069         LinkedHashMap<String, Set<String>> sectionsToPages = org.unicode.cldr.util.PathHeader.Factory
1070             .getSectionsToPages();
1071         logln("\nMenus:\t" + sectionsToPages.size());
1072         for (Entry<String, Set<String>> item : sectionsToPages.entrySet()) {
1073             final String section = item.getKey();
1074             for (String page : item.getValue()) {
1075                 logln("\t" + section + "\t" + page);
1076                 int count = 0;
1077                 for (String path : pathHeaderFactory.filterCldr(section, page,
1078                     english)) {
1079                     count += 1; // just count them.
1080                 }
1081                 logln("\t" + count);
1082             }
1083         }
1084     }
1085 
    /**
     * Path prefixes used by TestOrder: every path of the German file that starts
     * with one of these prefixes has its PathHeader ordering checked.
     *
     * NOTE(review): the sixth entry spells the unitLength type "narrrow" (three r's),
     * which looks like a typo for "narrow"; as written that prefix presumably matches
     * no path. Correcting it would also require adding the narrow volume-liter paths
     * to germanExpected in TestOrder.
     */
    public static final Set<String> GERMAN_UNIT_ORDER = ImmutableSet.of(
        "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]",
        "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]",
        "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]",
        "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]",
        "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]",
        "//ldml/units/unitLength[@type=\"narrrow\"]/unit[@type=\"volume-liter\"]",
        "//ldml/numbers/minimalPairs/caseMinimalPairs",
        "//ldml/numbers/minimalPairs/genderMinimalPairs"
        );
TestOrder()1096     public void TestOrder() {
1097         String[] paths = {
1098             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"noon\"]",
1099             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"afternoon1\"]",
1100         };
1101         PathHeader pathHeaderLast = null;
1102         for (String path : paths) {
1103             PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1104             if (pathHeaderLast != null) {
1105                 assertRelation("ordering", true, pathHeaderLast, LEQ, pathHeader);
1106             }
1107             pathHeaderLast = pathHeader;
1108         }
1109         CLDRFile german = factory.make("de", true);
1110         Multimap<PathHeader, String> pathHeaderToPaths = TreeMultimap.create();
1111         for (String path : german.fullIterable()) {
1112             for (String prefix : GERMAN_UNIT_ORDER) {
1113                 if (path.startsWith(prefix)) {
1114                     PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1115                     pathHeaderToPaths.put(pathHeader, path);
1116                 }
1117             }
1118         }
1119         String[] germanExpected = {
1120             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/gender",
1121             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/displayName",
1122             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/perUnitPattern",
1123             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]",
1124             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"dative\"]",
1125             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]",
1126             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]",
1127             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]",
1128             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"dative\"]",
1129             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]",
1130             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]",
1131             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/displayName",
1132             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/perUnitPattern",
1133             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]",
1134             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]",
1135             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"accusative\"]",
1136             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"accusative\"]",
1137             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"accusative\"]",
1138             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"dative\"]",
1139             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"dative\"]",
1140             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"dative\"]",
1141             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"genitive\"]",
1142             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"genitive\"]",
1143             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"genitive\"]",
1144             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]",
1145             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]",
1146             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"]",
1147             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"accusative\"]",
1148             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"accusative\"]",
1149             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"accusative\"]",
1150             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"dative\"]",
1151             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"dative\"]",
1152             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"dative\"]",
1153             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"genitive\"]",
1154             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"genitive\"]",
1155             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"genitive\"]",
1156             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]",
1157             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]",
1158             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"]",
1159             "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]",
1160             "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]",
1161             "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]",
1162             "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]",
1163             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"accusative\"]",
1164             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"dative\"]",
1165             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"genitive\"]",
1166             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"nominative\"]",
1167             "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"feminine\"]",
1168             "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"masculine\"]",
1169             "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"neuter\"]",
1170             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1",
1171             "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1",
1172             "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1"};
1173 
1174         int germanExpectedIndex = 0;
1175         int errorCount = 0;
1176         for (Entry<PathHeader, Collection<String>> entry : pathHeaderToPaths.asMap().entrySet()) {
1177             PathHeader ph = entry.getKey();
1178             Collection<String> epaths = entry.getValue();
1179             if (!assertEquals(entry.toString(), 1, epaths.size())) {
1180                 ++errorCount;
1181             }
1182             if (!assertEquals("PathHeader order", germanExpected[germanExpectedIndex++], epaths.iterator().next())) {
1183                 ++errorCount;
1184             }
1185         }
1186         if (errorCount != 0) {
1187             for (Entry<PathHeader, Collection<String>> entry : pathHeaderToPaths.asMap().entrySet()) {
1188                 PathHeader ph = entry.getKey();
1189                 Collection<String> epaths = entry.getValue();
1190                 System.out.println("\"" + epaths.iterator().next().replace("\"", "\\\"") + "\",\t// " + ph);
1191             }
1192         }
1193     }
1194 
Test8414()1195     public void Test8414() {
1196         PathDescription pathDescription = new PathDescription(supplemental,
1197             english, null, null, PathDescription.ErrorHandling.CONTINUE);
1198 
1199         String prefix = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"";
1200         String suffix = "\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]";
1201 
1202         final String path0 = prefix + "format" + suffix;
1203         final String path1 = prefix + "stand-alone" + suffix;
1204         String v0 = english.getStringValue(path0);
1205         String v1 = english.getStringValue(path1);
1206         String p0 = pathDescription.getDescription(path0, v0, null, null);
1207         String p1 = pathDescription.getDescription(path1, v1, null, null);
1208         assertTrue("Check pd for format", p0.contains("in the morning"));
1209         assertTrue("Check pd for stand-alone", !p1.contains("in the morning"));
1210     }
1211 
TestCompletenessNonLdmlDtd()1212     public void TestCompletenessNonLdmlDtd() {
1213         PathChecker pathChecker = new PathChecker();
1214         Set<String> directories = new LinkedHashSet<>();
1215         Multimap<String, String> pathValuePairs = LinkedListMultimap.create();
1216         // get all the directories containing non-Ldml dtd files
1217         for (DtdType dtdType : DtdType.values()) {
1218             if (dtdType == DtdType.ldml || dtdType == DtdType.ldmlICU) {
1219                 continue;
1220             }
1221             DtdData dtdData = DtdData.getInstance(dtdType);
1222             for (String dir : dtdType.directories) {
1223                 if (DEBUG_DTD_TYPE != null && !DEBUG_DTD_TYPE.directories.contains(dir)) {
1224                     continue;
1225                 }
1226                 File dir2 = new File(COMMON_DIR + dir);
1227                 logln(dir2.getName());
1228                 for (String file : dir2.list()) {
1229                     // don't need to restrict with getFilesToTest(Arrays.asList(dir2.list()), "root", "en")) {
1230                     if (!file.endsWith(".xml")) {
1231                         continue;
1232                     }
1233                     if (DEBUG) warnln(" TestCompletenessNonLdmlDtd: " + dir + ", " + file);
1234                     logln(" \t" + file);
1235                     for (Pair<String, String> pathValue : XMLFileReader.loadPathValues(
1236                         dir2 + "/" + file, new ArrayList<Pair<String, String>>(), true)) {
1237                         final String path = pathValue.getFirst();
1238                         final String value = pathValue.getSecond();
1239 //                        logln("\t\t" + path);
1240                         if (path.startsWith("//supplementalData/unitPreferenceData/unitPreferences")
1241                             && path.contains("skeleton")) {
1242                             int debug = 0;
1243                         }
1244                         pathChecker.checkPathHeader(dtdData, path);
1245                     }
1246                 }
1247             }
1248         }
1249     }
1250 
1251     private class PathChecker {
1252         PathHeader.Factory phf = pathHeaderFactory;
1253         PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A");
1254 
1255         Set<String> badHeaders = new TreeSet<>();
1256         Map<PathHeader, PathHeader> goodHeaders = new HashMap<>();
1257         Set<PathHeader> seenBad = new HashSet<>();
1258         {
phf.clearCache()1259             phf.clearCache();
1260         }
1261 
checkPathHeader(DtdData dtdData, String rawPath)1262         public void checkPathHeader(DtdData dtdData, String rawPath) {
1263             XPathParts pathPlain = XPathParts.getFrozenInstance(rawPath);
1264             if (dtdData.isMetadata(pathPlain)) {
1265                 return;
1266             }
1267             if (dtdData.isDeprecated(pathPlain)) {
1268                 return;
1269             }
1270             Multimap<String, String> extras = HashMultimap.create();
1271             Set<String> fixedPaths = dtdData.getRegularizedPaths(pathPlain, extras);
1272             if (fixedPaths != null) {
1273                 for (String fixedPath : fixedPaths) {
1274                     checkSubpath(fixedPath);
1275                 }
1276             }
1277             for (String path : extras.keySet()) {
1278                 checkSubpath(path);
1279             }
1280         }
1281 
checkSubpath(String path)1282         public void checkSubpath(String path) {
1283             String message = ": Can't compute path header";
1284             if (path.contentEquals("//supplementalData/grammaticalData/grammaticalFeatures[@targets=\"nominal\"][@locales=\"it\"]/grammaticalGender/_values") ) {
1285                 int debug = 0;
1286             }
1287             PathHeader ph = null;
1288             try {
1289                 ph = phf.fromPath(path);
1290                 if (seenBad.contains(ph)) {
1291                     return;
1292                 }
1293                 if (ph.getPageId() == PageId.Deprecated) {
1294                     return; // don't care
1295                 }
1296                 if (ph.getPageId() != PageId.Unknown) {
1297                     PathHeader old = goodHeaders.put(ph, ph);
1298                     if (old != null && !path.equals(old.getOriginalPath())) {
1299                         errln("Duplicate path header for: " + ph
1300                             + "\n\t\t " + path
1301                             + "\n\t\t≠" + old.getOriginalPath());
1302                         seenBad.add(ph);
1303                     }
1304                     return;
1305                 }
1306                 // for debugging
1307                 phf.clearCache();
1308                 List<String> failures = new ArrayList<>();
1309                 ph = phf.fromPath(path, failures);
1310                 message = ": Unknown path header" + failures;
1311             } catch (Exception e) {
1312                 message = ": Exception in path header: " + e.getMessage();
1313             }
1314             String star = starrer.set(path);
1315             if (badHeaders.add(star)) {
1316                 errln(star + message + ", " + ph);
1317                 System.out.println("\tNo match in PathHeader.txt for " + path
1318                     + "\n\tYou get only one message for all paths matching " + star
1319                     + "\n\tFor example, check to see if the field in PathHeader.txt is in PathHeader.PageId."
1320                     + "\n\tIf not, either correct PathHeader.txt or add it to PageId"
1321                     + "\n\tIf you have a value attribute, you will need extra _ characters. The value attribute will show at the end with prefixed _, eg [...]/_skeleton."
1322                     + "If there can be a value for the path then that element will add _. "
1323                     );
1324             }
1325         }
1326     }
1327 
TestSupplementalItems()1328     public void TestSupplementalItems() {
1329         //      <weekOfPreference ordering="weekOfYear weekOfMonth" locales="am az bs cs cy da el et hi ky lt mk sk ta th"/>
1330         // logln(pathHeaderFactory.getRegexInfo());
1331         CLDRFile supplementalFile = CLDRConfig.getInstance().getSupplementalFactory().make("supplementalData", false);
1332         List<String> failures = new ArrayList<>();
1333         Multimap<String, String> pathValuePairs = LinkedListMultimap.create();
1334         for (String test : With.in(supplementalFile.iterator("//supplementalData/weekData"))) {
1335             failures.clear();
1336             XPathParts parts = XPathParts.getFrozenInstance(supplementalFile.getFullXPath(test));
1337             supplementalFile.getDtdData().getRegularizedPaths(parts, pathValuePairs);
1338             for (Entry<String, Collection<String>> entry : pathValuePairs.asMap().entrySet()) {
1339                 final String normalizedPath = entry.getKey();
1340                 final Collection<String> normalizedValue = entry.getValue();
1341                 PathHeader ph = pathHeaderFactory.fromPath(normalizedPath, failures);
1342                 if (ph == null || ph.getSectionId() == SectionId.Special) {
1343                     errln("Failure with " + test + " => " + normalizedPath + " = " + normalizedValue);
1344                 } else {
1345                     logln(ph + "\t" + test + " = " + normalizedValue);
1346                 }
1347             }
1348         }
1349     }
1350 
test10232()1351     public void test10232() {
1352         String[][] tests = {
1353             { "MMM", "Formats - Flexible - Date Formats" },
1354             { "dMM", "Formats - Flexible - Date Formats" },
1355             { "h", "Formats - Flexible - 12 Hour Time Formats" },
1356             { "hm", "Formats - Flexible - 12 Hour Time Formats" },
1357             { "Ehm", "Formats - Flexible - 12 Hour Time Formats" },
1358             { "H", "Formats - Flexible - 24 Hour Time Formats" },
1359             { "Hm", "Formats - Flexible - 24 Hour Time Formats" },
1360             { "EHm", "Formats - Flexible - 24 Hour Time Formats" },
1361         };
1362         for (String[] test : tests) {
1363             String path = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\""
1364                 + test[0] + "\"]";
1365             PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1366             assertEquals("flexible formats", test[1] + "|" + test[0], pathHeader.getHeader() + "|" + pathHeader.getCode());
1367         }
1368     }
1369 
1370     // Moved from TestAnnotations and generalized
testPathHeaderSize()1371     public void testPathHeaderSize() {
1372         String locale = "ar"; // choose one with lots of plurals
1373         int maxSize = 750;
1374         boolean showTable = false; // only printed if test fails or verbose
1375 
1376         Factory factory = CLDRConfig.getInstance().getCommonAndSeedAndMainAndAnnotationsFactory();
1377         CLDRFile english = factory.make(locale, true);
1378 
1379         PathHeader.Factory phf = PathHeader.getFactory(CLDRConfig.getInstance().getEnglish());
1380         Counter<PageId> counterPageId = new Counter<>();
1381         Counter<PageId> counterPageIdAll = new Counter<>();
1382         for (String path : english) {
1383             Level level = CLDRConfig.getInstance().getSupplementalDataInfo().getCoverageLevel(path, locale);
1384             PathHeader ph = phf.fromPath(path);
1385             if (level.compareTo(Level.MODERN) <= 0) {
1386                 counterPageId.add(ph.getPageId(), 1);
1387             }
1388             counterPageIdAll.add(ph.getPageId(), 1);
1389         }
1390         Set<R2<Long, PageId>> entrySetSortedByCount = counterPageId.getEntrySetSortedByCount(false, null);
1391         for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) {
1392             long size = sizeAndPageId.get0();
1393             PageId pageId = sizeAndPageId.get1();
1394             if (!assertTrue(pageId.getSectionId() + "/" + pageId + " size (" + size
1395                 + ") < " + maxSize + "?", size < maxSize)) {
1396                 showTable = true;
1397             }
1398             // System.out.println(pageId + "\t" + size);
1399         }
1400         if (showTable || isVerbose()) {
1401             for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) {
1402                 PageId pageId = sizeAndPageId.get1();
1403                 System.out.println(pageId.getSectionId() + "\t" + pageId + "\t" + sizeAndPageId.get0() + "\t" + counterPageIdAll.get(pageId));
1404             }
1405         }
1406     }
TestCLDR_11454()1407     public void TestCLDR_11454() {
1408         PathHeader.Factory phf = PathHeader.getFactory();
1409         PathHeader century = phf.fromPath("//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-century\"]/displayName");
1410         PathHeader decade =  phf.fromPath("//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-decade\"]/displayName");
1411         assertEquals("Section", century.getSectionId(), decade.getSectionId());
1412         assertEquals("Page", century.getPageId(), decade.getPageId());
1413     }
1414 
TestEmojiOrder()1415     public void TestEmojiOrder() {
1416         PathHeader.Factory phf = PathHeader.getFactory();
1417         String[] desiredOrder = {
1418             "��‍⚕", "��‍⚕", "��‍⚕",
1419             "��‍⚖", "��‍⚖", "��‍⚖"};
1420         List<PathHeader> pathHeaders = new ArrayList<>();
1421         for (String emoji : desiredOrder) {
1422             String base = "//ldml/annotations/annotation[@cp=\"" + emoji + "\"]";
1423             pathHeaders.add(phf.fromPath(base + "[@type=\"tts\"]"));
1424             pathHeaders.add(phf.fromPath(base));
1425             logln(emoji
1426                 + ": getEmojiMinorOrder="+ Emoji.getEmojiMinorOrder(Emoji.getMinorCategory(emoji))
1427                 + ", getEmojiToOrder="+ Emoji.getEmojiToOrder(emoji)
1428                 );
1429         }
1430         PathHeader lastItem = null;
1431         for (PathHeader item : pathHeaders) {
1432             if (lastItem != null) {
1433                 assertEquals("Section", lastItem.getSectionId(), item.getSectionId());
1434                 assertEquals("Page", lastItem.getPageId(), item.getPageId());
1435                 assertEquals("Header", lastItem.getHeader(), item.getHeader());
1436                 if (!assertTrue(lastItem + " < " + item, lastItem.compareTo(item) < 0)) {
1437                     lastItem.compareTo(item); // for debugging
1438                 }
1439             }
1440             lastItem = item;
1441         }
1442     }
1443 
TestQuotes()1444     public void TestQuotes() {
1445         // quotes should never appear in result
1446         PathHeader.Factory phf = PathHeader.getFactory();
1447         String[] tests = {
1448             "//supplementalData/plurals[@type=\"ordinal\"]/pluralRules[@locales=\"ig\"]/pluralRule[@count=\"other\"]",
1449             "//supplementalData/transforms/transform[@source=\"und-Khmr\"][@target=\"und-Latn\"]"
1450         };
1451         for (String test : tests) {
1452             PathHeader trial = phf.fromPath(test);
1453             assertEquals("No quotes in pathheader", false, trial.toString().contains("\""));
1454         }
1455     }
1456 }
1457