• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.ImmutableMap;
5 import com.google.common.collect.Multimap;
6 import com.google.common.collect.Sets;
7 import com.google.common.collect.TreeMultimap;
8 import com.ibm.icu.dev.test.TestFmwk;
9 import com.ibm.icu.impl.Relation;
10 import com.ibm.icu.impl.Row.R2;
11 import com.ibm.icu.text.NumberFormat;
12 import com.ibm.icu.text.UTF16;
13 import com.ibm.icu.text.UnicodeSet;
14 import com.ibm.icu.util.Output;
15 import java.io.File;
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Collection;
19 import java.util.HashSet;
20 import java.util.Iterator;
21 import java.util.LinkedHashMap;
22 import java.util.LinkedHashSet;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Map.Entry;
26 import java.util.Objects;
27 import java.util.Set;
28 import java.util.TreeMap;
29 import java.util.TreeSet;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import org.unicode.cldr.test.CoverageLevel2;
33 import org.unicode.cldr.tool.LikelySubtags;
34 import org.unicode.cldr.util.CLDRConfig;
35 import org.unicode.cldr.util.CLDRFile;
36 import org.unicode.cldr.util.CLDRFile.DraftStatus;
37 import org.unicode.cldr.util.CLDRFile.Status;
38 import org.unicode.cldr.util.CLDRLocale;
39 import org.unicode.cldr.util.CLDRPaths;
40 import org.unicode.cldr.util.CldrUtility;
41 import org.unicode.cldr.util.Counter;
42 import org.unicode.cldr.util.DtdType;
43 import org.unicode.cldr.util.Factory;
44 import org.unicode.cldr.util.GlossonymConstructor;
45 import org.unicode.cldr.util.GrammarInfo;
46 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
47 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
48 import org.unicode.cldr.util.LanguageTagParser;
49 import org.unicode.cldr.util.Level;
50 import org.unicode.cldr.util.LocaleIDParser;
51 import org.unicode.cldr.util.PathHeader;
52 import org.unicode.cldr.util.PathHeader.PageId;
53 import org.unicode.cldr.util.PathHeader.SectionId;
54 import org.unicode.cldr.util.PatternCache;
55 import org.unicode.cldr.util.PatternPlaceholders;
56 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
57 import org.unicode.cldr.util.SimpleFactory;
58 import org.unicode.cldr.util.SupplementalDataInfo;
59 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
60 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
61 import org.unicode.cldr.util.XPathParts;
62 
/**
 * This is the original TestFmwk test case for CLDRFile.
 *
 * @see org.unicode.cldr.util.TestCLDRFile
 * @see org.unicode.cldr.util.CLDRFile
 */
69 public class TestCLDRFile extends TestFmwk {
70     private static final boolean DISABLE_TIL_WORKS = false;
71 
72     static CLDRConfig testInfo = CLDRConfig.getInstance();
73     static SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo();
74 
main(String[] args)75     public static void main(String[] args) {
76         new TestCLDRFile().run(args);
77     }
78 
testFallbackNames()79     public void testFallbackNames() {
80         String[][] tests = {
81             {"zh-Hanb", "Chinese (Han with Bopomofo)"},
82             {"aaa", "Ghotuo"},
83             {"zh-RR", "Chinese (RR)"},
84             {"new_Newa_NP", "Newari (Newa, Nepal)"},
85         };
86         CLDRFile english = testInfo.getEnglish();
87         for (String[] test : tests) {
88             assertEquals("", test[1], english.getName(test[0]));
89         }
90     }
91 
92     // verify for all paths, if there is a count="other", then there is a
93     // count="x", for all x in keywords
testPlurals()94     public void testPlurals() {
95         for (String locale : new String[] {"fr", "en", "root", "ar", "ja"}) {
96             checkPlurals(locale);
97         }
98     }
99 
100     static final Pattern COUNT_MATCHER = Pattern.compile("\\[@count=\"([^\"]+)\"]");
101 
102     static final UnicodeSet DIGITS = new UnicodeSet('0', '9').freeze();
103 
checkPlurals(String locale)104     private void checkPlurals(String locale) {
105         CLDRFile cldrFile = testInfo.getCLDRFile(locale, true);
106         Matcher m = COUNT_MATCHER.matcher("");
107         Relation<String, String> skeletonToKeywords =
108                 Relation.of(
109                         new TreeMap<String, Set<String>>(cldrFile.getComparator()), TreeSet.class);
110         PluralInfo plurals = sdi.getPlurals(PluralType.cardinal, locale);
111         for (String path : cldrFile.fullIterable()) {
112             if (!path.contains("@count")) {
113                 continue;
114             }
115             if (!m.reset(path).find()) {
116                 throw new IllegalArgumentException();
117             }
118             if (DIGITS.containsAll(m.group(1))) {
119                 continue;
120             }
121             String skeleton = path.substring(0, m.start(1)) + ".*" + path.substring(m.end(1));
122             skeletonToKeywords.put(skeleton, m.group(1));
123         }
124         Set<String> normalKeywords = plurals.getAdjustedCountStrings();
125 
126         for (Entry<String, Set<String>> entry : skeletonToKeywords.keyValuesSet()) {
127             final String abbreviatedPath = entry.getKey();
128             Set<String> expected = normalKeywords;
129             if (abbreviatedPath.startsWith("//ldml/numbers/minimalPairs/pluralMinimalPairs")) {
130                 expected = plurals.getCanonicalKeywords();
131             }
132             assertEquals(
133                     "Incorrect keywords: " + locale + ", " + abbreviatedPath,
134                     expected,
135                     entry.getValue());
136         }
137     }
138 
139     static Factory cldrFactory = testInfo.getCldrFactory();
140 
141     static class LocaleInfo {
142         final String locale;
143         final CLDRFile cldrFile;
144         final Set<String> paths = new HashSet<>();
145 
LocaleInfo(String locale)146         LocaleInfo(String locale) {
147             this.locale = locale;
148             cldrFile = testInfo.getCLDRFile(locale, true);
149             for (String path : cldrFile.fullIterable()) {
150                 Level level = sdi.getCoverageLevel(path, locale);
151                 if (level.compareTo(Level.COMPREHENSIVE) > 0) {
152                     continue;
153                 }
154                 if (path.contains("[@count=") && !path.contains("[@count=\"other\"]")) {
155                     continue;
156                 }
157                 paths.add(path);
158             }
159         }
160     }
161 
162     static final boolean DEBUG = false;
163     static final boolean DEBUG_testExtraPaths = true;
164 
testExtraPaths()165     public void testExtraPaths() {
166         // for debugging
167         final CLDRFile german = CLDRConfig.getInstance().getCldrFactory().make("de", true);
168         getLogger().fine("");
169         Set<String> sorted = new TreeSet<>(german.getExtraPaths());
170         PathHeader.Factory phf = PathHeader.getFactory();
171         PatternPlaceholders pph = PatternPlaceholders.getInstance();
172 
173         for (String path : sorted) {
174             if (DEBUG_testExtraPaths
175                     && path.equals(
176                             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"acceleration-g-force\"]/unitPattern")) {
177                 List<String> failures = new ArrayList<>();
178                 phf.fromPath(path, failures);
179             }
180             PathHeader ph = phf.fromPath(path);
181             if (ph.getPageId() != PageId.Deprecated) {
182                 assertNotEquals(
183                         "bad placeholder: " + path + " ; " + ph,
184                         SectionId.Special,
185                         ph.getSectionId());
186             }
187             PlaceholderStatus phStatus = pph.getStatus(path);
188 
189             PlaceholderStatus expected =
190                     path.contains("/metazone")
191                                     || path.contains("/timeZoneNames")
192                                     || path.contains("/gender")
193                                     || path.startsWith("//ldml/numbers/currencies/currency")
194                                     || path.startsWith("//ldml/personNames/sampleName")
195                             ? PlaceholderStatus.DISALLOWED
196                             : path.contains("/compoundUnitPattern1")
197                                     ? PlaceholderStatus.REQUIRED
198                                     : PlaceholderStatus.LOCALE_DEPENDENT;
199             if (!assertEquals(path, expected, phStatus)) {
200                 int debug = 0;
201             }
202 
203             if (DEBUG) {
204                 if (GrammaticalFeature.pathHasFeature(path) != null || path.endsWith("/gender")) {
205                     System.out.println(path + "\t" + german.getStringValue(path));
206                     String newPath = path.replace("[@case=\"accusative\"]", "");
207                     if (!newPath.contentEquals(path) && !sorted.contains(newPath)) {
208                         System.out.println(newPath + "\t" + german.getStringValue(newPath));
209                     }
210                 }
211             }
212         }
213 
214         Set<String> badCoverage = new TreeSet<>();
215         Counter<String> extraPaths = new Counter<>();
216         final Factory fullCldrFactory = CLDRConfig.getInstance().getFullCldrFactory();
217 
218         for (String locale : sdi.hasGrammarInfo()) {
219             if (sdi.getGrammarInfo(locale).hasInfo(GrammaticalTarget.nominal)) {
220                 if (!fullCldrFactory.getAvailable().contains(locale)) {
221                     if (CLDRConfig.SKIP_SEED) {
222                         continue; // don't check if skipping seed, for production
223                     }
224                 }
225                 final CLDRFile cldrFile = fullCldrFactory.make(locale, true);
226                 Set<String> sorted2 = new TreeSet<>(cldrFile.getExtraPaths());
227                 for (String path : sorted2) {
228                     if (path.contains("speed-beaufort")) {
229                         continue; // special case
230                     }
231                     if (path.contains("/gender")
232                             || path.contains("@gender")
233                             || path.contains("@case")) {
234                         Level level = sdi.getCoverageLevel(path, locale);
235                         if (level.compareTo(Level.MODERN) > 0) {
236                             badCoverage.add(path);
237                         }
238                         extraPaths.add(locale, 1);
239                     }
240                 }
241             }
242         }
243         getLogger().fine("Units with grammar info: " + GrammarInfo.getUnitsToAddGrammar().size());
244         getLogger().fine("Inflection Paths");
245         for (R2<Long, String> locale : extraPaths.getEntrySetSortedByCount(false, null)) {
246             getLogger().fine(locale.get0() + "\t" + locale.get1());
247         }
248         if (!badCoverage.isEmpty()) {
249             errln("Paths not at modern: " + Joiner.on("\n\t").join(badCoverage));
250         }
251 
252         //        Set<String> validUnits =
253         // Validity.getInstance().getStatusToCodes(LstrType.unit).get(Validity.Status.regular);
254         //        validUnits.forEach(System.out::println);
255         //
256         //        grammarInfo = testInfo.getSupplementalDataInfo().getGrammarInfo();
257         //        for (Entry<String, GrammarInfo> entry : grammarInfo.entrySet()) {
258         //            System.out.println(entry);
259         //        }
260 
261         //        CLDRFile toCheck = testInfo.getCldrFactory().make("de", true); //
262         // testInfo.getFullCldrFactory().make("en", false);
263         //        Set<String> sorted = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml));
264         //        sorted.addAll(toCheck.getExtraPaths());
265         //        for (String path : sorted) {
266         //            XPathParts parts = XPathParts.getFrozenInstance(path);
267         //            assertEquals("parts: ", parts.toString(), path);
268         //            System.out.println(path);
269         //        }
270         //        int debug = 0;
271 
272         Map<String, LocaleInfo> localeInfos = new LinkedHashMap<>();
273         Relation<String, String> missingPathsToLocales =
274                 Relation.of(
275                         new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)),
276                         TreeSet.class);
277         Relation<String, String> extraPathsToLocales =
278                 Relation.of(
279                         new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)),
280                         TreeSet.class);
281 
282         for (String locale : new String[] {"en", "root", "fr", "ar", "ja"}) {
283             localeInfos.put(locale, new LocaleInfo(locale));
284         }
285         LocaleInfo englishInfo = localeInfos.get("en");
286         for (String path : englishInfo.paths) {
287             if (path.startsWith("//ldml/identity/")
288                     || path.startsWith("//ldml/numbers/currencies/currency[@type=")
289                     // || path.startsWith("//ldml/dates/calendars/calendar") &&
290                     // !path.startsWith("//ldml/dates/calendars/calendar[@type=\"gregorian\"]")
291                     // ||
292                     // path.startsWith("//ldml/numbers/currencyFormats[@numberSystem=")
293                     // &&
294                     // !path.startsWith("//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]")
295                     || (path.contains("[@count=") && !path.contains("[@count=\"other\"]"))
296                     || (path.contains("[@ordinal=") && !path.contains("[@ordinal=\"other\"]"))
297                     || path.contains("dayPeriod[@type=\"noon\"]")) {
298                 continue;
299             }
300             for (LocaleInfo localeInfo : localeInfos.values()) {
301                 if (localeInfo == englishInfo) {
302                     continue;
303                 }
304                 if (!localeInfo.paths.contains(path)) {
305                     if (path.startsWith("//ldml/dates/calendars/calendar")
306                                     && !(path.contains("[@type=\"generic\"]")
307                                             || path.contains("[@type=\"gregorian\"]"))
308                             || (path.contains("/eras/")
309                                     && path.contains("[@alt=\"variant\"]")) // it is OK
310                             // for
311                             // just
312                             // "en"
313                             // to
314                             // have
315                             // /eras/.../era[@type=...][@alt="variant"]
316                             || path.contains("[@type=\"japanese\"]")
317                             || path.contains("[@type=\"coptic\"]")
318                             || path.contains("[@type=\"hebrew\"]")
319                             || path.contains("[@type=\"islamic-rgsa\"]")
320                             || path.contains("[@type=\"islamic-umalqura\"]")
321                             || path.contains("/relative[@type=\"-2\"]")
322                             || path.contains("/relative[@type=\"2\"]")
323                             || path.startsWith("//ldml/contextTransforms/contextTransformUsage")
324                             || path.contains("[@alt=\"variant\"]")
325                             || path.contains("[@alt=\"formal\"]")
326                             || path.contains("[@type=\"pressure-gasoline-energy-density\"]")
327                             || (path.contains("dayPeriod[@type=")
328                                     && (path.endsWith("1\"]")
329                                             || path.endsWith("\"am\"]")
330                                             || path.endsWith("\"pm\"]")
331                                             || path.endsWith(
332                                                     "\"midnight\"]"))) // morning1, afternoon1, ...
333                             || (localeInfo.locale.equals("root")
334                                     && (path.startsWith(
335                                                     "//ldml/characters/exemplarCharacters[@type=\"index\"]")
336                                             || (path.startsWith("//ldml/units/unitLength") // two
337                                                     // aliased
338                                                     // paths in
339                                                     // root
340                                                     && (path.contains("[@type=\"energy-foodcalorie")
341                                                             || path.contains(
342                                                                     "[@type=\"graphics-dot")))))
343                     // //ldml/characters/exemplarCharacters[@type="index"][root]
344                     ) {
345                         continue;
346                     }
347                     String localeAndStatus =
348                             localeInfo.locale + (englishInfo.cldrFile.isHere(path) ? "" : "*");
349                     missingPathsToLocales.put(path, localeAndStatus);
350                     // English contains the path, and the target locale doesn't.
351                     // The * means that the value is inherited (eg from root).
352                 }
353             }
354         }
355 
356         for (LocaleInfo localeInfo : localeInfos.values()) {
357             if (localeInfo == englishInfo) {
358                 continue;
359             }
360             for (String path : localeInfo.paths) {
361                 if (path.contains("[@numberSystem=\"arab\"]")
362                         || path.contains("[@type=\"japanese\"]")
363                         || path.contains("[@type=\"coptic\"]")
364                         || path.contains("[@type=\"hebrew\"]")
365                         || path.contains("[@type=\"islamic-rgsa\"]")
366                         || path.contains("[@type=\"islamic-umalqura\"]")
367                         || path.contains("/relative[@type=\"-2\"]")
368                         || path.contains("/relative[@type=\"2\"]")) {
369                     continue;
370                 }
371                 if (!englishInfo.paths.contains(path)) {
372                     String localeAndStatus =
373                             localeInfo.locale + (localeInfo.cldrFile.isHere(path) ? "" : "*");
374                     extraPathsToLocales.put(path, localeAndStatus);
375                     // English doesn't contains the path, and the target locale does.
376                     // The * means that the value is inherited (eg from root).
377                 }
378             }
379         }
380 
381         for (Entry<String, Set<String>> entry : missingPathsToLocales.keyValuesSet()) {
382             String path = entry.getKey();
383             Set<String> locales = entry.getValue();
384             Status status = new Status();
385             String originalLocale = englishInfo.cldrFile.getSourceLocaleID(path, status);
386             String engName =
387                     "en"
388                             + (englishInfo.cldrFile.isHere(path)
389                                     ? ""
390                                     : " (source_locale:"
391                                             + originalLocale
392                                             + (path.equals(status.pathWhereFound)
393                                                     ? ""
394                                                     : ", source_path: " + status)
395                                             + ")");
396             if (path.startsWith("//ldml/localeDisplayNames/")
397                     || path.contains("[@alt=\"accounting\"]")
398                     || path.contains("[@alt=\"alphaNextToNumber\"]") // CLDR-14336
399                     || path.contains("[@alt=\"ascii\"]") // CLDR-16606
400                     || path.contains("[@alt=\"noCurrency\"]") // CLDR-14336
401                     || path.startsWith("//ldml/personNames/") // CLDR-15384
402                     || path.startsWith("//ldml/typographicNames/styleName")
403                     || path.startsWith("//ldml/units")) {
404                 logln("+" + engName + ", -" + locales + "\t" + path);
405             } else {
406                 errln("+" + engName + ", -" + locales + "\t" + path);
407             }
408         }
409         for (Entry<String, Set<String>> entry : extraPathsToLocales.keyValuesSet()) {
410             String path = entry.getKey();
411             Set<String> locales = entry.getValue();
412             if (path.startsWith("//ldml/localeDisplayNames/")
413                     || path.startsWith("//ldml/numbers/otherNumberingSystems/")
414             // || path.contains("[@alt=\"accounting\"]")
415             ) {
416                 logln("-en, +" + locales + "\t" + path);
417             } else {
418                 logln("-en, +" + locales + "\t" + path);
419             }
420         }
421 
422         // for (String locale : new String[] { "fr", "ar", "ja" }) {
423         // CLDRFile cldrFile = cldrFactory.make(locale, true);
424         // Set<String> s = (Set<String>) cldrFile.getExtraPaths(new
425         // TreeSet<String>());
426         // System.out.println("Extras for " + locale);
427         // for (String path : s) {
428         // System.out.println(path + " => " + cldrFile.getStringValue(path));
429         // }
430         // System.out.println("Already in " + locale);
431         // for (Iterator<String> it =
432         // cldrFile.iterator(PatternCache.get(".*\\[@count=.*").matcher(""));
433         // it.hasNext();) {
434         // String path = it.next();
435         // System.out.println(path + " => " + cldrFile.getStringValue(path));
436         // }
437         // }
438     }
439 
440     // public void testDraftFilter() {
441     // Factory cldrFactory = Factory.make(CldrUtility.MAIN_DIRECTORY, ".*",
442     // DraftStatus.approved);
443     // checkLocale(cldrFactory.make("root", true));
444     // checkLocale(cldrFactory.make("ee", true));
445     // }
446 
checkLocale(CLDRFile cldr)447     public void checkLocale(CLDRFile cldr) {
448         Matcher m = PatternCache.get("gregorian.*eras").matcher("");
449         for (Iterator<String> it = cldr.iterator("", new UTF16.StringComparator());
450                 it.hasNext(); ) {
451             String path = it.next();
452             if (m.reset(path).find() && !path.contains("alias")) {
453                 errln(
454                         cldr.getLocaleID()
455                                 + "\t"
456                                 + cldr.getStringValue(path)
457                                 + "\t"
458                                 + cldr.getFullXPath(path));
459             }
460             if (path == null) {
461                 errln("Null path");
462             }
463             String fullPath = cldr.getFullXPath(path);
464             if (fullPath.contains("@draft")) {
465                 errln("File can't contain draft elements");
466             }
467         }
468     }
469 
470     // public void testTimeZonePath() {
471     // Factory cldrFactory = Factory.make(CldrUtility.MAIN_DIRECTORY, ".*");
472     // String tz = "Pacific/Midway";
473     // CLDRFile cldrFile = cldrFactory.make("lv", true);
474     // String retVal = cldrFile.getStringValue(
475     // "//ldml/dates/timeZoneNames/zone[@type=\"" + tz + "\"]/exemplarCity"
476     // , true).trim();
477     // errln(retVal);
478     // }
479 
testSimple()480     public void testSimple() {
481         double deltaTime = System.currentTimeMillis();
482         CLDRFile english = testInfo.getEnglish();
483         deltaTime = System.currentTimeMillis() - deltaTime;
484         logln("Creation: Elapsed: " + deltaTime / 1000.0 + " seconds");
485 
486         deltaTime = System.currentTimeMillis();
487         english.getStringValue("//ldml");
488         deltaTime = System.currentTimeMillis() - deltaTime;
489         logln("Creation: Elapsed: " + deltaTime / 1000.0 + " seconds");
490 
491         deltaTime = System.currentTimeMillis();
492         english.getStringValue("//ldml");
493         deltaTime = System.currentTimeMillis() - deltaTime;
494         logln("Caching: Elapsed: " + deltaTime / 1000.0 + " seconds");
495 
496         deltaTime = System.currentTimeMillis();
497         for (int j = 0; j < 2; ++j) {
498             for (Iterator<String> it = english.iterator(); it.hasNext(); ) {
499                 String dpath = it.next();
500                 String value = english.getStringValue(dpath);
501                 Set<String> paths = english.getPathsWithValue(value, "", null, null);
502                 if (paths.size() == 0) {
503                     continue;
504                 }
505                 if (!paths.contains(dpath)) {
506                     if (DISABLE_TIL_WORKS) {
507                         errln("Missing " + dpath + " in " + pathsWithValues(value, paths));
508                     }
509                 }
510                 if (paths.size() > 1) {
511                     Set<String> nonAliased = getNonAliased(paths, english);
512                     if (nonAliased.size() > 1) {
513                         logln(pathsWithValues(value, nonAliased));
514                     }
515                 }
516             }
517         }
518         deltaTime = System.currentTimeMillis() - deltaTime;
519         logln("Elapsed: " + deltaTime / 1000.0 + " seconds");
520     }
521 
pathsWithValues(String value, Set<String> paths)522     private String pathsWithValues(String value, Set<String> paths) {
523         return paths.size()
524                 + " paths with: <"
525                 + value
526                 + ">\t\tPaths: "
527                 + paths.iterator().next()
528                 + ",...";
529     }
530 
getNonAliased(Set<String> paths, CLDRFile file)531     private Set<String> getNonAliased(Set<String> paths, CLDRFile file) {
532         Set<String> result = new LinkedHashSet<>();
533         for (String path : paths) {
534             if (file.isHere(path)) {
535                 result.add(path);
536             }
537         }
538         return result;
539     }
540 
testResolution()541     public void testResolution() {
542         CLDRFile german = testInfo.getCLDRFile("de", true);
543         CLDRFile bal = testInfo.getCLDRFile("bal", true);
544         // Test direct lookup.
545 
546         String xpath = "//ldml/localeDisplayNames/localeDisplayPattern/localeSeparator";
547         String id = bal.getSourceLocaleID(xpath, null);
548         if (!id.equals("bal")) {
549             errln("Expected bal but was " + id + " for " + xpath);
550         }
551 
552         // Test aliasing.
553         xpath =
554                 "//ldml/dates/calendars/calendar[@type=\"islamic-civil\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"yyyyMEd\"]";
555         id = german.getSourceLocaleID(xpath, null);
556         if (!id.equals("de")) {
557             errln("Expected de but was " + id + " for " + xpath);
558         }
559 
560         // Test lookup that falls to root.
561         xpath =
562                 "//ldml/dates/calendars/calendar[@type=\"coptic\"]/months/monthContext[@type=\"stand-alone\"]/monthWidth[@type=\"narrow\"]/month[@type=\"5\"]";
563         id = german.getSourceLocaleID(xpath, null);
564         if (!id.equals("root")) {
565             errln("Expected root but was " + id + " for " + xpath);
566         }
567     }
568 
569     static final NumberFormat percent = NumberFormat.getPercentInstance();
570 
571     static final class Size {
572         int items;
573         int chars;
574 
add(String topValue)575         public void add(String topValue) {
576             items++;
577             chars += topValue.length();
578         }
579 
over(Size base)580         public String over(Size base) {
581             return "items: "
582                     + items
583                     + "("
584                     + percent.format(items / (0.0 + base.items))
585                     + "); "
586                     + "chars: "
587                     + chars
588                     + "("
589                     + percent.format(chars / (0.0 + base.chars))
590                     + ")";
591         }
592     }
593 
testGeorgeBailey()594     public void testGeorgeBailey() {
595         PathHeader.Factory phf = PathHeader.getFactory(testInfo.getEnglish());
596         for (String locale : Arrays.asList("de", "de_AT", "en", "nl")) {
597             CLDRFile cldrFile = testInfo.getCLDRFile(locale, true);
598 
599             CLDRFile cldrFileUnresolved = testInfo.getCLDRFile(locale, false);
600             Status status = new Status();
601             Output<String> localeWhereFound = new Output<>();
602             Output<String> pathWhereFound = new Output<>();
603 
604             Map<String, String> diff = new TreeMap<>(CLDRFile.getComparator(DtdType.ldml));
605 
606             Size countSuperfluous = new Size();
607             Size countExtraLevel = new Size();
608             Size countOrdinary = new Size();
609 
610             for (String path : cldrFile.fullIterable()) {
611                 String baileyValue =
612                         cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
613                 String topValue = cldrFileUnresolved.getStringValue(path);
614                 String resolvedValue = cldrFile.getStringValue(path);
615                 String unresolvedConstructedValue = cldrFileUnresolved.getConstructedValue(path);
616                 String resolvedConstructedValue = cldrFile.getConstructedValue(path);
617 
618                 // assertEquals("x≠y", "x", "y"); // expected x, got y
619                 if (unresolvedConstructedValue != null) {
620                     assertEquals(
621                             "uc≠rc\t" + locale + "\t" + phf.fromPath(path),
622                             unresolvedConstructedValue,
623                             resolvedConstructedValue);
624                 }
625 
626                 // if there is a value, then either it is at the top level or it
627                 // is the bailey value.
628                 // OR it is INHERITANCE_MARKER
629 
630                 if (resolvedValue != null) {
631                     if (topValue != null && !CldrUtility.INHERITANCE_MARKER.equals(topValue)) {
632                         if (!topValue.equals(cldrFileUnresolved.getConstructedValue(path))) {
633                             assertEquals(
634                                     "top≠resolved\t" + locale + "\t" + phf.fromPath(path),
635                                     topValue,
636                                     resolvedValue);
637                         }
638                     } else {
639                         String locale2 = cldrFile.getSourceLocaleID(path, status);
640                         if (!assertEquals(
641                                 "bailey value≠\t" + locale + "\t" + phf.fromPath(path),
642                                 resolvedValue,
643                                 baileyValue)) {
644                             baileyValue =
645                                     cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
646                             topValue = cldrFileUnresolved.getStringValue(path);
647                         }
648                         if (!assertEquals(
649                                 "bailey locale≠\t" + locale + "\t" + phf.fromPath(path),
650                                 locale2,
651                                 localeWhereFound.value)) {
652                             baileyValue =
653                                     cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
654                             topValue = cldrFileUnresolved.getStringValue(path);
655                         }
656                         if (!assertEquals(
657                                 "bailey path≠\t" + locale + "\t" + phf.fromPath(path),
658                                 status.pathWhereFound,
659                                 pathWhereFound.value)) {
660                             baileyValue =
661                                     cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
662                             topValue = cldrFileUnresolved.getStringValue(path);
663                         }
664                     }
665                 }
666 
667                 if (topValue != null) {
668                     if (CldrUtility.equals(topValue, baileyValue)) {
669                         countSuperfluous.add(topValue);
670                     } else if (sdi.getCoverageLevel(path, locale).compareTo(Level.MODERN) > 0) {
671                         countExtraLevel.add(topValue);
672                     }
673                     countOrdinary.add(topValue);
674                 }
675             }
676             logln("Superfluous (" + locale + "):\t" + countSuperfluous.over(countOrdinary));
677             logln(">Modern (" + locale + "):\t" + countExtraLevel.over(countOrdinary));
678             for (Entry<String, String> entry : diff.entrySet()) {
679                 logln(locale + "\t" + phf.fromPath(entry.getKey()) + ";\t" + entry.getValue());
680             }
681         }
682     }
683 
TestConstructedValue()684     public void TestConstructedValue() {
685         CLDRFile eng = CLDRConfig.getInstance().getEnglish();
686 
687         String prefix = GlossonymConstructor.PATH_PREFIX;
688         String display = eng.getConstructedValue(prefix + "zh_Hans" + "\"]");
689         assertEquals("contructed value", "Chinese (Simplified)", display);
690         display = eng.getConstructedValue(prefix + "es_US" + "\"]");
691         assertEquals("contructed value", "Spanish (United States)", display);
692         display = eng.getConstructedValue(prefix + "es_US" + "\"][@alt=\"short\"]");
693         assertEquals("contructed value", "Spanish (US)", display);
694         display = eng.getConstructedValue(prefix + "es" + "\"]");
695         assertEquals("contructed value", null, display);
696         display = eng.getConstructedValue(prefix + "missing" + "\"]");
697         assertEquals("contructed value", null, display);
698     }
699 
TestFileLocations()700     public void TestFileLocations() {
701         File mainDir = new File(CLDRPaths.MAIN_DIRECTORY);
702         if (!mainDir.isDirectory()) {
703             throw new IllegalArgumentException(
704                     "MAIN_DIRECTORY is not a directory: " + CLDRPaths.MAIN_DIRECTORY);
705         }
706         File mainCollationDir = new File(CLDRPaths.COLLATION_DIRECTORY);
707         if (!mainCollationDir.isDirectory()) {
708             throw new IllegalArgumentException(
709                     "COLLATION_DIRECTORY is not a directory: " + CLDRPaths.COLLATION_DIRECTORY);
710         }
711         if (CLDRConfig.SKIP_SEED) {
712             return;
713         }
714         File seedDir = new File(CLDRPaths.SEED_DIRECTORY);
715         if (!seedDir.isDirectory()) {
716             throw new IllegalArgumentException(
717                     "SEED_DIRECTORY is not a directory: " + CLDRPaths.SEED_DIRECTORY);
718         }
719         File seedCollationDir = new File(CLDRPaths.SEED_COLLATION_DIRECTORY);
720         if (!seedCollationDir.isDirectory()) {
721             throw new IllegalArgumentException(
722                     "SEED_COLLATION_DIRECTORY is not a directory: "
723                             + CLDRPaths.SEED_COLLATION_DIRECTORY);
724         }
725 
726         File[] md = {mainDir, mainCollationDir};
727         File[] sd = {seedDir, seedCollationDir};
728         Factory mf = SimpleFactory.make(md, ".*", DraftStatus.unconfirmed);
729         Factory sf = SimpleFactory.make(sd, ".*", DraftStatus.unconfirmed);
730         Set<CLDRLocale> mainLocales = mf.getAvailableCLDRLocales();
731         Set<CLDRLocale> seedLocales = sf.getAvailableCLDRLocales();
732         mainLocales.retainAll(seedLocales);
733         mainLocales.remove(CLDRLocale.getInstance("root")); // allow multiple roots
734         if (!mainLocales.isEmpty()) {
735             errln(
736                     "CLDR locale files located in both common and seed ==> "
737                             + mainLocales.toString());
738         }
739     }
740 
TestForStrayFiles()741     public void TestForStrayFiles() {
742         TreeSet<String> mainList =
743                 new TreeSet<>(Arrays.asList(new File(CLDRPaths.MAIN_DIRECTORY).list()));
744 
745         for (String dir : DtdType.ldml.directories) {
746             Set<String> dirFiles =
747                     new TreeSet<>(
748                             Arrays.asList(
749                                     new File(CLDRPaths.BASE_DIRECTORY + "common/" + dir).list()));
750             if (!mainList.containsAll(dirFiles)) {
751                 dirFiles.removeAll(mainList);
752                 errln(dir + "/ has extra files not in main/: " + dirFiles);
753             }
754         }
755     }
756 
TestFileIds()757     public void TestFileIds() {
758         Output<Map<String, Multimap<LdmlDir, Source>>> localeToDirToSource = new Output<>();
759         Map<LdmlDir, Multimap<String, Source>> dirToLocaleToSource = getFiles(localeToDirToSource);
760 
761         for (Entry<String, Multimap<LdmlDir, Source>> e : localeToDirToSource.value.entrySet()) {
762             String locale = e.getKey();
763             if (locale.equals("root")) {
764                 continue; // allow multiple root locales
765             }
766             Map<LdmlDir, Collection<Source>> value = e.getValue().asMap();
767             for (Entry<LdmlDir, Collection<Source>> e2 : value.entrySet()) {
768                 LdmlDir dir = e2.getKey();
769                 Collection<Source> sources = e2.getValue();
770                 if (sources.size() != 1) {
771                     errln(
772                             "Can only one have 1 instance of "
773                                     + locale
774                                     + " in "
775                                     + dir
776                                     + ", but have in "
777                                     + sources);
778                 }
779             }
780         }
781 
782         LikelySubtags likelySubtags = new LikelySubtags();
783 
784         for (Entry<LdmlDir, Multimap<String, Source>> dirAndLocaleToSource :
785                 dirToLocaleToSource.entrySet()) {
786             LdmlDir ldmlDir = dirAndLocaleToSource.getKey();
787             Multimap<String, Source> localesToDirs = dirAndLocaleToSource.getValue();
788             for (Entry<String, Source> localeAndDir : localesToDirs.entries()) {
789                 String loc = localeAndDir.getKey();
790                 if (loc.equals("root")) {
791                     continue;
792                 }
793                 Source source = localeAndDir.getValue();
794                 String parent = LocaleIDParser.getParent(loc);
795                 String parent2 = LanguageTagParser.getSimpleParent(loc);
796                 if (parent2.isEmpty()) {
797                     parent2 = "root";
798                 }
799                 String likely = likelySubtags.minimize(loc);
800                 if (!localesToDirs.containsKey(parent)) {
801                     errln(
802                             "Missing parent ("
803                                     + parent
804                                     + ") for "
805                                     + loc
806                                     + "  in "
807                                     + source
808                                     + "/"
809                                     + ldmlDir
810                                     + "; likely="
811                                     + likely);
812                 }
813                 if (!Objects.equals(parent, parent2) && !localesToDirs.containsKey(parent2)) {
814                     errln(
815                             "Missing simple parent ("
816                                     + parent2
817                                     + ") for "
818                                     + loc
819                                     + "  in "
820                                     + source
821                                     + "/"
822                                     + ldmlDir
823                                     + "; likely="
824                                     + likely);
825                 }
826             }
827 
828             // establish that the parent of locale is somewhere in the same
829             //                assertEquals(dir + " locale file has minimal id: ", min, loc);
830             //            if (!dir.endsWith("exemplars")) {
831             //                continue;
832             //            }
833             //            String trans = ltc.transform(loc);
834             //            System.out.println("\t" + min + "\t" + loc + "\t" + trans);
835         }
836     }
837 
/**
 * Source trees that are scanned for locale files.
 *
 * <p>The constant names are deliberately lowercase: {@code getFiles} converts the names of the
 * entries of the base directory with {@code Source.valueOf}, so each name must equal a
 * directory name exactly.
 */
enum Source {
    /** The "common" tree. */
    common,
    /** The "seed" tree. */
    seed,
    /** The "exemplars" tree. */
    exemplars;
}
843 
/**
 * LDML subdirectories of a source tree that hold per-locale files.
 *
 * <p>The constant names are deliberately spelled like the directory names: {@code getFiles}
 * converts subdirectory names with {@code LdmlDir.valueOf}, so each name must equal a directory
 * name exactly.
 */
enum LdmlDir {
    main,
    annotations,
    annotationsDerived,
    casing,
    collation,
    rbnf,
    segments,
    subdivisions;
}
854 
855     /**
856      * Returns a map from directory (eg main) to its parent (eg seed) and to their children (locales
857      * in seed/main)
858      */
getFiles( Output<Map<String, Multimap<LdmlDir, Source>>> localeToDirToSource)859     private Map<LdmlDir, Multimap<String, Source>> getFiles(
860             Output<Map<String, Multimap<LdmlDir, Source>>> localeToDirToSource) {
861 
862         Map<LdmlDir, Multimap<String, Source>> _dirToLocaleToSource = new TreeMap<>();
863         Map<String, Multimap<LdmlDir, Source>> _localeToDirToSource = new TreeMap<>();
864 
865         for (String base : new File(CLDRPaths.BASE_DIRECTORY).list()) {
866             Source source;
867             try {
868                 source = Source.valueOf(base);
869             } catch (Exception e) {
870                 continue;
871             }
872             String fullBase = CLDRPaths.BASE_DIRECTORY + base;
873             File fullBaseFile = new File(fullBase);
874             if (!fullBaseFile.isDirectory()) {
875                 continue;
876             }
877 
878             for (String sub1 : fullBaseFile.list()) {
879                 if (!DtdType.ldml.directories.contains(sub1)) {
880                     continue;
881                 }
882                 LdmlDir ldmlDir = LdmlDir.valueOf(sub1);
883                 String dir = fullBase + "/" + ldmlDir;
884                 for (String loc : new File(dir).list()) {
885                     if (!loc.endsWith(".xml")) {
886                         continue;
887                     }
888                     loc = loc.substring(0, loc.length() - 4);
889 
890                     put(_localeToDirToSource, loc, ldmlDir, source);
891                     put(_dirToLocaleToSource, ldmlDir, loc, source);
892                 }
893             }
894         }
895         localeToDirToSource.value =
896                 ImmutableMap.copyOf(_localeToDirToSource); // TODO protect subtrees
897         return ImmutableMap.copyOf(_dirToLocaleToSource);
898     }
899 
put(Map<A, Multimap<B, C>> aToBToC, A a, B b, C c)900     private <A, B, C> void put(Map<A, Multimap<B, C>> aToBToC, A a, B b, C c) {
901         Multimap<B, C> dirToSource = aToBToC.get(a);
902         if (dirToSource == null) {
903             aToBToC.put(a, dirToSource = (Multimap<B, C>) TreeMultimap.create());
904         }
905         dirToSource.put(b, c);
906     }
907 
TestSwissHighGerman()908     public void TestSwissHighGerman() {
909         CLDRFile swissHighGerman = testInfo.getCommonSeedExemplarsFactory().make("de_CH", true);
910         for (String xpath : swissHighGerman) {
911             if (xpath.equals("//ldml/characters/exemplarCharacters[@type=\"auxiliary\"]")) {
912                 continue;
913             }
914             String value = swissHighGerman.getStringValue(xpath);
915             if (value.indexOf('ß') >= 0) {
916                 warnln("«" + value + "» contains ß at " + xpath);
917             }
918         }
919     }
920 
TestExtraPaths()921     public void TestExtraPaths() {
922         List<String> testCases =
923                 Arrays.asList(
924                         "//ldml/localeDisplayNames/languages/language[@type=\"ccp\"]",
925                         "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]");
926         CLDRFile af = testInfo.getCldrFactory().make("af", true);
927         Set<String> missing = new HashSet<>(testCases);
928         CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("af");
929         PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(testInfo.getEnglish());
930         Status status = new Status();
931 
932         for (String xpath : af) {
933             if (missing.contains(xpath)) {
934                 String value = af.getStringValue(xpath);
935                 String source = af.getSourceLocaleID(xpath, status);
936                 Level level = coverageLevel2.getLevel(xpath);
937                 PathHeader ph = pathHeaderFactory.fromPath(xpath);
938                 getLogger()
939                         .fine(
940                                 ""
941                                         + "\nPathHeader:\t"
942                                         + ph
943                                         + "\nValue:\t"
944                                         + value
945                                         + "\nLevel:\t"
946                                         + level
947                                         + "\nReq. Locale:\t"
948                                         + "af"
949                                         + "\nSource Locale:\t"
950                                         + source
951                                         + "\nReq. XPath:\t"
952                                         + xpath
953                                         + "\nSource Path:\t"
954                                         + status);
955                 missing.remove(xpath);
956             }
957         }
958         assertTrue("Should be empty", missing.isEmpty());
959     }
960 
TestExtraPaths13954()961     public void TestExtraPaths13954() {
962         CLDRFile es = cldrFactory.make("es", true);
963         CLDRFile es_US = cldrFactory.make("es_US", true);
964         if (!es_US.getRawExtraPaths().containsAll(es.getRawExtraPaths())) {
965             errln(
966                     "Failure: "
967                             + Joiner.on('\n')
968                                     .join(
969                                             Sets.difference(
970                                                     es.getRawExtraPaths(),
971                                                     es_US.getRawExtraPaths())));
972         }
973     }
974 
testEnglishSideways()975     public void testEnglishSideways() {
976         CLDRFile fr = cldrFactory.make("fr", true);
977         CLDRFile en = cldrFactory.make("en", true);
978         System.out.println();
979         for (String path : fr.fullIterable()) {
980             if (!path.startsWith("//ldml/units") || path.endsWith("/gender")) {
981                 continue;
982             }
983             Status status = new Status();
984             String localeWhereFound = en.getSourceLocaleID(path, status);
985             if (!Objects.equals(path, status.pathWhereFound)) {
986                 XPathParts pathParts = XPathParts.getFrozenInstance(path);
987                 String type = pathParts.getAttributeValue(3, "type");
988                 XPathParts foundPathParts = XPathParts.getFrozenInstance(status.pathWhereFound);
989                 String foundType = foundPathParts.getAttributeValue(3, "type");
990                 if (Objects.equals(type, foundType)) {
991                     continue; // ok to go sideways within type
992                 }
993                 System.out.println(
994                         String.format("%s\t%s\t%s", path, status.pathWhereFound, localeWhereFound));
995             }
996         }
997     }
998 }
999