• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.ImmutableMap;
5 import com.google.common.collect.Multimap;
6 import com.google.common.collect.Sets;
7 import com.google.common.collect.TreeMultimap;
8 import com.ibm.icu.dev.test.TestFmwk;
9 import com.ibm.icu.impl.Relation;
10 import com.ibm.icu.impl.Row.R2;
11 import com.ibm.icu.text.NumberFormat;
12 import com.ibm.icu.text.UTF16;
13 import com.ibm.icu.text.UnicodeSet;
14 import com.ibm.icu.util.Output;
15 import java.io.File;
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Collection;
19 import java.util.HashSet;
20 import java.util.Iterator;
21 import java.util.LinkedHashMap;
22 import java.util.LinkedHashSet;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Map.Entry;
26 import java.util.Objects;
27 import java.util.Set;
28 import java.util.TreeMap;
29 import java.util.TreeSet;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import org.unicode.cldr.test.CoverageLevel2;
33 import org.unicode.cldr.tool.LikelySubtags;
34 import org.unicode.cldr.util.CLDRConfig;
35 import org.unicode.cldr.util.CLDRFile;
36 import org.unicode.cldr.util.CLDRFile.DraftStatus;
37 import org.unicode.cldr.util.CLDRFile.Status;
38 import org.unicode.cldr.util.CLDRLocale;
39 import org.unicode.cldr.util.CLDRPaths;
40 import org.unicode.cldr.util.CldrUtility;
41 import org.unicode.cldr.util.Counter;
42 import org.unicode.cldr.util.DtdType;
43 import org.unicode.cldr.util.Factory;
44 import org.unicode.cldr.util.GlossonymConstructor;
45 import org.unicode.cldr.util.GrammarInfo;
46 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
47 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
48 import org.unicode.cldr.util.LanguageTagParser;
49 import org.unicode.cldr.util.Level;
50 import org.unicode.cldr.util.LocaleIDParser;
51 import org.unicode.cldr.util.PathHeader;
52 import org.unicode.cldr.util.PathHeader.PageId;
53 import org.unicode.cldr.util.PathHeader.SectionId;
54 import org.unicode.cldr.util.PatternCache;
55 import org.unicode.cldr.util.PatternPlaceholders;
56 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
57 import org.unicode.cldr.util.SimpleFactory;
58 import org.unicode.cldr.util.SupplementalDataInfo;
59 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
60 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
61 import org.unicode.cldr.util.XPathParts;
62 
/**
 * This is the original TestFmwk test case for CLDRFile.
 *
 * @see org.unicode.cldr.util.TestCLDRFile
 * @see org.unicode.cldr.util.CLDRFile
 */
69 public class TestCLDRFile extends TestFmwk {
70     private static final boolean DISABLE_TIL_WORKS = false;
71 
72     static CLDRConfig testInfo = CLDRConfig.getInstance();
73     static SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo();
74 
main(String[] args)75     public static void main(String[] args) {
76         new TestCLDRFile().run(args);
77     }
78 
testFallbackNames()79     public void testFallbackNames() {
80         String[][] tests = {
81             {"zh-Hanb", "Chinese (Han with Bopomofo)"},
82             {"aaa", "Ghotuo"},
83             {"zh-RR", "Chinese (RR)"},
84             {"new_Newa_NP", "Newari (Newa, Nepal)"},
85         };
86         CLDRFile english = testInfo.getEnglish();
87         for (String[] test : tests) {
88             assertEquals("", test[1], english.getName(test[0]));
89         }
90     }
91 
92     // verify for all paths, if there is a count="other", then there is a
93     // count="x", for all x in keywords
testPlurals()94     public void testPlurals() {
95         for (String locale : new String[] {"fr", "en", "root", "ar", "ja"}) {
96             checkPlurals(locale);
97         }
98     }
99 
100     static final Pattern COUNT_MATCHER = Pattern.compile("\\[@count=\"([^\"]+)\"]");
101 
102     static final UnicodeSet DIGITS = new UnicodeSet('0', '9').freeze();
103 
checkPlurals(String locale)104     private void checkPlurals(String locale) {
105         CLDRFile cldrFile = testInfo.getCLDRFile(locale, true);
106         Matcher m = COUNT_MATCHER.matcher("");
107         Relation<String, String> skeletonToKeywords =
108                 Relation.of(
109                         new TreeMap<String, Set<String>>(cldrFile.getComparator()), TreeSet.class);
110         PluralInfo plurals = sdi.getPlurals(PluralType.cardinal, locale);
111         for (String path : cldrFile.fullIterable()) {
112             if (!path.contains("@count")) {
113                 continue;
114             }
115             if (!m.reset(path).find()) {
116                 throw new IllegalArgumentException();
117             }
118             if (DIGITS.containsAll(m.group(1))) {
119                 continue;
120             }
121             String skeleton = path.substring(0, m.start(1)) + ".*" + path.substring(m.end(1));
122             skeletonToKeywords.put(skeleton, m.group(1));
123         }
124         Set<String> normalKeywords = plurals.getAdjustedCountStrings();
125 
126         for (Entry<String, Set<String>> entry : skeletonToKeywords.keyValuesSet()) {
127             final String abbreviatedPath = entry.getKey();
128             Set<String> expected = normalKeywords;
129             if (abbreviatedPath.startsWith("//ldml/numbers/minimalPairs/pluralMinimalPairs")) {
130                 expected = plurals.getCanonicalKeywords();
131             }
132             assertEquals(
133                     "Incorrect keywords: " + locale + ", " + abbreviatedPath,
134                     expected,
135                     entry.getValue());
136         }
137     }
138 
139     static Factory cldrFactory = testInfo.getCldrFactory();
140 
141     static class LocaleInfo {
142         final String locale;
143         final CLDRFile cldrFile;
144         final Set<String> paths = new HashSet<>();
145 
LocaleInfo(String locale)146         LocaleInfo(String locale) {
147             this.locale = locale;
148             cldrFile = testInfo.getCLDRFile(locale, true);
149             for (String path : cldrFile.fullIterable()) {
150                 Level level = sdi.getCoverageLevel(path, locale);
151                 if (level.compareTo(Level.COMPREHENSIVE) > 0) {
152                     continue;
153                 }
154                 if (path.contains("[@count=") && !path.contains("[@count=\"other\"]")) {
155                     continue;
156                 }
157                 paths.add(path);
158             }
159         }
160     }
161 
162     static final boolean DEBUG = false;
163     static final boolean DEBUG_testExtraPaths = true;
164 
testExtraPaths()165     public void testExtraPaths() {
166         // for debugging
167         final CLDRFile german = CLDRConfig.getInstance().getCldrFactory().make("de", true);
168         getLogger().fine("");
169         Set<String> sorted = new TreeSet<>(german.getExtraPaths());
170         PathHeader.Factory phf = PathHeader.getFactory();
171         PatternPlaceholders pph = PatternPlaceholders.getInstance();
172 
173         for (String path : sorted) {
174             if (DEBUG_testExtraPaths
175                     && path.equals(
176                             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"acceleration-g-force\"]/unitPattern")) {
177                 List<String> failures = new ArrayList<>();
178                 phf.fromPath(path, failures);
179             }
180             PathHeader ph = phf.fromPath(path);
181             if (ph.getPageId() != PageId.Deprecated) {
182                 assertNotEquals(
183                         "bad placeholder: " + path + " ; " + ph,
184                         SectionId.Special,
185                         ph.getSectionId());
186             }
187             PlaceholderStatus phStatus = pph.getStatus(path);
188 
189             PlaceholderStatus expected =
190                     path.contains("/metazone")
191                                     || path.contains("/timeZoneNames")
192                                     || path.contains("/gender")
193                                     || path.startsWith("//ldml/numbers/currencies/currency")
194                                     || path.startsWith("//ldml/personNames/sampleName")
195                                     || path.contains("/availableFormats")
196                             ? PlaceholderStatus.DISALLOWED
197                             : path.contains("/compoundUnitPattern1")
198                                     ? PlaceholderStatus.REQUIRED
199                                     : PlaceholderStatus.LOCALE_DEPENDENT;
200             if (!assertEquals(path, expected, phStatus)) {
201                 int debug = 0;
202             }
203 
204             if (DEBUG) {
205                 if (GrammaticalFeature.pathHasFeature(path) != null || path.endsWith("/gender")) {
206                     System.out.println(path + "\t" + german.getStringValue(path));
207                     String newPath = path.replace("[@case=\"accusative\"]", "");
208                     if (!newPath.contentEquals(path) && !sorted.contains(newPath)) {
209                         System.out.println(newPath + "\t" + german.getStringValue(newPath));
210                     }
211                 }
212             }
213         }
214 
215         Set<String> badCoverage = new TreeSet<>();
216         Counter<String> extraPaths = new Counter<>();
217         final Factory fullCldrFactory = CLDRConfig.getInstance().getFullCldrFactory();
218 
219         for (String locale : sdi.hasGrammarInfo()) {
220             if (sdi.getGrammarInfo(locale).hasInfo(GrammaticalTarget.nominal)) {
221                 if (!fullCldrFactory.getAvailable().contains(locale)) {
222                     if (CLDRConfig.SKIP_SEED) {
223                         continue; // don't check if skipping seed, for production
224                     }
225                 }
226                 final CLDRFile cldrFile = fullCldrFactory.make(locale, true);
227                 Set<String> sorted2 = new TreeSet<>(cldrFile.getExtraPaths());
228                 for (String path : sorted2) {
229                     if (path.contains("speed-beaufort")) {
230                         continue; // special case
231                     }
232                     if (path.contains("/gender")
233                             || path.contains("@gender")
234                             || path.contains("@case")) {
235                         Level level = sdi.getCoverageLevel(path, locale);
236                         if (level.compareTo(Level.MODERN) > 0) {
237                             badCoverage.add(path);
238                         }
239                         extraPaths.add(locale, 1);
240                     }
241                 }
242             }
243         }
244         getLogger().fine("Units with grammar info: " + GrammarInfo.getUnitsToAddGrammar().size());
245         getLogger().fine("Inflection Paths");
246         for (R2<Long, String> locale : extraPaths.getEntrySetSortedByCount(false, null)) {
247             getLogger().fine(locale.get0() + "\t" + locale.get1());
248         }
249         if (!badCoverage.isEmpty()) {
250             errln("Paths not at modern: " + Joiner.on("\n\t").join(badCoverage));
251         }
252 
253         //        Set<String> validUnits =
254         // Validity.getInstance().getStatusToCodes(LstrType.unit).get(Validity.Status.regular);
255         //        validUnits.forEach(System.out::println);
256         //
257         //        grammarInfo = testInfo.getSupplementalDataInfo().getGrammarInfo();
258         //        for (Entry<String, GrammarInfo> entry : grammarInfo.entrySet()) {
259         //            System.out.println(entry);
260         //        }
261 
262         //        CLDRFile toCheck = testInfo.getCldrFactory().make("de", true); //
263         // testInfo.getFullCldrFactory().make("en", false);
264         //        Set<String> sorted = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml));
265         //        sorted.addAll(toCheck.getExtraPaths());
266         //        for (String path : sorted) {
267         //            XPathParts parts = XPathParts.getFrozenInstance(path);
268         //            assertEquals("parts: ", parts.toString(), path);
269         //            System.out.println(path);
270         //        }
271         //        int debug = 0;
272 
273         Map<String, LocaleInfo> localeInfos = new LinkedHashMap<>();
274         Relation<String, String> missingPathsToLocales =
275                 Relation.of(
276                         new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)),
277                         TreeSet.class);
278         Relation<String, String> extraPathsToLocales =
279                 Relation.of(
280                         new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)),
281                         TreeSet.class);
282 
283         for (String locale : new String[] {"en", "root", "fr", "ar", "ja"}) {
284             localeInfos.put(locale, new LocaleInfo(locale));
285         }
286         LocaleInfo englishInfo = localeInfos.get("en");
287         for (String path : englishInfo.paths) {
288             if (path.startsWith("//ldml/identity/")
289                     || path.startsWith("//ldml/numbers/currencies/currency[@type=")
290                     // || path.startsWith("//ldml/dates/calendars/calendar") &&
291                     // !path.startsWith("//ldml/dates/calendars/calendar[@type=\"gregorian\"]")
292                     // ||
293                     // path.startsWith("//ldml/numbers/currencyFormats[@numberSystem=")
294                     // &&
295                     // !path.startsWith("//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]")
296                     || (path.contains("[@count=") && !path.contains("[@count=\"other\"]"))
297                     || (path.contains("[@ordinal=") && !path.contains("[@ordinal=\"other\"]"))
298                     || path.contains("dayPeriod[@type=\"noon\"]")) {
299                 continue;
300             }
301             for (LocaleInfo localeInfo : localeInfos.values()) {
302                 if (localeInfo == englishInfo) {
303                     continue;
304                 }
305                 if (!localeInfo.paths.contains(path)) {
306                     if (path.startsWith("//ldml/dates/calendars/calendar")
307                                     && !(path.contains("[@type=\"generic\"]")
308                                             || path.contains("[@type=\"gregorian\"]"))
309                             || (path.contains("/eras/")
310                                     && path.contains("[@alt=\"variant\"]")) // it is OK
311                             // for
312                             // just
313                             // "en"
314                             // to
315                             // have
316                             // /eras/.../era[@type=...][@alt="variant"]
317                             || path.contains("[@type=\"japanese\"]")
318                             || path.contains("[@type=\"coptic\"]")
319                             || path.contains("[@type=\"hebrew\"]")
320                             || path.contains("[@type=\"islamic-rgsa\"]")
321                             || path.contains("[@type=\"islamic-umalqura\"]")
322                             || path.contains("/relative[@type=\"-2\"]")
323                             || path.contains("/relative[@type=\"2\"]")
324                             || path.startsWith("//ldml/contextTransforms/contextTransformUsage")
325                             || path.contains("[@alt=\"variant\"]")
326                             || path.contains("[@alt=\"formal\"]")
327                             || path.contains("[@type=\"pressure-gasoline-energy-density\"]")
328                             || (path.contains("dayPeriod[@type=")
329                                     && (path.endsWith("1\"]")
330                                             || path.endsWith("\"am\"]")
331                                             || path.endsWith("\"pm\"]")
332                                             || path.endsWith(
333                                                     "\"midnight\"]"))) // morning1, afternoon1, ...
334                             || (localeInfo.locale.equals("root")
335                                     && (path.startsWith(
336                                                     "//ldml/characters/exemplarCharacters[@type=\"index\"]")
337                                             || (path.startsWith("//ldml/units/unitLength") // two
338                                                     // aliased
339                                                     // paths in
340                                                     // root
341                                                     && (path.contains("[@type=\"energy-foodcalorie")
342                                                             || path.contains(
343                                                                     "[@type=\"graphics-dot")))))
344                     // //ldml/characters/exemplarCharacters[@type="index"][root]
345                     ) {
346                         continue;
347                     }
348                     String localeAndStatus =
349                             localeInfo.locale + (englishInfo.cldrFile.isHere(path) ? "" : "*");
350                     missingPathsToLocales.put(path, localeAndStatus);
351                     // English contains the path, and the target locale doesn't.
352                     // The * means that the value is inherited (eg from root).
353                 }
354             }
355         }
356 
357         for (LocaleInfo localeInfo : localeInfos.values()) {
358             if (localeInfo == englishInfo) {
359                 continue;
360             }
361             for (String path : localeInfo.paths) {
362                 if (path.contains("[@numberSystem=\"arab\"]")
363                         || path.contains("[@type=\"japanese\"]")
364                         || path.contains("[@type=\"coptic\"]")
365                         || path.contains("[@type=\"hebrew\"]")
366                         || path.contains("[@type=\"islamic-rgsa\"]")
367                         || path.contains("[@type=\"islamic-umalqura\"]")
368                         || path.contains("/relative[@type=\"-2\"]")
369                         || path.contains("/relative[@type=\"2\"]")) {
370                     continue;
371                 }
372                 if (!englishInfo.paths.contains(path)) {
373                     String localeAndStatus =
374                             localeInfo.locale + (localeInfo.cldrFile.isHere(path) ? "" : "*");
375                     extraPathsToLocales.put(path, localeAndStatus);
376                     // English doesn't contains the path, and the target locale does.
377                     // The * means that the value is inherited (eg from root).
378                 }
379             }
380         }
381 
382         for (Entry<String, Set<String>> entry : missingPathsToLocales.keyValuesSet()) {
383             String path = entry.getKey();
384             Set<String> locales = entry.getValue();
385             Status status = new Status();
386             String originalLocale = englishInfo.cldrFile.getSourceLocaleID(path, status);
387             String engName =
388                     "en"
389                             + (englishInfo.cldrFile.isHere(path)
390                                     ? ""
391                                     : " (source_locale:"
392                                             + originalLocale
393                                             + (path.equals(status.pathWhereFound)
394                                                     ? ""
395                                                     : ", source_path: " + status)
396                                             + ")");
397             if (path.startsWith("//ldml/localeDisplayNames/")
398                     || path.contains("[@alt=\"accounting\"]")
399                     || path.contains("[@alt=\"alphaNextToNumber\"]") // CLDR-14336
400                     || path.contains("[@alt=\"ascii\"]") // CLDR-16606
401                     || path.contains("[@alt=\"noCurrency\"]") // CLDR-14336
402                     || path.startsWith("//ldml/personNames/") // CLDR-15384
403                     || path.startsWith("//ldml/typographicNames/styleName")
404                     || path.startsWith("//ldml/units")) {
405                 logln("+" + engName + ", -" + locales + "\t" + path);
406             } else {
407                 errln("+" + engName + ", -" + locales + "\t" + path);
408             }
409         }
410         for (Entry<String, Set<String>> entry : extraPathsToLocales.keyValuesSet()) {
411             String path = entry.getKey();
412             Set<String> locales = entry.getValue();
413             if (path.startsWith("//ldml/localeDisplayNames/")
414                     || path.startsWith("//ldml/numbers/otherNumberingSystems/")
415             // || path.contains("[@alt=\"accounting\"]")
416             ) {
417                 logln("-en, +" + locales + "\t" + path);
418             } else {
419                 logln("-en, +" + locales + "\t" + path);
420             }
421         }
422 
423         // for (String locale : new String[] { "fr", "ar", "ja" }) {
424         // CLDRFile cldrFile = cldrFactory.make(locale, true);
425         // Set<String> s = (Set<String>) cldrFile.getExtraPaths(new
426         // TreeSet<String>());
427         // System.out.println("Extras for " + locale);
428         // for (String path : s) {
429         // System.out.println(path + " => " + cldrFile.getStringValue(path));
430         // }
431         // System.out.println("Already in " + locale);
432         // for (Iterator<String> it =
433         // cldrFile.iterator(PatternCache.get(".*\\[@count=.*").matcher(""));
434         // it.hasNext();) {
435         // String path = it.next();
436         // System.out.println(path + " => " + cldrFile.getStringValue(path));
437         // }
438         // }
439     }
440 
441     // public void testDraftFilter() {
442     // Factory cldrFactory = Factory.make(CldrUtility.MAIN_DIRECTORY, ".*",
443     // DraftStatus.approved);
444     // checkLocale(cldrFactory.make("root", true));
445     // checkLocale(cldrFactory.make("ee", true));
446     // }
447 
checkLocale(CLDRFile cldr)448     public void checkLocale(CLDRFile cldr) {
449         Matcher m = PatternCache.get("gregorian.*eras").matcher("");
450         for (Iterator<String> it = cldr.iterator("", new UTF16.StringComparator());
451                 it.hasNext(); ) {
452             String path = it.next();
453             if (m.reset(path).find() && !path.contains("alias")) {
454                 errln(
455                         cldr.getLocaleID()
456                                 + "\t"
457                                 + cldr.getStringValue(path)
458                                 + "\t"
459                                 + cldr.getFullXPath(path));
460             }
461             if (path == null) {
462                 errln("Null path");
463             }
464             String fullPath = cldr.getFullXPath(path);
465             if (fullPath.contains("@draft")) {
466                 errln("File can't contain draft elements");
467             }
468         }
469     }
470 
471     // public void testTimeZonePath() {
472     // Factory cldrFactory = Factory.make(CldrUtility.MAIN_DIRECTORY, ".*");
473     // String tz = "Pacific/Midway";
474     // CLDRFile cldrFile = cldrFactory.make("lv", true);
475     // String retVal = cldrFile.getStringValue(
476     // "//ldml/dates/timeZoneNames/zone[@type=\"" + tz + "\"]/exemplarCity"
477     // , true).trim();
478     // errln(retVal);
479     // }
480 
testSimple()481     public void testSimple() {
482         double deltaTime = System.currentTimeMillis();
483         CLDRFile english = testInfo.getEnglish();
484         deltaTime = System.currentTimeMillis() - deltaTime;
485         logln("Creation: Elapsed: " + deltaTime / 1000.0 + " seconds");
486 
487         deltaTime = System.currentTimeMillis();
488         english.getStringValue("//ldml");
489         deltaTime = System.currentTimeMillis() - deltaTime;
490         logln("Creation: Elapsed: " + deltaTime / 1000.0 + " seconds");
491 
492         deltaTime = System.currentTimeMillis();
493         english.getStringValue("//ldml");
494         deltaTime = System.currentTimeMillis() - deltaTime;
495         logln("Caching: Elapsed: " + deltaTime / 1000.0 + " seconds");
496 
497         deltaTime = System.currentTimeMillis();
498         for (int j = 0; j < 2; ++j) {
499             for (Iterator<String> it = english.iterator(); it.hasNext(); ) {
500                 String dpath = it.next();
501                 String value = english.getStringValue(dpath);
502                 Set<String> paths = english.getPathsWithValue(value, "", null, null);
503                 if (paths.size() == 0) {
504                     continue;
505                 }
506                 if (!paths.contains(dpath)) {
507                     if (DISABLE_TIL_WORKS) {
508                         errln("Missing " + dpath + " in " + pathsWithValues(value, paths));
509                     }
510                 }
511                 if (paths.size() > 1) {
512                     Set<String> nonAliased = getNonAliased(paths, english);
513                     if (nonAliased.size() > 1) {
514                         logln(pathsWithValues(value, nonAliased));
515                     }
516                 }
517             }
518         }
519         deltaTime = System.currentTimeMillis() - deltaTime;
520         logln("Elapsed: " + deltaTime / 1000.0 + " seconds");
521     }
522 
pathsWithValues(String value, Set<String> paths)523     private String pathsWithValues(String value, Set<String> paths) {
524         return paths.size()
525                 + " paths with: <"
526                 + value
527                 + ">\t\tPaths: "
528                 + paths.iterator().next()
529                 + ",...";
530     }
531 
getNonAliased(Set<String> paths, CLDRFile file)532     private Set<String> getNonAliased(Set<String> paths, CLDRFile file) {
533         Set<String> result = new LinkedHashSet<>();
534         for (String path : paths) {
535             if (file.isHere(path)) {
536                 result.add(path);
537             }
538         }
539         return result;
540     }
541 
testResolution()542     public void testResolution() {
543         CLDRFile german = testInfo.getCLDRFile("de", true);
544         CLDRFile bal = testInfo.getCLDRFile("bal", true);
545         // Test direct lookup.
546 
547         String xpath = "//ldml/localeDisplayNames/localeDisplayPattern/localeSeparator";
548         String id = bal.getSourceLocaleID(xpath, null);
549         if (!id.equals("bal")) {
550             errln("Expected bal but was " + id + " for " + xpath);
551         }
552 
553         // Test aliasing.
554         xpath =
555                 "//ldml/dates/calendars/calendar[@type=\"islamic-civil\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"yyyyMEd\"]";
556         id = german.getSourceLocaleID(xpath, null);
557         if (!id.equals("de")) {
558             errln("Expected de but was " + id + " for " + xpath);
559         }
560 
561         // Test lookup that falls to root.
562         xpath =
563                 "//ldml/dates/calendars/calendar[@type=\"coptic\"]/months/monthContext[@type=\"stand-alone\"]/monthWidth[@type=\"narrow\"]/month[@type=\"5\"]";
564         id = german.getSourceLocaleID(xpath, null);
565         if (!id.equals("root")) {
566             errln("Expected root but was " + id + " for " + xpath);
567         }
568     }
569 
570     static final NumberFormat percent = NumberFormat.getPercentInstance();
571 
572     static final class Size {
573         int items;
574         int chars;
575 
add(String topValue)576         public void add(String topValue) {
577             items++;
578             chars += topValue.length();
579         }
580 
over(Size base)581         public String over(Size base) {
582             return "items: "
583                     + items
584                     + "("
585                     + percent.format(items / (0.0 + base.items))
586                     + "); "
587                     + "chars: "
588                     + chars
589                     + "("
590                     + percent.format(chars / (0.0 + base.chars))
591                     + ")";
592         }
593     }
594 
testGeorgeBailey()595     public void testGeorgeBailey() {
596         PathHeader.Factory phf = PathHeader.getFactory(testInfo.getEnglish());
597         for (String locale : Arrays.asList("de", "de_AT", "en", "nl")) {
598             CLDRFile cldrFile = testInfo.getCLDRFile(locale, true);
599 
600             CLDRFile cldrFileUnresolved = testInfo.getCLDRFile(locale, false);
601             Status status = new Status();
602             Output<String> localeWhereFound = new Output<>();
603             Output<String> pathWhereFound = new Output<>();
604 
605             Map<String, String> diff = new TreeMap<>(CLDRFile.getComparator(DtdType.ldml));
606 
607             Size countSuperfluous = new Size();
608             Size countExtraLevel = new Size();
609             Size countOrdinary = new Size();
610 
611             for (String path : cldrFile.fullIterable()) {
612                 String baileyValue =
613                         cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
614                 String topValue = cldrFileUnresolved.getStringValue(path);
615                 String resolvedValue = cldrFile.getStringValue(path);
616                 String unresolvedConstructedValue = cldrFileUnresolved.getConstructedValue(path);
617                 String resolvedConstructedValue = cldrFile.getConstructedValue(path);
618 
619                 // assertEquals("x≠y", "x", "y"); // expected x, got y
620                 if (unresolvedConstructedValue != null) {
621                     assertEquals(
622                             "uc≠rc\t" + locale + "\t" + phf.fromPath(path),
623                             unresolvedConstructedValue,
624                             resolvedConstructedValue);
625                 }
626 
627                 // if there is a value, then either it is at the top level or it
628                 // is the bailey value.
629                 // OR it is INHERITANCE_MARKER
630 
631                 if (resolvedValue != null) {
632                     if (topValue != null && !CldrUtility.INHERITANCE_MARKER.equals(topValue)) {
633                         if (!topValue.equals(cldrFileUnresolved.getConstructedValue(path))) {
634                             assertEquals(
635                                     "top≠resolved\t" + locale + "\t" + phf.fromPath(path),
636                                     topValue,
637                                     resolvedValue);
638                         }
639                     } else {
640                         String locale2 = cldrFile.getSourceLocaleID(path, status);
641                         if (!assertEquals(
642                                 "bailey value≠\t" + locale + "\t" + phf.fromPath(path),
643                                 resolvedValue,
644                                 baileyValue)) {
645                             baileyValue =
646                                     cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
647                             topValue = cldrFileUnresolved.getStringValue(path);
648                         }
649                         if (!assertEquals(
650                                 "bailey locale≠\t" + locale + "\t" + phf.fromPath(path),
651                                 locale2,
652                                 localeWhereFound.value)) {
653                             baileyValue =
654                                     cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
655                             topValue = cldrFileUnresolved.getStringValue(path);
656                         }
657                         if (!assertEquals(
658                                 "bailey path≠\t" + locale + "\t" + phf.fromPath(path),
659                                 status.pathWhereFound,
660                                 pathWhereFound.value)) {
661                             baileyValue =
662                                     cldrFile.getBaileyValue(path, pathWhereFound, localeWhereFound);
663                             topValue = cldrFileUnresolved.getStringValue(path);
664                         }
665                     }
666                 }
667 
668                 if (topValue != null) {
669                     if (CldrUtility.equals(topValue, baileyValue)) {
670                         countSuperfluous.add(topValue);
671                     } else if (sdi.getCoverageLevel(path, locale).compareTo(Level.MODERN) > 0) {
672                         countExtraLevel.add(topValue);
673                     }
674                     countOrdinary.add(topValue);
675                 }
676             }
677             logln("Superfluous (" + locale + "):\t" + countSuperfluous.over(countOrdinary));
678             logln(">Modern (" + locale + "):\t" + countExtraLevel.over(countOrdinary));
679             for (Entry<String, String> entry : diff.entrySet()) {
680                 logln(locale + "\t" + phf.fromPath(entry.getKey()) + ";\t" + entry.getValue());
681             }
682         }
683     }
684 
TestConstructedValue()685     public void TestConstructedValue() {
686         CLDRFile eng = CLDRConfig.getInstance().getEnglish();
687 
688         String prefix = GlossonymConstructor.PATH_PREFIX;
689         String display = eng.getConstructedValue(prefix + "zh_Hans" + "\"]");
690         assertEquals("contructed value", "Chinese (Simplified)", display);
691         display = eng.getConstructedValue(prefix + "es_US" + "\"]");
692         assertEquals("contructed value", "Spanish (United States)", display);
693         display = eng.getConstructedValue(prefix + "es_US" + "\"][@alt=\"short\"]");
694         assertEquals("contructed value", "Spanish (US)", display);
695         display = eng.getConstructedValue(prefix + "es" + "\"]");
696         assertEquals("contructed value", null, display);
697         display = eng.getConstructedValue(prefix + "missing" + "\"]");
698         assertEquals("contructed value", null, display);
699     }
700 
TestFileLocations()701     public void TestFileLocations() {
702         File mainDir = new File(CLDRPaths.MAIN_DIRECTORY);
703         if (!mainDir.isDirectory()) {
704             throw new IllegalArgumentException(
705                     "MAIN_DIRECTORY is not a directory: " + CLDRPaths.MAIN_DIRECTORY);
706         }
707         File mainCollationDir = new File(CLDRPaths.COLLATION_DIRECTORY);
708         if (!mainCollationDir.isDirectory()) {
709             throw new IllegalArgumentException(
710                     "COLLATION_DIRECTORY is not a directory: " + CLDRPaths.COLLATION_DIRECTORY);
711         }
712         if (CLDRConfig.SKIP_SEED) {
713             return;
714         }
715         File seedDir = new File(CLDRPaths.SEED_DIRECTORY);
716         if (!seedDir.isDirectory()) {
717             throw new IllegalArgumentException(
718                     "SEED_DIRECTORY is not a directory: " + CLDRPaths.SEED_DIRECTORY);
719         }
720         File seedCollationDir = new File(CLDRPaths.SEED_COLLATION_DIRECTORY);
721         if (!seedCollationDir.isDirectory()) {
722             throw new IllegalArgumentException(
723                     "SEED_COLLATION_DIRECTORY is not a directory: "
724                             + CLDRPaths.SEED_COLLATION_DIRECTORY);
725         }
726 
727         File[] md = {mainDir, mainCollationDir};
728         File[] sd = {seedDir, seedCollationDir};
729         Factory mf = SimpleFactory.make(md, ".*", DraftStatus.unconfirmed);
730         Factory sf = SimpleFactory.make(sd, ".*", DraftStatus.unconfirmed);
731         Set<CLDRLocale> mainLocales = mf.getAvailableCLDRLocales();
732         Set<CLDRLocale> seedLocales = sf.getAvailableCLDRLocales();
733         mainLocales.retainAll(seedLocales);
734         mainLocales.remove(CLDRLocale.getInstance("root")); // allow multiple roots
735         if (!mainLocales.isEmpty()) {
736             errln(
737                     "CLDR locale files located in both common and seed ==> "
738                             + mainLocales.toString());
739         }
740     }
741 
TestForStrayFiles()742     public void TestForStrayFiles() {
743         TreeSet<String> mainList =
744                 new TreeSet<>(Arrays.asList(new File(CLDRPaths.MAIN_DIRECTORY).list()));
745 
746         for (String dir : DtdType.ldml.directories) {
747             Set<String> dirFiles =
748                     new TreeSet<>(
749                             Arrays.asList(
750                                     new File(CLDRPaths.BASE_DIRECTORY + "common/" + dir).list()));
751             if (!mainList.containsAll(dirFiles)) {
752                 dirFiles.removeAll(mainList);
753                 errln(dir + "/ has extra files not in main/: " + dirFiles);
754             }
755         }
756     }
757 
TestFileIds()758     public void TestFileIds() {
759         Output<Map<String, Multimap<LdmlDir, Source>>> localeToDirToSource = new Output<>();
760         Map<LdmlDir, Multimap<String, Source>> dirToLocaleToSource = getFiles(localeToDirToSource);
761 
762         for (Entry<String, Multimap<LdmlDir, Source>> e : localeToDirToSource.value.entrySet()) {
763             String locale = e.getKey();
764             if (locale.equals("root")) {
765                 continue; // allow multiple root locales
766             }
767             Map<LdmlDir, Collection<Source>> value = e.getValue().asMap();
768             for (Entry<LdmlDir, Collection<Source>> e2 : value.entrySet()) {
769                 LdmlDir dir = e2.getKey();
770                 Collection<Source> sources = e2.getValue();
771                 if (sources.size() != 1) {
772                     errln(
773                             "Can only one have 1 instance of "
774                                     + locale
775                                     + " in "
776                                     + dir
777                                     + ", but have in "
778                                     + sources);
779                 }
780             }
781         }
782 
783         LikelySubtags likelySubtags = new LikelySubtags();
784 
785         for (Entry<LdmlDir, Multimap<String, Source>> dirAndLocaleToSource :
786                 dirToLocaleToSource.entrySet()) {
787             LdmlDir ldmlDir = dirAndLocaleToSource.getKey();
788             Multimap<String, Source> localesToDirs = dirAndLocaleToSource.getValue();
789             for (Entry<String, Source> localeAndDir : localesToDirs.entries()) {
790                 String loc = localeAndDir.getKey();
791                 if (loc.equals("root")) {
792                     continue;
793                 }
794                 Source source = localeAndDir.getValue();
795                 String parent = LocaleIDParser.getParent(loc);
796                 String parent2 = LanguageTagParser.getSimpleParent(loc);
797                 if (parent2.isEmpty()) {
798                     parent2 = "root";
799                 }
800                 String likely = likelySubtags.minimize(loc);
801                 if (!localesToDirs.containsKey(parent)) {
802                     errln(
803                             "Missing parent ("
804                                     + parent
805                                     + ") for "
806                                     + loc
807                                     + "  in "
808                                     + source
809                                     + "/"
810                                     + ldmlDir
811                                     + "; likely="
812                                     + likely);
813                 }
814                 if (!Objects.equals(parent, parent2) && !localesToDirs.containsKey(parent2)) {
815                     errln(
816                             "Missing simple parent ("
817                                     + parent2
818                                     + ") for "
819                                     + loc
820                                     + "  in "
821                                     + source
822                                     + "/"
823                                     + ldmlDir
824                                     + "; likely="
825                                     + likely);
826                 }
827             }
828 
829             // establish that the parent of locale is somewhere in the same
830             //                assertEquals(dir + " locale file has minimal id: ", min, loc);
831             //            if (!dir.endsWith("exemplars")) {
832             //                continue;
833             //            }
834             //            String trans = ltc.transform(loc);
835             //            System.out.println("\t" + min + "\t" + loc + "\t" + trans);
836         }
837     }
838 
/**
 * Source trees scanned for locale files. {@code getFiles} maps the entries of the base directory
 * onto these constants with {@code Source.valueOf}, so the constant names must equal the
 * directory names.
 */
enum Source {
    common, seed, exemplars
}
844 
/**
 * LDML data directories that {@code getFiles} indexes. Subdirectory names are converted with
 * {@code LdmlDir.valueOf}, so the constant names must equal the directory names.
 */
enum LdmlDir {
    main, annotations, annotationsDerived, casing,
    collation, rbnf, segments, subdivisions
}
855 
856     /**
857      * Returns a map from directory (eg main) to its parent (eg seed) and to their children (locales
858      * in seed/main)
859      */
getFiles( Output<Map<String, Multimap<LdmlDir, Source>>> localeToDirToSource)860     private Map<LdmlDir, Multimap<String, Source>> getFiles(
861             Output<Map<String, Multimap<LdmlDir, Source>>> localeToDirToSource) {
862 
863         Map<LdmlDir, Multimap<String, Source>> _dirToLocaleToSource = new TreeMap<>();
864         Map<String, Multimap<LdmlDir, Source>> _localeToDirToSource = new TreeMap<>();
865 
866         for (String base : new File(CLDRPaths.BASE_DIRECTORY).list()) {
867             Source source;
868             try {
869                 source = Source.valueOf(base);
870             } catch (Exception e) {
871                 continue;
872             }
873             String fullBase = CLDRPaths.BASE_DIRECTORY + base;
874             File fullBaseFile = new File(fullBase);
875             if (!fullBaseFile.isDirectory()) {
876                 continue;
877             }
878 
879             for (String sub1 : fullBaseFile.list()) {
880                 if (!DtdType.ldml.directories.contains(sub1)) {
881                     continue;
882                 }
883                 LdmlDir ldmlDir = LdmlDir.valueOf(sub1);
884                 String dir = fullBase + "/" + ldmlDir;
885                 for (String loc : new File(dir).list()) {
886                     if (!loc.endsWith(".xml")) {
887                         continue;
888                     }
889                     loc = loc.substring(0, loc.length() - 4);
890 
891                     put(_localeToDirToSource, loc, ldmlDir, source);
892                     put(_dirToLocaleToSource, ldmlDir, loc, source);
893                 }
894             }
895         }
896         localeToDirToSource.value =
897                 ImmutableMap.copyOf(_localeToDirToSource); // TODO protect subtrees
898         return ImmutableMap.copyOf(_dirToLocaleToSource);
899     }
900 
put(Map<A, Multimap<B, C>> aToBToC, A a, B b, C c)901     private <A, B, C> void put(Map<A, Multimap<B, C>> aToBToC, A a, B b, C c) {
902         Multimap<B, C> dirToSource = aToBToC.get(a);
903         if (dirToSource == null) {
904             aToBToC.put(a, dirToSource = (Multimap<B, C>) TreeMultimap.create());
905         }
906         dirToSource.put(b, c);
907     }
908 
TestSwissHighGerman()909     public void TestSwissHighGerman() {
910         CLDRFile swissHighGerman = testInfo.getCommonSeedExemplarsFactory().make("de_CH", true);
911         for (String xpath : swissHighGerman) {
912             if (xpath.equals("//ldml/characters/exemplarCharacters[@type=\"auxiliary\"]")) {
913                 continue;
914             }
915             String value = swissHighGerman.getStringValue(xpath);
916             if (value != null && value.indexOf('ß') >= 0) {
917                 warnln("«" + value + "» contains ß at " + xpath);
918             }
919         }
920     }
921 
TestExtraPaths()922     public void TestExtraPaths() {
923         List<String> testCases =
924                 Arrays.asList(
925                         "//ldml/localeDisplayNames/languages/language[@type=\"ccp\"]",
926                         "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]");
927         CLDRFile af = testInfo.getCldrFactory().make("af", true);
928         Set<String> missing = new HashSet<>(testCases);
929         CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("af");
930         PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(testInfo.getEnglish());
931         Status status = new Status();
932 
933         for (String xpath : af) {
934             if (missing.contains(xpath)) {
935                 String value = af.getStringValue(xpath);
936                 String source = af.getSourceLocaleID(xpath, status);
937                 Level level = coverageLevel2.getLevel(xpath);
938                 PathHeader ph = pathHeaderFactory.fromPath(xpath);
939                 getLogger()
940                         .fine(
941                                 ""
942                                         + "\nPathHeader:\t"
943                                         + ph
944                                         + "\nValue:\t"
945                                         + value
946                                         + "\nLevel:\t"
947                                         + level
948                                         + "\nReq. Locale:\t"
949                                         + "af"
950                                         + "\nSource Locale:\t"
951                                         + source
952                                         + "\nReq. XPath:\t"
953                                         + xpath
954                                         + "\nSource Path:\t"
955                                         + status);
956                 missing.remove(xpath);
957             }
958         }
959         assertTrue("Should be empty", missing.isEmpty());
960     }
961 
TestExtraPaths13954()962     public void TestExtraPaths13954() {
963         CLDRFile es = cldrFactory.make("es", true);
964         CLDRFile es_US = cldrFactory.make("es_US", true);
965         if (!es_US.getRawExtraPaths().containsAll(es.getRawExtraPaths())) {
966             errln(
967                     "Failure: "
968                             + Joiner.on('\n')
969                                     .join(
970                                             Sets.difference(
971                                                     es.getRawExtraPaths(),
972                                                     es_US.getRawExtraPaths())));
973         }
974     }
975 
testEnglishSideways()976     public void testEnglishSideways() {
977         CLDRFile fr = cldrFactory.make("fr", true);
978         CLDRFile en = cldrFactory.make("en", true);
979         System.out.println();
980         for (String path : fr.fullIterable()) {
981             if (!path.startsWith("//ldml/units") || path.endsWith("/gender")) {
982                 continue;
983             }
984             Status status = new Status();
985             String localeWhereFound = en.getSourceLocaleID(path, status);
986             if (!Objects.equals(path, status.pathWhereFound)) {
987                 XPathParts pathParts = XPathParts.getFrozenInstance(path);
988                 String type = pathParts.getAttributeValue(3, "type");
989                 XPathParts foundPathParts = XPathParts.getFrozenInstance(status.pathWhereFound);
990                 String foundType = foundPathParts.getAttributeValue(3, "type");
991                 if (Objects.equals(type, foundType)) {
992                     continue; // ok to go sideways within type
993                 }
994                 System.out.println(
995                         String.format("%s\t%s\t%s", path, status.pathWhereFound, localeWhereFound));
996             }
997         }
998     }
999 }
1000