• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.ImmutableListMultimap;
5 import com.google.common.collect.ImmutableMap;
6 import com.google.common.collect.ImmutableSet;
7 import com.google.common.collect.LinkedHashMultimap;
8 import com.google.common.collect.Multimap;
9 import com.google.common.collect.Sets;
10 import com.google.common.collect.TreeMultimap;
11 import com.ibm.icu.impl.Relation;
12 import com.ibm.icu.impl.Row;
13 import com.ibm.icu.impl.Row.R2;
14 import com.ibm.icu.impl.Row.R4;
15 import com.ibm.icu.text.CompactDecimalFormat;
16 import com.ibm.icu.text.CompactDecimalFormat.CompactStyle;
17 import com.ibm.icu.text.Transform;
18 import com.ibm.icu.util.Calendar;
19 import com.ibm.icu.util.Output;
20 import com.ibm.icu.util.ULocale;
21 import java.util.Arrays;
22 import java.util.Collection;
23 import java.util.Collections;
24 import java.util.Date;
25 import java.util.EnumSet;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.LinkedHashSet;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Map.Entry;
32 import java.util.Set;
33 import java.util.TreeMap;
34 import java.util.TreeSet;
35 import java.util.regex.Pattern;
36 import org.unicode.cldr.draft.ScriptMetadata;
37 import org.unicode.cldr.test.CoverageLevel2;
38 import org.unicode.cldr.tool.LikelySubtags;
39 import org.unicode.cldr.util.CLDRConfig;
40 import org.unicode.cldr.util.CLDRFile;
41 import org.unicode.cldr.util.CLDRLocale;
42 import org.unicode.cldr.util.CLDRPaths;
43 import org.unicode.cldr.util.ChainedMap;
44 import org.unicode.cldr.util.ChainedMap.M4;
45 import org.unicode.cldr.util.Counter2;
46 import org.unicode.cldr.util.DtdData;
47 import org.unicode.cldr.util.DtdData.Element;
48 import org.unicode.cldr.util.DtdType;
49 import org.unicode.cldr.util.GrammarInfo;
50 import org.unicode.cldr.util.LanguageTagParser;
51 import org.unicode.cldr.util.Level;
52 import org.unicode.cldr.util.LocaleNames;
53 import org.unicode.cldr.util.LogicalGrouping;
54 import org.unicode.cldr.util.LogicalGrouping.PathType;
55 import org.unicode.cldr.util.Organization;
56 import org.unicode.cldr.util.PathHeader;
57 import org.unicode.cldr.util.PathHeader.Factory;
58 import org.unicode.cldr.util.PathStarrer;
59 import org.unicode.cldr.util.PatternCache;
60 import org.unicode.cldr.util.RegexLookup;
61 import org.unicode.cldr.util.RegexLookup.Finder;
62 import org.unicode.cldr.util.StandardCodes;
63 import org.unicode.cldr.util.SupplementalDataInfo;
64 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo;
65 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
66 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
67 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
68 import org.unicode.cldr.util.VoteResolver;
69 import org.unicode.cldr.util.XPathParts;
70 
71 public class TestCoverageLevel extends TestFmwkPlus {
72 
73     private static final boolean SHOW_LSR_DATA = false;
74 
75     private static CLDRConfig testInfo = CLDRConfig.getInstance();
76     private static final StandardCodes STANDARD_CODES = StandardCodes.make();
77     private static final CLDRFile ENGLISH = testInfo.getEnglish();
78     private static final SupplementalDataInfo SDI = testInfo.getSupplementalDataInfo();
79     private static final String TC_VOTES =
80             Integer.toString(VoteResolver.Level.tc.getVotes(Organization.apple));
81 
main(String[] args)82     public static void main(String[] args) {
83         new TestCoverageLevel().run(args);
84     }
85 
testSpecificPaths()86     public void testSpecificPaths() {
87         String[][] rows = {
88             {
89                 "//ldml/characters/parseLenients[@scope=\"number\"][@level=\"lenient\"]/parseLenient[@sample=\",\"]",
90                 "moderate",
91                 TC_VOTES
92             }
93         };
94         doSpecificPathTest("fr", rows);
95     }
96 
testSpecificPathsPersCal()97     public void testSpecificPathsPersCal() {
98         String[][] rows = {
99             {
100                 "//ldml/dates/calendars/calendar[@type=\"persian\"]/eras/eraAbbr/era[@type=\"0\"]",
101                 "moderate",
102                 "4"
103             },
104             {
105                 "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"1\"]",
106                 "moderate",
107                 "4"
108             }
109         };
110         doSpecificPathTest("ckb_IR", rows);
111     }
112 
testSpecificPathsDeFormatLength()113     public void testSpecificPathsDeFormatLength() {
114         String[][] rows = {
115             /* For German (de) these should be high-bar (20) per https://unicode-org.atlassian.net/browse/CLDR-14988 */
116             {
117                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]",
118                 "moderate",
119                 TC_VOTES
120             },
121             {
122                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]",
123                 "moderate",
124                 TC_VOTES
125             },
126             {
127                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]",
128                 "moderate",
129                 TC_VOTES
130             },
131             {
132                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]",
133                 "moderate",
134                 TC_VOTES
135             },
136             {
137                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]",
138                 "moderate",
139                 TC_VOTES
140             },
141             {
142                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]",
143                 "moderate",
144                 TC_VOTES
145             },
146             {
147                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]",
148                 "moderate",
149                 TC_VOTES
150             },
151             {
152                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]",
153                 "moderate",
154                 TC_VOTES
155             },
156             {
157                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]",
158                 "moderate",
159                 TC_VOTES
160             },
161             {
162                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]",
163                 "moderate",
164                 TC_VOTES
165             },
166             {
167                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]",
168                 "moderate",
169                 TC_VOTES
170             },
171             {
172                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]",
173                 "moderate",
174                 TC_VOTES
175             },
176             /* not high-bar (20): wrong number of zeroes, or count many*/
177             {
178                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100\"][@count=\"other\"]",
179                 "comprehensive",
180                 "8"
181             },
182             {
183                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000000\"][@count=\"other\"]",
184                 "moderate",
185                 "8"
186             },
187             {
188                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"many\"]",
189                 "moderate",
190                 "8"
191             },
192         };
193         doSpecificPathTest("de", rows);
194     }
195 
doSpecificPathTest(String localeStr, String[][] rows)196     private void doSpecificPathTest(String localeStr, String[][] rows) {
197         Factory phf = PathHeader.getFactory(ENGLISH);
198         CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SDI, localeStr);
199         CLDRLocale loc = CLDRLocale.getInstance(localeStr);
200         for (String[] row : rows) {
201             String path = row[0];
202             Level expectedLevel = Level.fromString(row[1]);
203             Level level = coverageLevel.getLevel(path);
204             assertEquals("Level for " + path, expectedLevel, level);
205 
206             int expectedRequiredVotes = Integer.parseInt(row[2]);
207             int votes = SDI.getRequiredVotes(loc, phf.fromPath(path));
208             assertEquals("Votes for " + path, expectedRequiredVotes, votes);
209         }
210     }
211 
oldTestInvariantPaths()212     public void oldTestInvariantPaths() {
213         org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory();
214         PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*");
215         SupplementalDataInfo sdi =
216                 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
217 
218         Set<String> allPaths = new HashSet<>();
219         M4<String, String, Level, Boolean> starredToLocalesToLevels =
220                 ChainedMap.of(
221                         new TreeMap<String, Object>(),
222                         new TreeMap<String, Object>(),
223                         new TreeMap<Level, Object>(),
224                         Boolean.class);
225 
226         for (String locale : factory.getAvailableLanguages()) {
227             logln(locale);
228             CLDRFile cldrFileToCheck = factory.make(locale, true);
229             for (String path : cldrFileToCheck.fullIterable()) {
230                 allPaths.add(path);
231                 String starred = pathStarrer.set(path);
232                 Level level = sdi.getCoverageLevel(path, locale);
233                 starredToLocalesToLevels.put(starred, locale, level, true);
234             }
235         }
236 
237         Set<Level> levelsFound = EnumSet.noneOf(Level.class);
238         Set<String> localesWithUniqueLevels = new TreeSet<>();
239         for (Entry<String, Map<String, Map<Level, Boolean>>> entry : starredToLocalesToLevels) {
240             String starred = entry.getKey();
241             Map<String, Map<Level, Boolean>> localesToLevels = entry.getValue();
242             int maxLevelCount = 0;
243             double localeCount = 0;
244             levelsFound.clear();
245             localesWithUniqueLevels.clear();
246 
247             for (Entry<String, Map<Level, Boolean>> entry2 : localesToLevels.entrySet()) {
248                 String locale = entry2.getKey();
249                 Map<Level, Boolean> levels = entry2.getValue();
250                 levelsFound.addAll(levels.keySet());
251                 if (levels.size() > maxLevelCount) {
252                     maxLevelCount = levels.size();
253                 }
254                 if (levels.size() == 1) {
255                     localesWithUniqueLevels.add(locale);
256                 }
257                 localeCount++;
258             }
259             System.out.println(
260                     maxLevelCount
261                             + "\t"
262                             + localesWithUniqueLevels.size() / localeCount
263                             + "\t"
264                             + starred
265                             + "\t"
266                             + Joiner.on(", ").join(levelsFound)
267                             + "\t"
268                             + (maxLevelCount == 1
269                                     ? "all"
270                                     : localesWithUniqueLevels.size() == 0
271                                             ? "none"
272                                             : Joiner.on(", ").join(localesWithUniqueLevels)));
273         }
274     }
275 
/**
 * Size/officialness tiers attached to languages by this class's static initializer. Each
 * constant carries a short priority label ("P1".."P3"). Declaration order matters: the
 * statuses are kept in sorted sets and the first one is the one reported.
 */
enum LanguageStatus {
    /** Label "P1". */
    Lit100M("P1"),
    /** Label "P2". */
    Lit10MandOfficial("P2"),
    /** Label "P3". */
    Lit1MandOneThird("P3");

    /** Short priority label shown in reports. */
    final String name;

    LanguageStatus(String label) {
        name = label;
    }
}
286 
287     static Relation<String, LanguageStatus> languageStatus =
288             Relation.of(new HashMap<String, Set<LanguageStatus>>(), TreeSet.class);
289     static Counter2<String> languageLiteratePopulation = new Counter2<>();
290     static Map<String, Date> currencyToLast = new HashMap<>();
291     static Set<String> officialSomewhere = new HashSet<>();
292 
293     static {
294         Counter2<String> territoryLiteratePopulation = new Counter2<>();
295         LanguageTagParser parser = new LanguageTagParser();
296         // cf
297         // http://cldr.unicode.org/development/development-process/design-proposals/languages-to-show-for-translation
298         for (String language : SDI.getLanguagesForTerritoriesPopulationData()) {
299             String base = parser.set(language).getLanguage();
300             boolean isOfficial = false;
301             double languageLiterate = 0;
302             for (String territory : SDI.getTerritoriesForPopulationData(language)) {
303                 PopulationData pop = SDI.getLanguageAndTerritoryPopulationData(language, territory);
304                 OfficialStatus officialStatus = pop.getOfficialStatus();
305                 if (officialStatus.compareTo(OfficialStatus.de_facto_official) >= 0) {
306                     isOfficial = true;
307                     languageStatus.put(base + "_" + territory, LanguageStatus.Lit10MandOfficial);
308                     officialSomewhere.add(base);
309                 }
310                 double litPop = pop.getLiteratePopulation();
311                 languageLiterate += litPop;
territoryLiteratePopulation.add(territory, litPop)312                 territoryLiteratePopulation.add(territory, litPop);
313                 languageLiteratePopulation.add(base + "_" + territory, litPop);
314             }
languageLiteratePopulation.add(base, languageLiterate)315             languageLiteratePopulation.add(base, languageLiterate);
316             if (languageLiterate > 100000000) {
languageStatus.put(base, LanguageStatus.Lit100M)317                 languageStatus.put(base, LanguageStatus.Lit100M);
318             }
319             if (languageLiterate > 10000000 && isOfficial) {
languageStatus.put(base, LanguageStatus.Lit10MandOfficial)320                 languageStatus.put(base, LanguageStatus.Lit10MandOfficial);
321             }
322         }
323         for (String language : SDI.getLanguagesForTerritoriesPopulationData()) {
324             if (languageLiteratePopulation.getCount(language) < 1000000) {
325                 continue;
326             }
327             String base = parser.set(language).getLanguage();
328             for (String territory : SDI.getTerritoriesForPopulationData(language)) {
329                 PopulationData pop = SDI.getLanguageAndTerritoryPopulationData(language, territory);
330                 double litPop = pop.getLiteratePopulation();
331                 double total = territoryLiteratePopulation.getCount(territory);
332                 if (litPop > total / 3) {
languageStatus.put(base, LanguageStatus.Lit1MandOneThird)333                     languageStatus.put(base, LanguageStatus.Lit1MandOneThird);
334                 }
335             }
336         }
337         for (String territory : STANDARD_CODES.getAvailableCodes("territory")) {
338             Set<CurrencyDateInfo> cdateInfo = SDI.getCurrencyDateInfo(territory);
339             if (cdateInfo == null) {
340                 continue;
341             }
342             for (CurrencyDateInfo dateInfo : cdateInfo) {
343                 String currency = dateInfo.getCurrency();
344                 Date last = dateInfo.getEnd();
345                 Date old = currencyToLast.get(currency);
346                 if (old == null || old.compareTo(last) < 0) {
currencyToLast.put(currency, last)347                     currencyToLast.put(currency, last);
348                 }
349             }
350         }
351     }
352 
353     static CompactDecimalFormat cdf =
354             CompactDecimalFormat.getInstance(ULocale.ENGLISH, CompactStyle.SHORT);
355 
isBigLanguage(String lang)356     static String isBigLanguage(String lang) {
357         Set<LanguageStatus> status = languageStatus.get(lang);
358         Double size = languageLiteratePopulation.getCount(lang);
359         String sizeString = size == null ? "?" : cdf.format(size);
360         String off = officialSomewhere.contains(lang) ? "o" : "";
361         if (status == null || status.isEmpty()) {
362             return "P4-" + sizeString + off;
363         }
364         return status.iterator().next().name + "-" + sizeString + off;
365     }
366 
367     static final Date NOW = new Date();
368 
369     private static final boolean DEBUG = false;
370 
371     static class TypeName implements Transform<String, String> {
372         private final int field;
373         private final Map<String, R2<List<String>, String>> dep;
374 
TypeName(int field)375         public TypeName(int field) {
376             this.field = field;
377             switch (field) {
378                 case CLDRFile.LANGUAGE_NAME:
379                     dep = SDI.getLocaleAliasInfo().get("language");
380                     break;
381                 case CLDRFile.TERRITORY_NAME:
382                     dep = SDI.getLocaleAliasInfo().get("territory");
383                     break;
384                 case CLDRFile.SCRIPT_NAME:
385                     dep = SDI.getLocaleAliasInfo().get("script");
386                     break;
387                 default:
388                     dep = null;
389                     break;
390             }
391         }
392 
393         @Override
transform(String source)394         public String transform(String source) {
395             String result = ENGLISH.getName(field, source);
396             String extra = "";
397             if (field == CLDRFile.LANGUAGE_NAME) {
398                 String lang = isBigLanguage(source);
399                 extra = lang == null ? "X" : lang;
400             } else if (field == CLDRFile.CURRENCY_NAME) {
401                 Date last = currencyToLast.get(source);
402                 extra = last == null ? "?" : last.compareTo(NOW) < 0 ? "old" : "";
403             }
404             R2<List<String>, String> depValue = dep == null ? null : dep.get(source);
405             if (depValue != null) {
406                 extra += extra.isEmpty() ? "" : "-";
407                 extra += depValue.get1();
408             }
409             return result + (extra.isEmpty() ? "" : "\t" + extra);
410         }
411     }
412 
413     RegexLookup<Level> exceptions =
414             RegexLookup.of(
415                             null,
416                             new Transform<String, Level>() {
417                                 @Override
418                                 public Level transform(String source) {
419                                     return Level.fromLevel(Integer.parseInt(source));
420                                 }
421                             },
422                             null)
423                     .loadFromFile(TestCoverageLevel.class, "TestCoverageLevel.txt");
424 
425     public void TestExceptions() {
426         for (Map.Entry<Finder, Level> x : exceptions) {
427             logln(x.getKey().toString() + " => " + x.getValue());
428         }
429     }
430 
431     public void TestNarrowCurrencies() {
432         String path = "//ldml/numbers/currencies/currency[@type=\"USD\"]/symbol[@alt=\"narrow\"]";
433         String value = ENGLISH.getStringValue(path);
434         assertEquals("Narrow $", "$", value);
435         SupplementalDataInfo sdi =
436                 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
437         Level level = sdi.getCoverageLevel(path, "en");
438         assertEquals("Narrow $", Level.MODERATE, level);
439     }
440 
441     public void TestA() {
442         String path = "//ldml/characterLabels/characterLabel[@type=\"other\"]";
443         SupplementalDataInfo sdi =
444                 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
445         Level level = sdi.getCoverageLevel(path, "en");
446         assertEquals("Quick Check for any attribute", Level.MODERN, level);
447     }
448 
449     public void TestCoverageCompleteness() {
450         /**
451          * Check that English paths are, except for known cases, at least modern coverage. We filter
452          * out the things we know about and have determined are OK to be in comprehensive. If we add
453          * a path that doesn't get its coverage set, this test should complain about it.
454          */
455         final ImmutableSet<String> inactiveMetazones =
456                 ImmutableSet.of(
457                         "Greenland", // TODO: New metazone added for tz2023d update,
458                         // In CLDR 45, we don't want to include this one in modern coverage because
459                         // we don't open ST for translating display names for this metazone.
460                         // After 45, we will include "Greenland" in modern coverage.
461                         "Bering",
462                         "Dominican",
463                         "Shevchenko",
464                         "Alaska_Hawaii",
465                         "Yerevan",
466                         "Africa_FarWestern",
467                         "British",
468                         "Sverdlovsk",
469                         "Karachi",
470                         "Malaya",
471                         "Oral",
472                         "Frunze",
473                         "Dutch_Guiana",
474                         "Irish",
475                         "Uralsk",
476                         "Tashkent",
477                         "Kwajalein",
478                         "Ashkhabad",
479                         "Kizilorda",
480                         "Kuybyshev",
481                         "Baku",
482                         "Dushanbe",
483                         "Goose_Bay",
484                         "Liberia",
485                         "Samarkand",
486                         "Tbilisi",
487                         "Borneo",
488                         "Greenland_Central",
489                         "Dacca",
490                         "Aktyubinsk",
491                         "Turkey",
492                         "Urumqi",
493                         "Acre",
494                         "Almaty",
495                         "Anadyr",
496                         "Aqtau",
497                         "Aqtobe",
498                         "Kamchatka",
499                         "Macau",
500                         "Qyzylorda",
501                         "Samara",
502                         "Casey",
503                         "Guam",
504                         "Lanka",
505                         "North_Mariana");
506 
507         final Pattern calendar100 =
508                 PatternCache.get("(coptic|ethiopic-amete-alem|islamic-(rgsa|tbla|umalqura))");
509 
510         /**
511          * Recommended scripts that are allowed for comprehensive coverage. Not-recommended scripts
512          * (according to ScriptMetadata) are filtered out automatically.
513          */
514         final Pattern script100 = PatternCache.get("(Zinh)");
515 
516         final Pattern keys100 =
517                 PatternCache.get(
518                         "(col(Alternate|Backwards|CaseFirst|CaseLevel|HiraganaQuaternary|"
519                                 + "Normalization|Numeric|Reorder|Strength)|kv|sd|mu|timezone|va|variableTop|x|d0|h0|i0|k0|m0|s0)");
520 
521         final Pattern numberingSystem100 =
522                 PatternCache.get(
523                         "("
524                                 + "finance|native|traditional|adlm|ahom|bali|bhks|brah|cakm|cham|cyrl|diak|"
525                                 + "gara|gong|gonm|gukh|hanidays|hmng|hmnp|java|jpanyear|kali|kawi|krai|lana(tham)?|lepc|limb|"
526                                 + "math(bold|dbl|mono|san[bs])|modi|mong|mroo|mtei|mymr(epka|pao|shan|tlng)|"
527                                 + "nagm|newa|nkoo|olck|onao|osma|outlined|rohg|saur|segment|shrd|sin[dh]|sora|sund|sunu|"
528                                 + "takr|talu|tirh|tnsa|vaii|wara|wcho)");
529 
530         final Pattern collation100 =
531                 PatternCache.get(
532                         "("
533                                 + "big5han|compat|dictionary|emoji|eor|gb2312han|phonebook|phonetic|pinyin|searchjl|stroke|traditional|unihan|zhuyin)");
534 
535         SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo();
536         CLDRFile english = testInfo.getEnglish();
537 
538         // Calculate date of the upcoming CLDR release, minus 5 years (deprecation policy)
539         final int versionNumber = Integer.valueOf((CLDRFile.GEN_VERSION).split("\\.")[0]);
540         Calendar cal = Calendar.getInstance();
541         cal.set(versionNumber / 2 + versionNumber % 2 + 2001, 8 - (versionNumber % 2) * 6, 15);
542         Date cldrReleaseMinus5Years = cal.getTime();
543         Set<String> modernCurrencies =
544                 SDI.getCurrentCurrencies(SDI.getCurrencyTerritories(), cldrReleaseMinus5Years, NOW);
545 
546         Set<String> needsNumberSystem = new HashSet<>();
547         DtdData dtdData = DtdData.getInstance(DtdType.ldml);
548         Element numbersElement = dtdData.getElementFromName().get("numbers");
549         for (Element childOfNumbers : numbersElement.getChildren().keySet()) {
550             if (childOfNumbers.containsAttribute("numberSystem")) {
551                 needsNumberSystem.add(childOfNumbers.name);
552             }
553         }
554 
555         for (String path : english.fullIterable()) {
556             logln("Testing path => " + path);
557             XPathParts xpp = XPathParts.getFrozenInstance(path);
558             if (path.endsWith("/alias")
559                     || path.matches(
560                             "//ldml/(identity|contextTransforms|layout|localeDisplayNames/transformNames)/.*")) {
561                 continue;
562             }
563             if (sdi.isDeprecated(DtdType.ldml, path)) {
564                 continue;
565             }
566             Level lvl = sdi.getCoverageLevel(path, "en");
567             if (lvl == Level.UNDETERMINED) {
568                 errln("Undetermined coverage value for path => " + path);
569                 continue;
570             }
571             if (lvl.compareTo(Level.MODERN) <= 0) {
572                 logln("Level OK [" + lvl.toString() + "] for path => " + path);
573                 continue;
574             }
575 
576             if (path.startsWith("//ldml/numbers")) {
577                 // Paths in numbering systems outside "latn" are specifically excluded.
578                 String numberingSystem = xpp.findFirstAttributeValue("numberSystem");
579                 if (numberingSystem != null && !numberingSystem.equals("latn")) {
580                     continue;
581                 }
582                 if (xpp.containsElement("currencySpacing") || xpp.containsElement("list")) {
583                     continue;
584                 }
585                 if (xpp.containsElement("currency")) {
586                     String currencyType = xpp.findAttributeValue("currency", "type");
587                     if (!modernCurrencies.contains(currencyType)) {
588                         continue; // old currency or not tender, so we don't care
589                     }
590                 }
591                 // Currently not collecting timeSeparator data in SurveyTool
592                 if (xpp.containsElement("timeSeparator")) {
593                     continue;
594                 }
595                 // Other paths in numbers without a numbering system are deprecated.
596                 //                if (numberingSystem == null) {
597                 //                    continue;
598                 //                }
599                 if (needsNumberSystem.contains(xpp.getElement(2))) {
600                     continue;
601                 }
602             } else if (xpp.containsElement("zone")) {
603                 String zoneType = xpp.findAttributeValue("zone", "type");
604                 if ((zoneType.startsWith("Etc/GMT") || zoneType.equals("Etc/UTC"))
605                         && path.endsWith("exemplarCity")) {
606                     continue;
607                 }
608                 // We don't survey for short timezone names or at least some alts
609                 if (path.contains("/short/") || path.contains("[@alt=\"formal\"]")) {
610                     continue;
611                 }
612             } else if (xpp.containsElement("metazone")) {
613                 // We don't survey for short metazone names
614                 if (path.contains("/short/")) {
615                     continue;
616                 }
617                 String mzName = xpp.findAttributeValue("metazone", "type");
618                 // Skip inactive metazones.
619                 if (inactiveMetazones.contains(mzName)) {
620                     continue;
621                 }
622                 // Skip paths for daylight or generic mz strings where
623                 // the mz doesn't use DST.
624                 if ((path.endsWith("daylight") || path.endsWith("generic"))
625                         && !LogicalGrouping.metazonesDSTSet.contains(mzName)) {
626                     continue;
627                 }
628             } else if (path.startsWith("//ldml/dates/fields")) {
629                 if ("variant".equals(xpp.findAttributeValue("displayName", "alt"))) {
630                     continue;
631                 }
632                 // relative day/week/month, etc. short or narrow
633                 if (xpp.getElement(-1).equals("relative")) {
634                     String fieldType = xpp.findAttributeValue("field", "type");
635                     if (fieldType.matches(".*-(short|narrow)|quarter")) {
636                         continue;
637                     }
638                 }
639             } else if (xpp.containsElement("language")) {
640                 // Comprehensive coverage is OK for some languages.
641                 String languageType = xpp.findAttributeValue("language", "type");
642                 if (!SDI.getLanguageTcOrBasic().contains(languageType)) {
643                     continue;
644                 }
645             } else if (xpp.containsElement("script")) {
646                 // Skip user defined script codes and alt=short
647                 String scriptType = xpp.findAttributeValue("script", "type");
648                 if (scriptType.startsWith("Q")
649                         || "short".equals(xpp.findAttributeValue("script", "alt"))) {
650                     continue;
651                 }
652                 ScriptMetadata.Info scriptInfo = ScriptMetadata.getInfo(scriptType);
653                 if (scriptInfo == null
654                         || scriptInfo.idUsage != ScriptMetadata.IdUsage.RECOMMENDED) {
655                     continue;
656                 }
657                 if (script100.matcher(scriptType).matches()) {
658                     continue;
659                 }
660             } else if (xpp.containsElement("territory")) {
661                 String territoryType = xpp.findAttributeValue("territory", "type");
662                 if (territoryType.equals("CQ")) { // Exceptionally reserved by ISO-3166
663                     continue;
664                 }
665             } else if (xpp.containsElement("key")) {
666                 // Comprehensive coverage is OK for some key/types.
667                 String keyType = xpp.findAttributeValue("key", "type");
668                 if (keys100.matcher(keyType).matches()) {
669                     continue;
670                 }
671             } else if (xpp.containsElement("type")) {
672                 if ("short".equals(xpp.findAttributeValue("type", "alt"))) {
673                     continue;
674                 }
675                 // Comprehensive coverage is OK for some key/types.
676                 String keyType = xpp.findAttributeValue("type", "key");
677                 if (keys100.matcher(keyType).matches()) {
678                     continue;
679                 }
680                 if (keyType.equals("numbers")) {
681                     String ns = xpp.findAttributeValue("type", "type");
682                     if (numberingSystem100.matcher(ns).matches()) {
683                         continue;
684                     }
685                 }
686                 if (keyType.equals("collation")) {
687                     String ct = xpp.findAttributeValue("type", "type");
688                     if (collation100.matcher(ct).matches()) {
689                         continue;
690                     }
691                 }
692                 if (keyType.equals("calendar")) {
693                     String ct = xpp.findAttributeValue("type", "type");
694                     if (calendar100.matcher(ct).matches()) {
695                         continue;
696                     }
697                 }
698             } else if (xpp.containsElement("variant")) {
699                 // All variant names are comprehensive coverage
700                 continue;
701             } else if (path.startsWith("//ldml/dates/calendars")) {
702                 String calType = xpp.findAttributeValue("calendar", "type");
703                 if (!calType.matches("(gregorian|generic)")) {
704                     continue;
705                 }
706                 // So far we are generating datetimeSkeleton mechanically, no coverage
707                 if (xpp.containsElement("datetimeSkeleton")) {
708                     continue;
709                 }
710                 // The alt="ascii" time patterns are hopefully short-lived. We do not survey
711                 // for them, they can be generated mechanically from the non-alt patterns.
712                 // CLDR-16606
713                 if (path.contains("[@alt=\"ascii\"]")) {
714                     continue;
715                 }
716                 String element = xpp.getElement(-1);
717                 // Skip things that shouldn't normally exist in the generic calendar
718                 // days, dayPeriods, quarters, and months
719                 if (calType.equals("generic")) {
720                     if (element.matches("(day(Period)?|month|quarter|era|appendItem)")) {
721                         continue;
722                     }
723                     if (xpp.containsElement("intervalFormatItem")) {
724                         String intervalFormatID =
725                                 xpp.findAttributeValue("intervalFormatItem", "id");
726                         // "Time" related, so shouldn't be in generic calendar.
727                         if (intervalFormatID.matches("(h|H).*")) {
728                             continue;
729                         }
730                     }
731                     if (xpp.containsElement("dateFormatItem")) {
732                         String dateFormatID = xpp.findAttributeValue("dateFormatItem", "id");
733                         // "Time" related, so shouldn't be in generic calendar.
734                         if (dateFormatID.matches("E?(h|H|m).*")) {
735                             continue;
736                         }
737                     }
738                     if (xpp.containsElement("timeFormat")) {
739                         continue;
740                     }
741                 } else { // Gregorian calendar
742                     if (xpp.containsElement("eraNarrow")) {
743                         continue;
744                     }
745                     if (element.equals("appendItem")) {
746                         String request = xpp.findAttributeValue("appendItem", "request");
747                         if (!request.equals("Timezone")) {
748                             continue;
749                         }
750                     } else if (element.equals("dayPeriod")) {
751                         if ("variant".equals(xpp.findAttributeValue("dayPeriod", "alt"))) {
752                             continue;
753                         }
754                     } else if (element.equals("dateFormatItem")) {
755                         // ldml/dates/calendars/calendar[@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[@id='%dateFormatItems']
756                         assertEquals(path, Level.BASIC, lvl);
757                         continue;
758                     }
759                 }
760             } else if (path.startsWith("//ldml/units")) {
761                 // Skip paths for narrow unit fields.
762                 if ("narrow".equals(xpp.findAttributeValue("unitLength", "type"))
763                         || path.endsWith("/compoundUnitPattern1")) {
764                     continue;
765                 }
766             } else if (xpp.contains("posix")) {
767                 continue;
768             }
769 
770             errln("Comprehensive & no exception for path =>\t" + path);
771         }
772     }
773 
774     public static class TargetsAndSublocales {
775         public final CoverageVariableInfo cvi;
776         public Set<String> scripts;
777         public Set<String> regions;
778 
779         public TargetsAndSublocales(String localeLanguage) {
780             cvi = SDI.getCoverageVariableInfo(localeLanguage);
781             scripts = new TreeSet<>();
782             regions = new TreeSet<>();
783         }
784 
785         public boolean addScript(String localeScript) {
786             return scripts.add(localeScript);
787         }
788 
789         public boolean addRegion(String localeRegion) {
790             return regions.add(localeRegion);
791         }
792     }
793 
794     public void TestCoverageVariableInfo() {
795         /**
796          * Compare the targetScripts and targetTerritories for a language to what we actually have
797          * in locales
798          */
799         Map<String, TargetsAndSublocales> langToTargetsAndSublocales = new TreeMap<>();
800         org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory();
801         for (CLDRLocale locale : factory.getAvailableCLDRLocales()) {
802             String language = locale.getLanguage();
803             if (language.length() == 0 || language.equals("root")) {
804                 continue;
805             }
806             TargetsAndSublocales targetsAndSublocales = langToTargetsAndSublocales.get(language);
807             if (targetsAndSublocales == null) {
808                 targetsAndSublocales = new TargetsAndSublocales(language);
809                 langToTargetsAndSublocales.put(language, targetsAndSublocales);
810             }
811             String script = locale.getScript();
812             if (script.length() > 0) {
813                 targetsAndSublocales.addScript(script);
814             }
815             String region = locale.getCountry();
816             if (region.length() > 0
817                     && region.length() < 3) { // do not want numeric codes like 001, 419
818                 targetsAndSublocales.addRegion(region);
819             }
820         }
821 
822         for (String language : langToTargetsAndSublocales.keySet()) {
823             TargetsAndSublocales targetsAndSublocales = langToTargetsAndSublocales.get(language);
824             if (targetsAndSublocales == null) {
825                 continue;
826             }
827             Set<String> targetScripts = new TreeSet<>(targetsAndSublocales.cvi.targetScripts);
828             Set<String> localeScripts = targetsAndSublocales.scripts;
829             localeScripts.removeAll(targetScripts);
830             if (localeScripts.size() > 0) {
831                 errln(
832                         "Missing scripts for language: "
833                                 + language
834                                 + ", target scripts: "
835                                 + targetScripts
836                                 + ", but locales also have: "
837                                 + localeScripts);
838             }
839             Set<String> targetRegions = new TreeSet<>(targetsAndSublocales.cvi.targetTerritories);
840             Set<String> localeRegions = targetsAndSublocales.regions;
841             localeRegions.removeAll(targetRegions);
842             if (localeRegions.size() > 0) {
843                 errln(
844                         "Missing regions for language: "
845                                 + language
846                                 + ", target regions: "
847                                 + targetRegions
848                                 + ", but locales also have: "
849                                 + localeRegions);
850             }
851         }
852     }
853 
testBreakingLogicalGrouping()854     public void testBreakingLogicalGrouping() {
855         checkBreakingLogicalGrouping("en");
856         checkBreakingLogicalGrouping("ar");
857         checkBreakingLogicalGrouping("de");
858         checkBreakingLogicalGrouping("pl");
859     }
860 
checkBreakingLogicalGrouping(String localeId)861     private void checkBreakingLogicalGrouping(String localeId) {
862         SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo();
863         CLDRFile cldrFile = testInfo.getCldrFactory().make(localeId, true);
864         HashSet<String> seen = new HashSet<>();
865         Multimap<Level, String> levelToPaths = TreeMultimap.create();
866         int count = 0;
867         for (String path : cldrFile.fullIterable()) {
868             if (seen.contains(path)) {
869                 continue;
870             }
871             Set<String> grouping = LogicalGrouping.getPaths(cldrFile, path);
872             seen.add(path);
873             if (grouping == null) {
874                 continue;
875             }
876             seen.addAll(grouping);
877             levelToPaths.clear();
878             for (String groupingPath : grouping) {
879                 if (LogicalGrouping.isOptional(cldrFile, groupingPath)) {
880                     continue;
881                 }
882                 Level level = sdi.getCoverageLevel(groupingPath, localeId);
883                 levelToPaths.put(level, groupingPath);
884             }
885             if (levelToPaths.keySet().size() <= 1) {
886                 continue;
887             }
888             // we have a failure
889             for (Entry<Level, Collection<String>> entry : levelToPaths.asMap().entrySet()) {
890                 errln(
891                         localeId
892                                 + " ("
893                                 + count
894                                 + ") Broken Logical Grouping: "
895                                 + entry.getKey()
896                                 + " => "
897                                 + entry.getValue());
898             }
899             ++count;
900         }
901     }
902 
testLogicalGroupingSamples()903     public void testLogicalGroupingSamples() {
904         getLogger().fine(GrammarInfo.getGrammarLocales().toString());
905         String[][] test = {
906             {
907                 "de", "SINGLETON", "//ldml/localeDisplayNames/localeDisplayPattern/localePattern",
908             },
909             {
910                 "de",
911                 "METAZONE",
912                 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/generic",
913                 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/standard",
914                 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/daylight",
915             },
916             {
917                 "de",
918                 "DAYS",
919                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"sun\"]",
920                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"mon\"]",
921                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"tue\"]",
922                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"wed\"]",
923                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"thu\"]",
924                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"fri\"]",
925                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"sat\"]",
926             },
927             {
928                 "nl",
929                 "DAY_PERIODS",
930                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]",
931                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"afternoon1\"]",
932                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"evening1\"]",
933                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"night1\"]",
934                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"midnight\"]",
935             },
936             {
937                 "de",
938                 "QUARTERS",
939                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"1\"]",
940                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"2\"]",
941                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"3\"]",
942                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"4\"]",
943             },
944             {
945                 "de",
946                 "MONTHS",
947                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"1\"]",
948                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"2\"]",
949                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"3\"]",
950                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"4\"]",
951                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"5\"]",
952                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"6\"]",
953                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"7\"]",
954                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"8\"]",
955                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"9\"]",
956                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"10\"]",
957                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"11\"]",
958                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"12\"]",
959             },
960             {
961                 "de",
962                 "RELATIVE",
963                 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"-1\"]",
964                 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"0\"]",
965                 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"1\"]",
966             },
967             {
968                 "de",
969                 "DECIMAL_FORMAT_LENGTH",
970                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]",
971                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]",
972                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]",
973                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]",
974                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]",
975                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]",
976             },
977             {
978                 "cs",
979                 "COUNT",
980                 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"one\"]",
981                 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"few\"]",
982                 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"many\"]",
983                 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"other\"]",
984             },
985             {
986                 "de",
987                 "COUNT",
988                 "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"one\"]",
989                 "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"other\"]",
990             },
991             {
992                 "de",
993                 "COUNT_CASE",
994                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]",
995                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"dative\"]",
996                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]",
997                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"]",
998                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]",
999                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"dative\"]",
1000                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]",
1001                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"]",
1002             },
1003             {
1004                 "hi",
1005                 "COUNT_CASE_GENDER",
1006                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]",
1007                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]",
1008                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]",
1009                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]",
1010                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"oblique\"]",
1011                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"oblique\"]",
1012                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"oblique\"]",
1013                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"oblique\"]"
1014             }
1015         };
1016         Set<PathType> seenPt = new TreeSet<>(Arrays.asList(PathType.values()));
1017         for (String[] row : test) {
1018             String locale = row[0];
1019             PathType expectedPathType = PathType.valueOf(row[1]);
1020             CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true);
1021             List<String> paths = Arrays.asList(row);
1022             paths = paths.subList(2, paths.size());
1023             Set<String> expected = new TreeSet<>(paths);
1024             Set<Multimap<String, String>> seen = new LinkedHashSet<>();
1025             for (String path : expected) {
1026                 Set<String> grouping = new TreeSet<>(LogicalGrouping.getPaths(cldrFile, path));
1027                 final Multimap<String, String> deltaValue = delta(expected, grouping);
1028                 if (seen.add(deltaValue)) {
1029                     assertEquals(
1030                             "Logical group for " + locale + ", " + path,
1031                             ImmutableListMultimap.of(),
1032                             deltaValue);
1033                 }
1034                 PathType actualPathType = PathType.getPathTypeFromPath(path);
1035                 assertEquals("PathType", expectedPathType, actualPathType);
1036             }
1037             seenPt.remove(expectedPathType);
1038         }
1039         assertEquals("PathTypes tested", Collections.emptySet(), seenPt);
1040     }
1041 
delta(Set<String> expected, Set<String> grouping)1042     private Multimap<String, String> delta(Set<String> expected, Set<String> grouping) {
1043         if (expected.equals(grouping)) {
1044             return ImmutableListMultimap.of();
1045         }
1046         Multimap<String, String> result = LinkedHashMultimap.create();
1047         TreeSet<String> aMinusB = new TreeSet<>(expected);
1048         aMinusB.removeAll(grouping);
1049         result.putAll("expected-actual", aMinusB);
1050         TreeSet<String> bMinusA = new TreeSet<>(grouping);
1051         bMinusA.removeAll(expected);
1052         result.putAll("actual-expected", bMinusA);
1053         return result;
1054     }
1055 
1056     static class CoverageStatus {
1057 
1058         private Level level;
1059         private boolean inRoot;
1060         private boolean inId;
1061         private Level languageLevel;
1062         private String displayName;
1063 
CoverageStatus( Level level, boolean inRoot, boolean inId, Level languageLevel, String displayName)1064         public CoverageStatus(
1065                 Level level,
1066                 boolean inRoot,
1067                 boolean inId,
1068                 Level languageLevel,
1069                 String displayName) {
1070             this.level = level;
1071             this.inRoot = inRoot;
1072             this.inId = inId;
1073             this.languageLevel = languageLevel == null ? Level.UNDETERMINED : languageLevel;
1074             this.displayName = displayName;
1075         }
1076 
1077         @Override
toString()1078         public String toString() {
1079             return (inRoot ? "root" : "x")
1080                     + "\t"
1081                     + (inId ? "ids" : "x")
1082                     + "\t"
1083                     + stringForm(languageLevel)
1084                     + "\t"
1085                     + stringForm(level)
1086                     + "\t"
1087                     + displayName;
1088         }
1089 
stringForm(Level level2)1090         private String stringForm(Level level2) {
1091             if (level == null) {
1092                 return "υnd";
1093             }
1094             switch (level2) {
1095                 case UNDETERMINED:
1096                     return "υnd";
1097                 case COMPREHENSIVE:
1098                     return "ϲomp";
1099                 default:
1100                     return level2.toString();
1101             }
1102         }
1103     }
1104 
testLSR()1105     public void testLSR() {
1106         SupplementalDataInfo supplementalData = testInfo.getSupplementalDataInfo();
1107         org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory();
1108         CLDRFile root = factory.make(LocaleNames.ROOT, true);
1109         CoverageLevel2 coverageLevel =
1110                 CoverageLevel2.getInstance(supplementalData, "qtz"); // non-existent locale
1111 
1112         Set<String> langsRoot = new TreeSet<>();
1113         Set<String> scriptsRoot = new TreeSet<>();
1114         Set<String> regionsRoot = new TreeSet<>();
1115 
1116         // Get root LSR codes
1117 
1118         for (String path : root) {
1119             if (!path.startsWith("//ldml/localeDisplayNames/")) {
1120                 continue;
1121             }
1122             XPathParts parts = XPathParts.getFrozenInstance(path);
1123             String code = parts.getAttributeValue(3, "type");
1124             if (code == null || code.contains("_")) {
1125                 continue;
1126             }
1127             switch (parts.getElement(3)) {
1128                 case "language":
1129                     langsRoot.add(code);
1130                     break;
1131                 case "script":
1132                     scriptsRoot.add(code);
1133                     break;
1134                 case "territory":
1135                     regionsRoot.add(code);
1136                     break;
1137             }
1138         }
1139         langsRoot = ImmutableSet.copyOf(langsRoot);
1140         scriptsRoot = ImmutableSet.copyOf(scriptsRoot);
1141         regionsRoot = ImmutableSet.copyOf(regionsRoot);
1142 
1143         // get CLDR locale IDs' codes
1144 
1145         // the maps are from codes (like en) to the best level in the CLDR Organization.
1146         Map<String, Level> langs = new TreeMap<>();
1147         Map<String, Level> scripts = new TreeMap<>();
1148         Map<String, Level> regions = new TreeMap<>();
1149         LikelySubtags likely = new LikelySubtags();
1150 
1151         LanguageTagParser ltp = new LanguageTagParser();
1152         for (String locale : factory.getAvailable()) {
1153             Level languageLevel = STANDARD_CODES.getLocaleCoverageLevel(Organization.cldr, locale);
1154             if (languageLevel == null || languageLevel == Level.UNDETERMINED) {
1155                 languageLevel = Level.CORE;
1156             }
1157             ltp.set(locale);
1158             likely.maximize(ltp);
1159             addBestLevel(langs, ltp.getLanguage(), languageLevel);
1160             addBestLevel(scripts, ltp.getScript(), languageLevel);
1161             addBestLevel(regions, ltp.getRegion(), languageLevel);
1162         }
1163         regions.remove("");
1164         scripts.remove("");
1165 
1166         // get the data
1167 
1168         Map<String, CoverageStatus> data = new TreeMap<>();
1169 
1170         // This is a map from integers (representing language, script or region; should rewrite to
1171         // use enums)
1172         // to a row of data:
1173         //      name,
1174         //      map code => best cldr org level,
1175         //      codes in root
1176         //      expected coverage levels levels
1177         // should change the row of data into a class; would be much easier to understand
1178 
1179         ImmutableMap<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeToInfo =
1180                 ImmutableMap.of(
1181                         CLDRFile.LANGUAGE_NAME,
1182                         Row.of("language", langs, langsRoot, Level.MODERN),
1183                         CLDRFile.SCRIPT_NAME,
1184                         Row.of("script", scripts, scriptsRoot, Level.MODERATE),
1185                         CLDRFile.TERRITORY_NAME,
1186                         Row.of("region", regions, regionsRoot, Level.MODERATE));
1187 
1188         for (Entry<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeAndInfo :
1189                 typeToInfo.entrySet()) {
1190             int type = typeAndInfo.getKey();
1191             String name = typeAndInfo.getValue().get0();
1192             Map<String, Level> idPartMap =
1193                     typeAndInfo.getValue().get1(); // map from code to best cldr level
1194             Set<String> setRoot = typeAndInfo.getValue().get2(); // set of codes in root
1195             Level targetLevel =
1196                     typeAndInfo.getValue().get3(); // it looks like the targetLevel is ignored
1197 
1198             for (String code : Sets.union(idPartMap.keySet(), setRoot)) {
1199                 String displayName = testInfo.getEnglish().getName(type, code);
1200                 String path = CLDRFile.getKey(type, code);
1201                 Level level = coverageLevel.getLevel(path);
1202                 data.put(
1203                         name + "\t" + code,
1204 
1205                         // Level level;
1206                         // boolean inRoot;
1207                         // boolean inId;
1208                         // Level languageLevel; best in cldr org
1209                         // String displayName;
1210                         new CoverageStatus(
1211                                 level,
1212                                 setRoot.contains(code),
1213                                 idPartMap.containsKey(code),
1214                                 idPartMap.get(code),
1215                                 displayName));
1216             }
1217         }
1218         if (SHOW_LSR_DATA) {
1219 
1220             System.out.println(
1221                     "\nType\tCode\tIn Root\tIn CLDR Locales\tCLDR TargeLevel\tRoot Path Level\tCombinations");
1222             for (Entry<String, CoverageStatus> entry : data.entrySet()) {
1223                 System.out.println(entry.getKey() + "\t" + entry.getValue());
1224             }
1225             System.out.println();
1226             for (Entry<String, CoverageStatus> entry : data.entrySet()) {
1227                 final String key = entry.getKey();
1228                 if (!key.startsWith("language")) {
1229                     continue;
1230                 }
1231                 final CoverageStatus value = entry.getValue();
1232                 if (value.inId) {
1233                     continue;
1234                 }
1235                 String[] parts = key.split("\t");
1236                 PopulationData population = SDI.getBaseLanguagePopulationData(parts[1]);
1237                 if (population == null) {
1238                     System.out.println(key + "\t" + value.displayName + "\t" + value + "\t-1\t-1");
1239                 } else {
1240                     System.out.println(
1241                             key
1242                                     + "\t"
1243                                     + value.displayName
1244                                     + "\t"
1245                                     + value
1246                                     + "\t"
1247                                     + population.getPopulation()
1248                                     + "\t"
1249                                     + population.getLiteratePopulation());
1250                 }
1251             }
1252         }
1253 
1254         // just check languages
1255         Set<String> ids = new TreeSet<>();
1256         Set<String> missing = new TreeSet<>();
1257         for (Entry<String, CoverageStatus> entry : data.entrySet()) {
1258             final String key = entry.getKey();
1259             if (!key.startsWith("language")) {
1260                 continue;
1261             }
1262             final CoverageStatus value = entry.getValue();
1263             if (value.inId) {
1264                 String[] parts = key.split("\t"); // split into language and code
1265                 ids.add(parts[1]);
1266                 if (!value.inRoot) {
1267                     missing.add(parts[1]);
1268                 }
1269             }
1270         }
1271         if (!assertEquals(
1272                 "Language subtags in a locale's ID must be in one of the attributeValueValidity.xml $language* sets, typically $languageNonTcLtBasic  ("
1273                         + missing.size()
1274                         + ")",
1275                 "",
1276                 Joiner.on(' ').join(missing))) {
1277             warnln(
1278                     "Full set for resetting $language in attributeValueValidity.xml ("
1279                             + ids.size()
1280                             + "):"
1281                             + breakLines(ids, "\n                "));
1282         }
1283     }
1284 
breakLines(Set<String> ids, String indent)1285     private String breakLines(Set<String> ids, String indent) {
1286         StringBuilder result = new StringBuilder();
1287         int lastFirstChar = 0;
1288         for (String id : ids) {
1289             int firstChar = id.codePointAt(0);
1290             result.append(firstChar == lastFirstChar ? " " : indent);
1291             result.append(id);
1292             lastFirstChar = firstChar;
1293         }
1294         return result.toString();
1295     }
1296 
addBestLevel(Map<String, Level> codeToBestLevel, String code, Level level)1297     private void addBestLevel(Map<String, Level> codeToBestLevel, String code, Level level) {
1298         if (level != Level.UNDETERMINED) {
1299             int debug = 0;
1300         }
1301         Level old = codeToBestLevel.get(code);
1302         if (old == null) {
1303             codeToBestLevel.put(code, level);
1304         } else if (level.compareTo(old) > 0) {
1305             codeToBestLevel.put(code, level);
1306         } else if (level != old) {
1307             int debug = 0;
1308         }
1309     }
1310 
TestEnglishCoverage()1311     public void TestEnglishCoverage() {
1312         Output<String> pathWhereFound = new Output<>();
1313         Output<String> localeWhereFound = new Output<>();
1314         Set<Row.R5<String, String, Boolean, Boolean, Level>> inherited = new TreeSet<>();
1315         for (String path : ENGLISH) {
1316             String value = ENGLISH.getStringValueWithBailey(path, pathWhereFound, localeWhereFound);
1317             final boolean samePath = path.equals(pathWhereFound.value);
1318             final boolean sameLocale = "en".equals(localeWhereFound.value);
1319             if (!samePath) {
1320                 Level level = SDI.getCoverageLevel(path, "en");
1321                 if (level.compareTo(Level.MODERN) <= 0) {
1322                     inherited.add(Row.of(path, value, samePath, sameLocale, level));
1323                 }
1324             }
1325         }
1326         if (!assertEquals("English has sideways inheritance:", 0, inherited.size())) {
1327             System.out.println("Check the following, then use in modify_config.txt\n");
1328             String pattern = "locale=en ; action=add ; new_path=%s ; new_value=%s";
1329             for (Row.R5<String, String, Boolean, Boolean, Level> row : inherited) {
1330                 System.out.println(String.format(pattern, row.get0(), row.get1()));
1331                 if (DEBUG) {
1332                     System.out.println(
1333                             String.format(
1334                                     "%s\t%s\t%s\t%s\t%s",
1335                                     row.get0(), row.get1(), row.get2(), row.get3(), row.get4()));
1336                 }
1337             }
1338         }
1339     }
1340 
TestNumberElementsCoverage()1341     public void TestNumberElementsCoverage() {
1342         class NumPathCoverageItem {
1343             public String numPath;
1344             public Level defaultLevel;
1345             public Level nativeLevel;
1346             public Level financeLevel;
1347 
1348             public NumPathCoverageItem(
1349                     String path, Level defLevel, Level natLevel, Level finLevel) {
1350                 numPath = path;
1351                 defaultLevel = defLevel;
1352                 nativeLevel = natLevel;
1353                 financeLevel = finLevel;
1354             }
1355         }
1356         final NumPathCoverageItem[] testItems = {
1357             // number element path, then expected max coverage levels if  xxxx is replaced
1358             // respectively by the default, native, and financial number system.
1359             new NumPathCoverageItem(
1360                     "//ldml/numbers/currencyFormats[@numberSystem=\"xxxx\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1361                     Level.MODERATE,
1362                     Level.MODERATE,
1363                     Level.MODERATE),
1364             new NumPathCoverageItem(
1365                     "//ldml/numbers/decimalFormats[@numberSystem=\"xxxx\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1366                     Level.MODERATE,
1367                     Level.MODERATE,
1368                     Level.MODERATE),
1369             new NumPathCoverageItem(
1370                     "//ldml/numbers/symbols[@numberSystem=\"xxxx\"]/decimal",
1371                     Level.MODERATE,
1372                     Level.MODERATE,
1373                     Level.MODERATE),
1374             new NumPathCoverageItem(
1375                     "//ldml/numbers/symbols[@numberSystem=\"xxxx\"]/group",
1376                     Level.MODERATE,
1377                     Level.MODERATE,
1378                     Level.MODERATE),
1379             new NumPathCoverageItem(
1380                     "//ldml/numbers/symbols[@numberSystem=\"xxxx\"]/infinity",
1381                     Level.MODERN,
1382                     Level.MODERN,
1383                     Level.MODERN),
1384             new NumPathCoverageItem(
1385                     "//ldml/numbers/symbols[@numberSystem=\"xxxx\"]/perMille",
1386                     Level.MODERN,
1387                     Level.MODERN,
1388                     Level.MODERN),
1389         };
1390         org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory();
1391         for (String localeId : factory.getAvailable()) {
1392             CLDRFile cldrFile = factory.make(localeId, true);
1393             String defaultNumberSystem =
1394                     cldrFile.getStringValue("//ldml/numbers/defaultNumberingSystem");
1395             String nativeNumberSystem =
1396                     cldrFile.getStringValue("//ldml/numbers/otherNumberingSystems/native");
1397             String financeNumberSystem =
1398                     cldrFile.getStringValue(
1399                             "//ldml/numbers/otherNumberingSystems/finance"); // could be null
1400             for (NumPathCoverageItem item : testItems) {
1401                 String pathForDefault = item.numPath.replace("xxxx", defaultNumberSystem);
1402                 Level defaultLevel = SDI.getCoverageLevel(pathForDefault, localeId);
1403                 if (defaultLevel.compareTo(item.defaultLevel) > 0) {
1404                     errln(
1405                             localeId
1406                                     + ", path "
1407                                     + pathForDefault
1408                                     + ", expected coverage for default system to be "
1409                                     + item.defaultLevel.toString()
1410                                     + " or lower, but got "
1411                                     + defaultLevel.toString());
1412                 }
1413                 String pathForNative = item.numPath.replace("xxxx", nativeNumberSystem);
1414                 Level nativeLevel = SDI.getCoverageLevel(pathForNative, localeId);
1415                 if (nativeLevel.compareTo(item.nativeLevel) > 0) {
1416                     errln(
1417                             localeId
1418                                     + ", path "
1419                                     + pathForNative
1420                                     + ", expected coverage for native system to be "
1421                                     + item.nativeLevel.toString()
1422                                     + " or lower, but got "
1423                                     + nativeLevel.toString());
1424                 }
1425                 if (financeNumberSystem != null) {
1426                     String pathForFinance = item.numPath.replace("xxxx", financeNumberSystem);
1427                     Level financeLevel = SDI.getCoverageLevel(pathForFinance, localeId);
1428                     if (financeLevel.compareTo(item.financeLevel) > 0) {
1429                         errln(
1430                                 localeId
1431                                         + ", path "
1432                                         + pathForFinance
1433                                         + ", expected coverage for finance system to be "
1434                                         + item.financeLevel.toString()
1435                                         + " or lower, but got "
1436                                         + financeLevel.toString());
1437                     }
1438                 }
1439             }
1440         }
1441     }
1442 }
1443