• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.Collection;
6 import java.util.Collections;
7 import java.util.Date;
8 import java.util.EnumMap;
9 import java.util.EnumSet;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.LinkedHashMap;
14 import java.util.LinkedHashSet;
15 import java.util.List;
16 import java.util.Locale;
17 import java.util.Map;
18 import java.util.Map.Entry;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.TreeSet;
22 import java.util.logging.Logger;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25 
26 import org.unicode.cldr.draft.ScriptMetadata;
27 import org.unicode.cldr.test.CoverageLevel2;
28 import org.unicode.cldr.tool.LikelySubtags;
29 import org.unicode.cldr.tool.PluralMinimalPairs;
30 import org.unicode.cldr.tool.PluralRulesFactory;
31 import org.unicode.cldr.util.Builder;
32 import org.unicode.cldr.util.CLDRConfig;
33 import org.unicode.cldr.util.CLDRFile;
34 import org.unicode.cldr.util.CLDRFile.WinningChoice;
35 import org.unicode.cldr.util.CLDRLocale;
36 import org.unicode.cldr.util.CldrUtility;
37 import org.unicode.cldr.util.GrammarInfo;
38 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
39 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
40 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
41 import org.unicode.cldr.util.Iso639Data;
42 import org.unicode.cldr.util.Iso639Data.Scope;
43 import org.unicode.cldr.util.IsoCurrencyParser;
44 import org.unicode.cldr.util.LanguageTagCanonicalizer;
45 import org.unicode.cldr.util.LanguageTagParser;
46 import org.unicode.cldr.util.Level;
47 import org.unicode.cldr.util.Organization;
48 import org.unicode.cldr.util.Pair;
49 import org.unicode.cldr.util.PluralRanges;
50 import org.unicode.cldr.util.PreferredAndAllowedHour;
51 import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle;
52 import org.unicode.cldr.util.StandardCodes;
53 import org.unicode.cldr.util.StandardCodes.CodeType;
54 import org.unicode.cldr.util.StandardCodes.LstrType;
55 import org.unicode.cldr.util.SupplementalDataInfo;
56 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
57 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
58 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle;
59 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
60 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo;
61 import org.unicode.cldr.util.SupplementalDataInfo.DateRange;
62 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange;
63 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
64 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
65 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
66 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
67 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
68 import org.unicode.cldr.util.SupplementalDataInfo.SampleList;
69 import org.unicode.cldr.util.Validity;
70 import org.unicode.cldr.util.Validity.Status;
71 
72 import com.google.common.base.Joiner;
73 import com.google.common.collect.ImmutableSet;
74 import com.google.common.collect.Multimap;
75 import com.google.common.collect.TreeMultimap;
76 import com.ibm.icu.impl.Relation;
77 import com.ibm.icu.impl.Row;
78 import com.ibm.icu.impl.Row.R2;
79 import com.ibm.icu.impl.Row.R3;
80 import com.ibm.icu.impl.Utility;
81 import com.ibm.icu.lang.UCharacter;
82 import com.ibm.icu.lang.UCharacterEnums;
83 import com.ibm.icu.lang.UScript;
84 import com.ibm.icu.text.PluralRules;
85 import com.ibm.icu.text.PluralRules.FixedDecimal;
86 import com.ibm.icu.text.PluralRules.FixedDecimalRange;
87 import com.ibm.icu.text.PluralRules.FixedDecimalSamples;
88 import com.ibm.icu.text.PluralRules.Operand;
89 import com.ibm.icu.text.PluralRules.SampleType;
90 import com.ibm.icu.text.StringTransform;
91 import com.ibm.icu.text.UnicodeSet;
92 import com.ibm.icu.util.Output;
93 import com.ibm.icu.util.TimeZone;
94 import com.ibm.icu.util.ULocale;
95 
96 public class TestSupplementalInfo extends TestFmwkPlus {
    // CLDR configuration object obtained from CLDRConfig.getInstance(); used below to
    // fetch the supplemental data.
    static CLDRConfig testInfo = CLDRConfig.getInstance();

    // Standard code tables obtained once from StandardCodes.make().
    private static final StandardCodes STANDARD_CODES = StandardCodes.make();

    // Supplemental data taken from testInfo; the tests in this class query it for
    // plural rules, plural ranges, script codes, etc.
    private static final SupplementalDataInfo SUPPLEMENTAL = testInfo
        .getSupplementalDataInfo();
103 
main(String[] args)104     public static void main(String[] args) {
105         new TestSupplementalInfo().run(args);
106     }
107 
TestPluralSampleOrder()108     public void TestPluralSampleOrder() {
109         HashSet<PluralInfo> seen = new HashSet<>();
110         for (String locale : SUPPLEMENTAL.getPluralLocales()) {
111             if (locale.equals("root")) {
112                 continue;
113             }
114             PluralInfo pi = SUPPLEMENTAL.getPlurals(locale);
115             if (seen.contains(pi)) {
116                 continue;
117             }
118             seen.add(pi);
119             for (SampleType s : SampleType.values()) {
120                 for (Count c : pi.getCounts(s)) {
121                     FixedDecimalSamples sSamples = pi.getPluralRules()
122                         .getDecimalSamples(c.toString(), s);
123                     if (sSamples == null) {
124                         errln(locale + " no sample for " + c);
125                         continue;
126                     }
127                     if (s == SampleType.DECIMAL) {
128                         continue; // skip
129                     }
130                     FixedDecimalRange lastSample = null;
131                     for (FixedDecimalRange sample : sSamples.samples) {
132                         if (lastSample != null) {
133                             if (compare(lastSample.start,sample.start) > 0) {
134                                 errln(locale + ":" + c + ": out of order with "
135                                     + lastSample + " > " + sample);
136                             } else if (false) {
137                                 logln(locale + ":" + c + ": in order with "
138                                     + lastSample + " < " + sample);
139                             }
140                         }
141                         lastSample = sample;
142                     }
143                 }
144             }
145         }
146     }
147 
148     /**
149      * Hack until ICU's FixedDecimal is fixed
150      *
151      */
compare(PluralRules.FixedDecimal me, PluralRules.FixedDecimal other)152     public static int compare(PluralRules.FixedDecimal me, PluralRules.FixedDecimal other) {
153         if (me.getPluralOperand(Operand.e) != other.getPluralOperand(Operand.e)) {
154             return me.getPluralOperand(Operand.e) < other.getPluralOperand(Operand.e) ? -1 : 1;
155         }
156         if (me.getIntegerValue() != other.getIntegerValue()) {
157             return me.getIntegerValue() < other.getIntegerValue() ? -1 : 1;
158         }
159         if (me.getSource() != other.getSource()) {
160             return me.getSource() < other.getSource() ? -1 : 1;
161         }
162         if (me.getVisibleDecimalDigitCount() != other.getVisibleDecimalDigitCount()) {
163             return me.getVisibleDecimalDigitCount() < other.getVisibleDecimalDigitCount() ? -1 : 1;
164         }
165         long diff = me.getDecimalDigits() - other.getDecimalDigits();
166         if (diff != 0) {
167             return diff < 0 ? -1 : 1;
168         }
169         return 0;
170     }
171 
TestPluralRanges()172     public void TestPluralRanges() {
173         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
174         Set<String> localesToTest = new TreeSet<>(
175             SUPPLEMENTAL.getPluralRangesLocales());
176         for (String locale : StandardCodes.make().getLocaleCoverageLocales(
177             "google")) { // superset
178             if (locale.equals("*") || locale.contains("_")) {
179                 continue;
180             }
181             localesToTest.add(locale);
182         }
183         Set<String> modernLocales = StandardCodes.make()
184             .getLocaleCoverageLocales(Organization.cldr,
185                 EnumSet.of(Level.MODERN));
186 
187         Output<FixedDecimal> maxSample = new Output<>();
188         Output<FixedDecimal> minSample = new Output<>();
189 
190         for (String locale : localesToTest) {
191             final String templateLine = "Template for " + ULocale.getDisplayName(locale, "en") + " (" + locale + ") translators to fix:";
192             PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
193             Set<Count> counts = pluralInfo.getCounts();
194 
195             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(new ULocale(locale).toString());
196 
197             // check that there are no null values
198             PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
199             if (pluralRanges == null) {
200                 if (!modernLocales.contains(locale)) {
201                     logln("Missing plural ranges for " + locale);
202                 } else {
203                     errOrLog(CoverageIssue.error, locale + "\tMissing plural ranges", "Cldrbug:7839", "Missing plural data for modern locales");
204                     StringBuilder failureCases = new StringBuilder(templateLine);
205                     for (Count start : counts) {
206                         for (Count end : counts) {
207                             pluralInfo.rangeExists(start, end, minSample, maxSample);
208                             final String rangeLine = getRangeLine(start, end, null, maxSample, minSample, samplePatterns);
209                             failureCases.append("\n" + locale + "\t" + rangeLine);
210                         }
211                     }
212                     errOrLog(CoverageIssue.warn, failureCases.toString());
213                 }
214                 continue;
215             }
216             EnumSet<Count> found = EnumSet.noneOf(Count.class);
217             for (Count count : Count.values()) {
218                 if (pluralRanges.isExplicitlySet(count)
219                     && !counts.contains(count)) {
220                     assertTrue(
221                         locale
222                         + "\t pluralRanges categories must be valid for locale:\t"
223                         + count + " must be in " + counts,
224                         !pluralRanges.isExplicitlySet(count));
225                 }
226                 for (Count end : Count.values()) {
227                     Count result = pluralRanges.getExplicit(count, end);
228                     if (result != null) {
229                         found.add(result);
230                     }
231                 }
232             }
233 
234             // check empty range results
235             if (found.isEmpty()) {
236                 errOrLog(CoverageIssue.error, "Empty range results for " + locale, "Cldrbug:7839", "Missing plural data for modern locales");
237             } else {
238                 if (samplePatterns == null) {
239                     errOrLog(CoverageIssue.error, locale + "\tMissing sample patterns", "Cldrbug:7839", "Missing plural data for modern locales");
240                 } else {
241                     for (Count result : found) {
242                         String samplePattern = samplePatterns.get(
243                             PluralRules.PluralType.CARDINAL, result);
244                         if (samplePattern != null && !samplePattern.contains("{0}")) {
245                             errln("Plural Ranges cannot have results that don't use {0} in samples: "
246                                 + locale
247                                 + ", "
248                                 + result
249                                 + "\t«"
250                                 + samplePattern + "»");
251                         }
252                     }
253                 }
254                 if (isVerbose()) {
255                     logln("Range results for " + locale + ":\t" + found);
256                 }
257             }
258 
259             // check for missing values
260             boolean failure = false;
261             StringBuilder failureCases = new StringBuilder(templateLine);
262             for (Count start : counts) {
263                 for (Count end : counts) {
264                     boolean needsValue = pluralInfo.rangeExists(start, end,
265                         minSample, maxSample);
266                     Count explicitValue = pluralRanges.getExplicit(start, end);
267                     final String rangeLine = getRangeLine(start, end, explicitValue, maxSample, minSample, samplePatterns);
268                     failureCases.append("\n" + locale + "\t" + rangeLine);
269                     if (needsValue && explicitValue == null) {
270                         errOrLog(CoverageIssue.error, locale + "\tNo explicit value for range: "
271                             + rangeLine,
272                             "Cldrbug:7839", "Missing plural data for modern locales");
273                         failure = true;
274                         failureCases.append("\tError — need explicit result");
275                     } else if (!needsValue && explicitValue != null) {
276                         errOrLog(CoverageIssue.error, locale + "\tDoesn't need explicit value, but has one: "
277                             + PluralRanges.showRange(start, end, explicitValue),
278                             "Cldrbug:7839", "Missing plural data for modern locales");
279                         failureCases.append("\tUnnecessary");
280                         failure = true;
281                     } else {
282                         failureCases.append("\tOK");
283                     }
284                 }
285             }
286             if (failure) {
287                 errOrLog(CoverageIssue.warn, failureCases.toString());
288             }
289         }
290     }
291 
getRangeLine(Count start, Count end, Count result, Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample, PluralMinimalPairs samplePatterns)292     private String getRangeLine(Count start, Count end, Count result,
293         Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample,
294         PluralMinimalPairs samplePatterns) {
295         final String range = minSample + "–" + maxSample;
296         String example = range;
297         if (samplePatterns != null) {
298             example = "";
299             if (result != null) {
300                 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result);
301                 example += "«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»";
302             } else {
303                 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) {
304                     String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c);
305                     example += c + ":«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»" + "?\tOR ";
306                 }
307                 example += " …";
308             }
309         }
310         return start + "\t" + end + "\t" + (result == null ? "?" : result.toString()) + "\t" + example;
311     }
312 
getRangeLine(Count count, PluralRules pluralRules, String pattern)313     private String getRangeLine(Count count, PluralRules pluralRules, String pattern) {
314         String sample = "?";
315         FixedDecimalSamples exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER);
316         if (exampleList == null) {
317             exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL);
318         }
319         FixedDecimal sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList);
320         sample = sampleDecimal.toString();
321 
322         String example = pattern == null ? "NO-SAMPLE!" : "«" + pattern.replace("{0}", sample) + "»";
323         return count + "\t" + example;
324     }
325 
TestPluralSamples()326     public void TestPluralSamples() {
327         String[][] test = { { "en", "ordinal", "1", "one" },
328             { "en", "ordinal", "2", "two" },
329             { "en", "ordinal", "3", "few" },
330             { "en", "ordinal", "4", "other" },
331             { "sl", "cardinal", "2", "two" }, };
332         for (String[] row : test) {
333             checkPluralSamples(row);
334         }
335     }
336 
TestPluralSamples2()337     public void TestPluralSamples2() {
338         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
339         for (String locale : prf.getLocales()) {
340             if (locale.equals("und")) {
341                 continue;
342             }
343             if (locale.equals("pl")) {
344                 int debug = 0;
345             }
346             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale);
347             for (PluralRules.PluralType type : PluralRules.PluralType.values()) {
348                 PluralInfo rules = SUPPLEMENTAL.getPlurals(
349                     SupplementalDataInfo.PluralType.fromStandardType(type),
350                     locale.toString());
351                 if (rules.getCounts().size() == 1) {
352                     continue; // don't require rules for unary cases
353                 }
354                 Multimap<String, Count> sampleToCount = TreeMultimap.create();
355 
356                 for (Count count : rules.getCounts()) {
357                     String sample = samplePatterns.get(type, count);
358                     if (sample == null) {
359                         errOrLog(CoverageIssue.error, locale + "\t" + type + " \tmissing samples for " + count, "cldrbug:7075",
360                             "Missing ordinal minimal pairs");
361                     } else {
362                         sampleToCount.put(sample, count);
363                         PluralRules pRules = rules.getPluralRules();
364                         double unique = pRules.getUniqueKeywordValue(count
365                             .toString());
366                         if (unique == PluralRules.NO_UNIQUE_VALUE
367                             && !sample.contains("{0}")) {
368                             errln("Missing {0} in sample: " + locale + ", " + type + ", " + count + " «" + sample + "»");
369                         }
370                     }
371                 }
372                 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) {
373                     if (entry.getValue().size() > 1) {
374                         errln("Colliding minimal pair samples: " + locale + ", " + type + ", " + entry.getValue() + " «" + entry.getKey() + "»");
375                     }
376                 }
377             }
378         }
379     }
380 
TestCldrScriptCodes()381     public void TestCldrScriptCodes() {
382         Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes();
383 
384         Set<String> unicodeScripts = ScriptMetadata.getScripts();
385         assertRelation("getCLDRScriptCodes contains Unicode Scripts", true, codes, CONTAINS_ALL, unicodeScripts);
386 
387         ImmutableSet<String> allSpecials = ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz");
388         assertRelation("getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials);
389 
390         ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore");
391         assertRelation("getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos);
392 
393         Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script);
394         for (Entry<Status, Set<String>> e : scripts.entrySet()) {
395             switch (e.getKey()) {
396             case regular:
397             case special:
398             case unknown:
399                 assertRelation("getCLDRScriptCodes contains " + e.getKey(), true, codes, CONTAINS_ALL, e.getValue());
400                 break;
401             default:
402                 break; // do nothin
403             }
404         }
405 
406         ImmutableSet<String> variants = ImmutableSet.of("Cyrs", "Geok", "Latf", "Latg", "Syre", "Syrj", "Syrn");
407         assertRelation("getCLDRScriptCodes contains variants", false, codes, CONTAINS_SOME, variants);
408     }
409 
checkPluralSamples(String... row)410     public void checkPluralSamples(String... row) {
411         PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(
412             PluralType.valueOf(row[1]), row[0]);
413         Count count = pluralInfo.getCount(new FixedDecimal(row[2]));
414         assertEquals(String.join(", ", row),
415             Count.valueOf(row[3]), count);
416     }
417 
TestPluralLocales()418     public void TestPluralLocales() {
419         // get the unique rules
420         for (PluralType type : PluralType.values()) {
421             Relation<PluralInfo, String> pluralsToLocale = Relation.of(
422                 new HashMap<PluralInfo, Set<String>>(), TreeSet.class);
423             for (String locale : new TreeSet<>(
424                 SUPPLEMENTAL.getPluralLocales(type))) {
425                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale);
426                 pluralsToLocale.put(pluralInfo, locale);
427             }
428 
429             String[][] equivalents = { { "mo", "ro" }, { "tl", "fil" },
430                 { "he", "iw" }, { "in", "id" }, { "jw", "jv" },
431                 { "ji", "yi" }, { "sh", "sr" }, };
432             for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale
433                 .keyValuesSet()) {
434                 PluralInfo pluralInfo2 = pluralInfoEntry.getKey();
435                 Set<String> locales = pluralInfoEntry.getValue();
436                 // check that equivalent locales are either both in or both out
437                 for (String[] row : equivalents) {
438                     assertEquals(
439                         type + " must be equivalent: " + Arrays.asList(row),
440                         locales.contains(row[0]), locales.contains(row[1]));
441                 }
442                 // check that no rules contain 'within'
443                 for (Count count : pluralInfo2.getCounts()) {
444                     String rule = pluralInfo2.getRule(count);
445                     if (rule == null) {
446                         continue;
447                     }
448                     assertFalse(
449                         "Rule '" + rule + "' for " + Arrays.asList(locales)
450                         + " doesn't contain 'within'",
451                         rule.contains("within"));
452                 }
453             }
454         }
455     }
456 
TestDigitPluralCases()457     public void TestDigitPluralCases() {
458         String[][] tests = {
459             { "en", "one", "1", "1" },
460             { "en", "one", "2", "" },
461             { "en", "one", "3", "" },
462             { "en", "one", "4", "" },
463             { "en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, …" },
464             { "en", "other", "2", "10-99, 10.0, 10.1, 10.2, …" },
465             { "en", "other", "3", "100-999, 100.0, 100.1, 100.2, …" },
466             { "en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, …" },
467             { "hr", "one", "1", "1, 0.1, 2.10, 1.1, …" },
468             { "hr", "one", "2",
469             "21, 31, 41, 51, 61, 71, …, 10.1, 12.10, 11.1, …" },
470             { "hr", "one", "3",
471             "101, 121, 131, 141, 151, 161, …, 100.1, 102.10, 101.1, …" },
472             { "hr", "one", "4",
473             "1001, 1021, 1031, 1041, 1051, 1061, …, 1000.1, 1002.10, 1001.1, …" },
474             { "hr", "few", "1", "2-4, 0.2, 0.3, 0.4, …" },
475             { "hr", "few", "2",
476             "22-24, 32-34, 42-44, …, 10.2, 10.3, 10.4, …" },
477             { "hr", "few", "3",
478             "102-104, 122-124, 132-134, …, 100.2, 100.3, 100.4, …" },
479             { "hr", "few", "4",
480             "1002-1004, 1022-1024, 1032-1034, …, 1000.2, 1000.3, 1000.4, …" },
481             { "hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, …" },
482             { "hr", "other", "2",
483             "10-20, 25-30, 35-40, …, 10.0, 10.5, 10.6, …" },
484             { "hr", "other", "3",
485             "100, 105-120, 125-130, 135-140, …, 100.0, 100.5, 100.6, …" },
486             { "hr", "other", "4",
487             "1000, 1005-1020, 1025-1030, 1035-1040, …, 1000.0, 1000.5, 1000.6, …" }, };
488         for (String[] row : tests) {
489             PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
490             SampleList uset = plurals.getSamples9999(Count.valueOf(row[1]),
491                 Integer.parseInt(row[2]));
492             assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3],
493                 uset.toString());
494         }
495     }
496 
TestDigitPluralCompleteness()497     public void TestDigitPluralCompleteness() {
498         String[][] exceptionStrings = {
499             // defaults
500             { "*", "zero", "0,00,000,0000" },
501             { "*", "one", "0" },
502             { "*", "two", "0,00,000,0000" },
503             { "*", "few", "0,00,000,0000" },
504             { "*", "many", "0,00,000,0000" },
505             { "*", "other", "0,00,000,0000" },
506             // others
507             { "mo", "other", "00,000,0000" }, //
508             { "ro", "other", "00,000,0000" }, //
509             { "cs", "few", "0" }, // j in 2..4
510             { "sk", "few", "0" }, // j in 2..4
511             { "da", "one", "0" }, // j is 1 or t is not 0 and n within 0..2
512             { "is", "one", "0,00,000,0000" }, // j is 1 or f is 1
513             { "sv", "one", "0" }, // j is 1
514             { "he", "two", "0" }, // j is 2
515             { "ru", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
516             // is not 11
517             { "uk", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
518             // is not 11
519             { "bs", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
520             // is not 11 or f mod 10 is
521             // 1 and f mod 100 is not 11
522             { "hr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
523             // is not 11 or f mod 10 is
524             // 1 and f mod 100 is not 11
525             { "sh", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
526             // is not 11 or f mod 10 is
527             // 1 and f mod 100 is not 11
528             { "sr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
529             // is not 11 or f mod 10 is
530             // 1 and f mod 100 is not 11
531             { "mk", "one", "0,00,000,0000" }, // j mod 10 is 1 or f mod 10
532             // is 1
533             { "sl", "one", "0,000,0000" }, // j mod 100 is 1
534             { "sl", "two", "0,000,0000" }, // j mod 100 is 2
535             { "he", "many", "00,000,0000" }, // j not in 0..10 and j mod 10
536             // is 0
537             { "tzm", "one", "0,00" }, // n in 0..1 or n in 11..99
538             { "gd", "one", "0,00" }, // n in 1,11
539             { "gd", "two", "0,00" }, // n in 2,12
540             { "shi", "few", "0,00" }, // n in 2..10
541             { "gd", "few", "0,00" }, // n in 3..10,13..19
542             { "ga", "few", "0" }, // n in 3..6
543             { "ga", "many", "0,00" }, // n in 7..10
544             { "ar", "zero", "0" }, // n is 0
545             { "cy", "zero", "0" }, // n is 0
546             { "ksh", "zero", "0" }, // n is 0
547             { "lag", "zero", "0" }, // n is 0
548             { "pt", "one", "0" }, // i = 1 and v = 0 or i = 0 and t = 1
549             { "pt_PT", "one", "0" }, // n = 1 and v = 0
550             { "ar", "two", "0" }, // n is 2
551             { "cy", "two", "0" }, // n is 2
552             { "ga", "two", "0" }, // n is 2
553             { "iu", "two", "0" }, // n is 2
554             { "naq", "two", "0" }, // n is 2
555             { "se", "two", "0" }, // n is 2
556             { "sma", "two", "0" }, // n is 2
557             { "smi", "two", "0" }, // n is 2
558             { "smj", "two", "0" }, // n is 2
559             { "smn", "two", "0" }, // n is 2
560             { "sms", "two", "0" }, // n is 2
561             { "cy", "few", "0" }, // n is 3
562             { "cy", "many", "0" }, // n is 6
563             { "br", "many", "" }, // n is not 0 and n mod 1000000 is 0
564             { "gv", "one", "0,00,000,0000" }, // n mod 10 is 1
565             { "be", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
566             // is not 11
567             { "lv", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
568             // is not 11 or v is 2 and f
569             // mod 10 is 1 and f mod 100
570             // is not 11 or v is not 2
571             // and f mod 10 is 1
572             { "br", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
573             // not in 11,71,91
574             { "lt", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
575             // not in 11..19
576             { "fil", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v =
577             // 0 and i % 10 != 4,6,9 or
578             // v != 0 and f % 10 !=
579             // 4,6,9
580             { "tl", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v =
581             // 0 and i % 10 != 4,6,9 or
582             // v != 0 and f % 10 !=
583             // 4,6,9
584             { "dsb", "one", "0,00,000,0000" }, // v = 0 and i % 100 = 1 or f
585             // % 100 = 1
586             {"kw", "many", "00,000,0000"},  // n != 1 and n % 100 = 1,21,41,61,81
587             {"kw", "zero", "0"},    // n = 0
588             {"fr", "many", ""},    // e is special
589             {"it", "many", ""},    // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
590             {"es", "many", ""},    // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
591             {"pt", "many", ""},    // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
592             {"pt_PT", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
593         };
594         // parse out the exceptions
595         Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<>();
596         Relation<Count, Integer> fallback = Relation.of(
597             new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class);
598         for (String[] row : exceptionStrings) {
599             Relation<Count, Integer> countToDigits;
600             if (row[0].equals("*")) {
601                 countToDigits = fallback;
602             } else {
603                 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
604                 countToDigits = exceptions.get(plurals);
605                 if (countToDigits == null) {
606                     exceptions.put(
607                         plurals,
608                         countToDigits = Relation.of(
609                             new EnumMap<Count, Set<Integer>>(
610                                 Count.class),
611                             TreeSet.class));
612                 }
613             }
614             Count c = Count.valueOf(row[1]);
615             for (String digit : row[2].split(",")) {
616                 // "99" is special, just to have the result be non-empty
617                 countToDigits.put(c, digit.length());
618             }
619         }
620         Set<PluralInfo> seen = new HashSet<>();
621         Set<String> sorted = new TreeSet<>(
622             SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
623         Relation<String, String> ruleToExceptions = Relation.of(
624             new TreeMap<String, Set<String>>(), TreeSet.class);
625 
626         for (String locale : sorted) {
627             PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale);
628             if (seen.contains(plurals)) { // skip identicals
629                 continue;
630             }
631             Relation<Count, Integer> countToDigits = exceptions.get(plurals);
632             if (countToDigits == null) {
633                 countToDigits = fallback;
634             }
635             for (Count c : plurals.getCounts()) {
636                 List<String> compose = new ArrayList<>();
637                 boolean needLine = false;
638                 Set<Integer> digitSet = countToDigits.get(c);
639                 if (digitSet == null) {
640                     digitSet = fallback.get(c);
641                 }
642                 for (int digits = 1; digits < 5; ++digits) {
643                     boolean expected = digitSet.contains(digits);
644                     boolean hasSamples = plurals.hasSamples(c, digits);
645                     if (hasSamples) {
646                         compose.add(Utility.repeat("0", digits));
647                     }
648                     if (!assertEquals(locale + ", " + digits + ", " + c,
649                         expected, hasSamples)) {
650                         needLine = true;
651                     }
652                 }
653                 if (needLine) {
654                     String countRules = plurals.getPluralRules().getRules(
655                         c.toString());
656                     ruleToExceptions.put(countRules == null ? "" : countRules,
657                         "{\"" + locale + "\", \"" + c + "\", \""
658                             + Joiner.on(",").join(compose)
659                             + "\"},");
660                 }
661             }
662         }
663         if (!ruleToExceptions.isEmpty()) {
664             System.out
665             .println("To fix the above, review the following, then replace in TestDigitPluralCompleteness");
666             for (Entry<String, String> entry : ruleToExceptions.entrySet()) {
667                 System.out.println(entry.getValue() + "\t// " + entry.getKey());
668             }
669         }
670     }
671 
TestLikelyCode()672     public void TestLikelyCode() {
673         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
674         String[][] tests = { { "it_AQ", "it_Latn_AQ" },
675             { "it_Arab", "it_Arab_IT" }, { "az_Cyrl", "az_Cyrl_AZ" }, };
676         for (String[] pair : tests) {
677             String newMax = LikelySubtags.maximize(pair[0], likely);
678             assertEquals("Likely", pair[1], newMax);
679         }
680 
681     }
682 
TestLikelySubtagCompleteness()683     public void TestLikelySubtagCompleteness() {
684         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
685 
686         for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) {
687             if (!likely.containsKey(language)) {
688                 logln("WARNING: No likely subtag for CLDR language code ("
689                     + language + ")");
690             }
691         }
692         for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) {
693             if (!likely.containsKey("und_" + script)
694                 && !script.equals("Latn")
695                 && !script.equals("Zinh")
696                 && !script.equals("Zyyy")
697                 && ScriptMetadata.getInfo(script) != null
698                 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION
699                 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) {
700                 errln("No likely subtag for CLDR script code (und_" + script
701                     + ")");
702             }
703         }
704 
705     }
706 
TestEquivalentLocales()707     public void TestEquivalentLocales() {
708         Set<Set<String>> seen = new HashSet<>();
709         Set<String> toTest = new TreeSet<>(testInfo.getCldrFactory()
710             .getAvailable());
711         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet());
712         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values());
713         toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales());
714         LanguageTagParser ltp = new LanguageTagParser();
715         main: for (String locale : toTest) {
716             if (locale.startsWith("und") || locale.equals("root")) {
717                 continue;
718             }
719             Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale);
720             if (seen.contains(s)) {
721                 continue;
722             }
723 
724             List<String> ss = new ArrayList<>(s);
725             String last = ss.get(ss.size() - 1);
726             ltp.set(last);
727             if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) {
728                 continue; // skip variants for now.
729             }
730             String language = ltp.getLanguage();
731             String script = ltp.getScript();
732             String region = ltp.getRegion();
733             if (!script.isEmpty() && !region.isEmpty()) {
734                 String noScript = ltp.setScript("").toString();
735                 String noRegion = ltp.setScript(script).setRegion("")
736                     .toString();
737                 switch (s.size()) {
738                 case 1: // ok if already maximized and strange script/country,
739                     // eg it_Arab_JA
740                     continue main;
741                 case 2: // ok if adds default country/script, eg {en_Cyrl,
742                     // en_Cyrl_US} or {en_GB, en_Latn_GB}
743                     String first = ss.get(0);
744                     if (first.equals(noScript) || first.equals(noRegion)) {
745                         continue main;
746                     }
747                     break;
748                 case 3: // ok if different script in different country, eg
749                     // {az_IR, az_Arab, az_Arab_IR}
750                     if (noScript.equals(ss.get(0))
751                         && noRegion.equals(ss.get(1))) {
752                         continue main;
753                     }
754                     break;
755                 case 4: // ok if all combinations, eg {en, en_US, en_Latn,
756                     // en_Latn_US}
757                     if (language.equals(ss.get(0))
758                         && noScript.equals(ss.get(1))
759                         && noRegion.equals(ss.get(2))) {
760                         continue main;
761                     }
762                     break;
763                 }
764             }
765             errln("Strange size or composition:\t" + s + " \t"
766                 + showLocaleParts(s));
767             seen.add(s);
768         }
769     }
770 
showLocaleParts(Set<String> s)771     private String showLocaleParts(Set<String> s) {
772         LanguageTagParser ltp = new LanguageTagParser();
773         Set<String> b = new LinkedHashSet<>();
774         for (String ss : s) {
775             ltp.set(ss);
776             addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b);
777             addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b);
778             addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b);
779         }
780         return Joiner.on("; ").join(b);
781     }
782 
addName(int languageName, String code, Set<String> b)783     private void addName(int languageName, String code, Set<String> b) {
784         if (code.isEmpty()) {
785             return;
786         }
787         String name = testInfo.getEnglish().getName(languageName, code);
788         if (!code.equals(name)) {
789             b.add(code + "=" + name);
790         }
791     }
792 
TestDefaultScriptCompleteness()793     public void TestDefaultScriptCompleteness() {
794         Relation<String, String> scriptToBase = Relation.of(
795             new LinkedHashMap<String, Set<String>>(), TreeSet.class);
796         main: for (String locale : testInfo.getCldrFactory()
797             .getAvailableLanguages()) {
798             if (!locale.contains("_") && !"root".equals(locale)) {
799                 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale);
800                 if (defaultScript != null) {
801                     continue;
802                 }
803                 CLDRFile cldrFile = testInfo.getCLDRFile(locale,
804                     false);
805                 UnicodeSet set = cldrFile.getExemplarSet("",
806                     WinningChoice.NORMAL);
807                 for (String s : set) {
808                     int script = UScript.getScript(s.codePointAt(0));
809                     if (script != UScript.UNKNOWN && script != UScript.COMMON
810                         && script != UScript.INHERITED) {
811                         scriptToBase.put(UScript.getShortName(script), locale);
812                         continue main;
813                     }
814                 }
815                 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale);
816             }
817         }
818         if (scriptToBase.size() != 0) {
819             for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) {
820                 errln("Default Scripts missing:\t" + entry.getKey() + "\t"
821                     + entry.getValue());
822             }
823         }
824     }
825 
TestTimeData()826     public void TestTimeData() {
827         Map<String, PreferredAndAllowedHour> timeData = SUPPLEMENTAL
828             .getTimeData();
829         Set<String> regionsSoFar = new HashSet<>();
830         Set<String> current24only = new HashSet<>();
831         Set<String> current12preferred = new HashSet<>();
832 
833         boolean haveWorld = false;
834 
835         ImmutableSet<HourStyle> oldSchool = ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k));
836 
837         for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) {
838             String region = e.getKey();
839             if (region.equals("001")) {
840                 haveWorld = true;
841             }
842             regionsSoFar.add(region);
843             PreferredAndAllowedHour preferredAndAllowedHour = e.getValue();
844             assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred);
845 
846             // find first h or H
847             HourStyle found = null;
848 
849             for (HourStyle item : preferredAndAllowedHour.allowed) {
850                 if (oldSchool.contains(item)) {
851                     found = item;
852                     if (item != preferredAndAllowedHour.preferred) {
853                         String message = "Inconsistent values for " + region + ": preferred=" + preferredAndAllowedHour.preferred
854                             + " but that isn't the first " + oldSchool + " in allowed: " + preferredAndAllowedHour.allowed;
855                         //if (!logKnownIssue("cldrbug:11448", message)) {
856                         errln(message);
857                         //}
858                     }
859                     break;
860                 }
861             }
862             if (found == null) {
863                 errln(region + ": preferred " + preferredAndAllowedHour.preferred
864                     + " not in " + preferredAndAllowedHour.allowed);
865             }
866 //            final HourStyle firstAllowed = preferredAndAllowedHour.allowed.iterator().next();
867 //            if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.h
868 //                || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.hb
869 //                || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed == HourStyle.H) {
870 //                errln(region + ": allowed " + preferredAndAllowedHour.allowed
871 //                    + " starts with preferred " + preferredAndAllowedHour.preferred);
872 //            } else if (isVerbose()) {
873 //                logln(region + ": allowed " + preferredAndAllowedHour.allowed
874 //                    + " starts with preferred " + preferredAndAllowedHour.preferred);
875 //            }
876             // for (HourStyle c : preferredAndAllowedHour.allowed) {
877             // if (!PreferredAndAllowedHour.HOURS.contains(c)) {
878             // errln(region + ": illegal character in " +
879             // preferredAndAllowedHour.allowed + ". It contains " + c
880             // + " which is not in " + PreferredAndAllowedHour.HOURS);
881             // }
882             // }
883             if (!preferredAndAllowedHour.allowed.contains(HourStyle.h)
884                 && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) {
885                 current24only.add(region);
886             }
887             if (preferredAndAllowedHour.preferred == HourStyle.h) {
888                 current12preferred.add(region);
889             }
890         }
891         Set<String> missing = new TreeSet<>(
892             STANDARD_CODES.getGoodAvailableCodes(CodeType.territory));
893         missing.removeAll(regionsSoFar);
894         for (Iterator<String> it = missing.iterator(); it.hasNext();) {
895             if (!StandardCodes.isCountry(it.next())) {
896                 it.remove();
897             }
898         }
899 
900         // if we don't have 001, then we can't miss any regions
901         if (!missing.isEmpty()) {
902             if (haveWorld) {
903                 logln("Implicit regions: " + missing);
904             } else {
905                 errln("Missing regions: " + missing);
906             }
907         }
908 
909         // The feedback gathered from our translators is that the following use
910         // 24 hour time ONLY:
911         Set<String> only24lang = new TreeSet<>(
912             Arrays.asList(("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, "
913                 + "fr, gl, he, is, id, it, no, pt, ro, ru, sr, sk, sl, sv, tr, hy")
914                 .split(",\\s*")));
915         // With the new preferences, this is changed
916         Set<String> only24region = new TreeSet<>();
917         Set<String> either24or12region = new TreeSet<>();
918 
919         // get all countries where official or de-facto official
920         // add them two one of two lists, based on the above list of languages
921         for (String language : SUPPLEMENTAL
922             .getLanguagesForTerritoriesPopulationData()) {
923             boolean a24lang = only24lang.contains(language);
924             for (String region : SUPPLEMENTAL
925                 .getTerritoriesForPopulationData(language)) {
926                 PopulationData pop = SUPPLEMENTAL
927                     .getLanguageAndTerritoryPopulationData(language, region);
928                 if (pop.getOfficialStatus().compareTo(
929                     OfficialStatus.de_facto_official) < 0) {
930                     continue;
931                 }
932                 if (a24lang) {
933                     only24region.add(region);
934                 } else {
935                     either24or12region.add(region);
936                 }
937             }
938         }
939         // if we have a case like CA, where en uses 12/24 but fr uses 24, remove
940         // it for safety
941         only24region.removeAll(either24or12region);
942         // There are always exceptions... Remove SM (San Marino) and VA (Vatican),
943         // since they allows 12/24 but the de facto langauge is Italian.
944         only24region.remove("SM");
945         only24region.remove("VA");
946         // also remove all the regions where 'h' is preferred
947         only24region.removeAll(current12preferred);
948         // now verify
949         if (!current24only.containsAll(only24region)) {
950             Set<String> missing24only = new TreeSet<>(only24region);
951             missing24only.removeAll(current24only);
952 
953             errln("24-hour-only doesn't include needed items:\n"
954                 + " add "
955                 + CldrUtility.join(missing24only, " ")
956                 + "\n\t\t"
957                 + CldrUtility.join(missing24only, "\n\t\t",
958                     new NameCodeTransform(testInfo.getEnglish(),
959                         CLDRFile.TERRITORY_NAME)));
960         }
961     }
962 
963     public static class NameCodeTransform implements StringTransform {
964         private final CLDRFile file;
965         private final int codeType;
966 
NameCodeTransform(CLDRFile file, int code)967         public NameCodeTransform(CLDRFile file, int code) {
968             this.file = file;
969             this.codeType = code;
970         }
971 
972         @Override
transform(String code)973         public String transform(String code) {
974             return file.getName(codeType, code) + " [" + code + "]";
975         }
976     }
977 
TestAliases()978     public void TestAliases() {
979         StandardCodes.make();
980         Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes
981             .getLStreg();
982         Map<String, Map<String, R2<List<String>, String>>> aliases = SUPPLEMENTAL
983             .getLocaleAliasInfo();
984 
985         for (Entry<String, Map<String, R2<List<String>, String>>> typeMap : aliases
986             .entrySet()) {
987             String type = typeMap.getKey();
988             Map<String, R2<List<String>, String>> codeReplacement = typeMap
989                 .getValue();
990 
991             Map<String, Map<String, String>> bcp47DataTypeData = bcp47Data
992                 .get(type.equals("territory") ? "region" : type);
993             if (bcp47DataTypeData == null) {
994                 logln("skipping BCP47 test for " + type);
995             } else {
996                 for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData
997                     .entrySet()) {
998                     String code = codeData.getKey();
999                     if (codeReplacement.containsKey(code)
1000                         || codeReplacement.containsKey(code
1001                             .toUpperCase(Locale.ENGLISH))) {
1002                         continue;
1003                         // TODO, check the value
1004                     }
1005                     Map<String, String> data = codeData.getValue();
1006                     if (data.containsKey("Deprecated")
1007                         && SUPPLEMENTAL.getCLDRLanguageCodes().contains(
1008                             code)) {
1009                         errln("supplementalMetadata.xml: alias is missing <languageAlias type=\""
1010                             + code + "\" ... /> " + "\t" + data);
1011                     }
1012                 }
1013             }
1014 
1015             Set<R3<String, List<String>, List<String>>> failures = new LinkedHashSet<>();
1016             Set<String> nullReplacements = new TreeSet<>();
1017             for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement
1018                 .entrySet()) {
1019                 String code = codeRep.getKey();
1020                 List<String> replacements = codeRep.getValue().get0();
1021                 if (replacements == null) {
1022                     nullReplacements.add(code);
1023                     continue;
1024                 }
1025                 Set<String> fixedReplacements = new LinkedHashSet<>();
1026                 for (String replacement : replacements) {
1027                     R2<List<String>, String> newReplacement = codeReplacement
1028                         .get(replacement);
1029                     if (newReplacement != null) {
1030                         List<String> list = newReplacement.get0();
1031                         if (list != null) {
1032                             fixedReplacements.addAll(list);
1033                         }
1034                     } else {
1035                         fixedReplacements.add(replacement);
1036                     }
1037                 }
1038                 List<String> fixedList = new ArrayList<>(
1039                     fixedReplacements);
1040                 if (!replacements.equals(fixedList)) {
1041                     R3<String, List<String>, List<String>> row = Row.of(code,
1042                         replacements, fixedList);
1043                     System.out.println(row.toString());
1044                     failures.add(row);
1045                 }
1046             }
1047 
1048             if (failures.size() != 0) {
1049                 for (R3<String, List<String>, List<String>> item : failures) {
1050                     String code = item.get0();
1051                     List<String> oldReplacement = item.get1();
1052                     List<String> newReplacement = item.get2();
1053 
1054                     errln(code + "\t=>\t" + oldReplacement + "\tshould be:\n\t"
1055                         + "<" + type + "Alias type=\"" + code
1056                         + "\" replacement=\""
1057                         + Joiner.on(" ").join(newReplacement)
1058                         + "\" reason=\"XXX\"/> <!-- YYY -->\n");
1059                 }
1060             }
1061             if (nullReplacements.size() != 0) {
1062                 logln("No Replacements\t" + type + "\t" + nullReplacements);
1063             }
1064         }
1065     }
1066 
1067     static final List<String> oldRegions = Arrays
1068         .asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU"
1069             .split(", "));
1070 
TestTerritoryContainment()1071     public void TestTerritoryContainment() {
1072         Relation<String, String> map = SUPPLEMENTAL
1073             .getTerritoryToContained(ContainmentStyle.all);
1074         Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore();
1075         Set<String> mapItems = new LinkedHashSet<>();
1076         // get all the items
1077         for (String item : map.keySet()) {
1078             mapItems.add(item);
1079             mapItems.addAll(map.getAll(item));
1080         }
1081         Map<String, Map<String, String>> bcp47RegionData = StandardCodes
1082             .getLStreg().get("region");
1083 
1084         // verify that all regions are covered
1085         Set<String> bcp47Regions = new LinkedHashSet<>(
1086             bcp47RegionData.keySet());
1087         bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the
1088         // unknown region...
1089         for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext();) {
1090             String region = it.next();
1091             Map<String, String> data = bcp47RegionData.get(region);
1092             if (data.containsKey("Deprecated")) {
1093                 logln("Removing deprecated " + region);
1094                 it.remove();
1095             }
1096             if ("Private use".equals(data.get("Description"))) {
1097                 it.remove();
1098             }
1099         }
1100 
1101         if (!mapItems.equals(bcp47Regions)) {
1102             mapItems.removeAll(oldRegions);
1103             errlnDiff("containment items not in bcp47 regions: ", mapItems,
1104                 bcp47Regions);
1105             errlnDiff("bcp47 regions not in containment items: ", bcp47Regions,
1106                 mapItems);
1107         }
1108 
1109         // verify that everything in the containment core can be reached
1110         // downwards from 001.
1111 
1112         Map<String, Integer> from001 = getRecursiveContainment("001", map,
1113             new LinkedHashMap<String, Integer>(), 1);
1114         from001.put("001", 0);
1115         Set<String> keySet = from001.keySet();
1116         for (String region : keySet) {
1117             logln(Utility.repeat("\t", from001.get(region)) + "\t" + region
1118                 + "\t" + getRegionName(region));
1119         }
1120 
1121         // Populate mapItems with the core containment
1122         mapItems.clear();
1123         for (String item : mapCore.keySet()) {
1124             mapItems.add(item);
1125             mapItems.addAll(mapCore.getAll(item));
1126         }
1127 
1128         if (!mapItems.equals(keySet)) {
1129             errlnDiff(
1130                 "containment core items that can't be reached from 001: ",
1131                 mapItems, keySet);
1132         }
1133     }
1134 
errlnDiff(String title, Set<String> mapItems, Set<String> keySet)1135     private void errlnDiff(String title, Set<String> mapItems,
1136         Set<String> keySet) {
1137         Set<String> diff = new LinkedHashSet<>(mapItems);
1138         diff.removeAll(keySet);
1139         if (diff.size() != 0) {
1140             errln(title + diff);
1141         }
1142     }
1143 
getRegionName(String region)1144     private String getRegionName(String region) {
1145         return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region);
1146     }
1147 
getRecursiveContainment(String region, Relation<String, String> map, Map<String, Integer> result, int depth)1148     private Map<String, Integer> getRecursiveContainment(String region,
1149         Relation<String, String> map, Map<String, Integer> result, int depth) {
1150         Set<String> contained = map.getAll(region);
1151         if (contained == null) {
1152             return result;
1153         }
1154         for (String item : contained) {
1155             if (result.containsKey(item)) {
1156                 logln("Duplicate containment " + item + "\t"
1157                     + getRegionName(item));
1158                 continue;
1159             }
1160             result.put(item, depth);
1161             getRecursiveContainment(item, map, result, depth + 1);
1162         }
1163         return result;
1164     }
1165 
TestMacrolanguages()1166     public void TestMacrolanguages() {
1167         Set<String> languageCodes = STANDARD_CODES
1168             .getAvailableCodes("language");
1169         Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement = SUPPLEMENTAL
1170             .getLocaleAliasInfo();
1171         Map<String, R2<List<String>, String>> tagToReplacement = typeToTagToReplacement
1172             .get("language");
1173 
1174         Relation<String, String> replacementToReplaced = Relation.of(
1175             new TreeMap<String, Set<String>>(), TreeSet.class);
1176         for (String language : tagToReplacement.keySet()) {
1177             List<String> replacements = tagToReplacement.get(language).get0();
1178             if (replacements != null) {
1179                 replacementToReplaced.putAll(replacements, language);
1180             }
1181         }
1182         replacementToReplaced.freeze();
1183 
1184         Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes
1185             .getLStreg();
1186         Map<String, Map<String, String>> lstregLanguageInfo = lstreg
1187             .get("language");
1188 
1189         Relation<Scope, String> scopeToCodes = Relation.of(
1190             new TreeMap<Scope, Set<String>>(), TreeSet.class);
1191         // the invariant is that every macrolanguage has exactly 1 encompassed
1192         // language that maps to it
1193 
1194         main: for (String language : Builder.with(new TreeSet<String>())
1195             .addAll(languageCodes).addAll(Iso639Data.getAvailable()).get()) {
1196             if (language.equals("no") || language.equals("sh"))
1197                 continue; // special cases
1198             Scope languageScope = getScope(language, lstregLanguageInfo);
1199             if (languageScope == Scope.Macrolanguage) {
1200                 if (Iso639Data.getHeirarchy(language) != null) {
1201                     continue main; // is real family
1202                 }
1203                 Set<String> replacements = replacementToReplaced
1204                     .getAll(language);
1205                 if (replacements == null || replacements.size() == 0) {
1206                     scopeToCodes.put(languageScope, language);
1207                 } else {
1208                     // it still might be bad, if we don't have a mapping to a
1209                     // regular language
1210                     for (String replacement : replacements) {
1211                         Scope replacementScope = getScope(replacement,
1212                             lstregLanguageInfo);
1213                         if (replacementScope == Scope.Individual) {
1214                             continue main;
1215                         }
1216                     }
1217                     scopeToCodes.put(languageScope, language);
1218                 }
1219             }
1220         }
1221         // now show the items we found
1222         for (Scope scope : scopeToCodes.keySet()) {
1223             for (String language : scopeToCodes.getAll(scope)) {
1224                 String name = testInfo.getEnglish().getName(language);
1225                 if (name == null || name.equals(language)) {
1226                     Set<String> set = Iso639Data.getNames(language);
1227                     if (set != null) {
1228                         name = set.iterator().next();
1229                     } else {
1230                         Map<String, String> languageInfo = lstregLanguageInfo
1231                             .get(language);
1232                         if (languageInfo != null) {
1233                             name = languageInfo.get("Description");
1234                         }
1235                     }
1236                 }
1237                 errln(scope + "\t" + language + "\t" + name + "\t"
1238                     + Iso639Data.getType(language));
1239             }
1240         }
1241     }
1242 
getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo)1243     private Scope getScope(String language,
1244         Map<String, Map<String, String>> lstregLanguageInfo) {
1245         Scope languageScope = Iso639Data.getScope(language);
1246         Map<String, String> languageInfo = lstregLanguageInfo.get(language);
1247         if (languageInfo == null) {
1248             // System.out.println("Couldn't get lstreg info for " + language);
1249         } else {
1250             String lstregScope = languageInfo.get("Scope");
1251             if (lstregScope != null) {
1252                 Scope scope2 = Scope.fromString(lstregScope);
1253                 if (languageScope != scope2) {
1254                     // System.out.println("Mismatch in scope between LSTR and ISO 639:\t"
1255                     // + scope2 + "\t" +
1256                     // languageScope);
1257                     languageScope = scope2;
1258                 }
1259             }
1260         }
1261         return languageScope;
1262     }
1263 
1264     static final boolean LOCALES_FIXED = true; // when true, TestPopulation asserts that each population-data language ID equals its canonicalized form
1265 
TestPopulation()1266     public void TestPopulation() {
1267         Set<String> languages = SUPPLEMENTAL
1268             .getLanguagesForTerritoriesPopulationData();
1269         Relation<String, String> baseToLanguages = Relation.of(
1270             new TreeMap<String, Set<String>>(), TreeSet.class);
1271         LanguageTagParser ltp = new LanguageTagParser();
1272         LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(false);
1273 
1274         for (String language : languages) {
1275             if (LOCALES_FIXED) {
1276                 String canonicalForm = ltc.transform(language);
1277                 if (!assertEquals("Canonical form", canonicalForm, language)) {
1278                     int debug = 0;
1279                 }
1280             }
1281 
1282             String base = ltp.set(language).getLanguage();
1283             String script = ltp.getScript();
1284             baseToLanguages.put(base, language);
1285 
1286             // add basic data, basically just for wo!
1287             // if there are primary scripts, they must include script (if not
1288             // empty)
1289             Set<String> primaryScripts = Collections.emptySet();
1290             Set<String> secondaryScripts = Collections.emptySet();
1291             Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL
1292                 .getBasicLanguageDataMap(base);
1293             if (basicData != null) {
1294                 BasicLanguageData s = basicData
1295                     .get(BasicLanguageData.Type.primary);
1296                 if (s != null) {
1297                     primaryScripts = s.getScripts();
1298                 }
1299                 s = basicData.get(BasicLanguageData.Type.secondary);
1300                 if (s != null) {
1301                     secondaryScripts = s.getScripts();
1302                 }
1303             }
1304 
1305             // do some consistency tests; if there is a script, it must be in
1306             // primaryScripts or secondaryScripts
1307             if (!script.isEmpty() && !primaryScripts.contains(script) && !secondaryScripts.contains(script)) {
1308                 errln(base + ": Script found in territory data (" + script
1309                     + ") is not in primary scripts :\t" + primaryScripts
1310                     + " and not in secondary scripts :\t" + secondaryScripts);
1311             }
1312 
1313             // if there are multiple primary scripts, they will be in
1314             // baseToLanguages
1315             if (primaryScripts.size() > 1) {
1316                 for (String script2 : primaryScripts) {
1317                     baseToLanguages.put(base, base + "_" + script2);
1318                 }
1319             }
1320         }
1321 
1322         if (!LOCALES_FIXED) {
1323             // the invariants are that if we have a base, we must not have a script.
1324             // and if we don't have a base, we must have two items
1325             for (String base : baseToLanguages.keySet()) {
1326                 Set<String> languagesForBase = baseToLanguages.getAll(base);
1327                 if (languagesForBase.contains(base)) {
1328                     if (languagesForBase.size() > 1) {
1329                         errln("Cannot have base alone with other scripts:\t"
1330                             + languagesForBase);
1331                     }
1332                 } else {
1333                     if (languagesForBase.size() == 1) {
1334                         errln("Cannot have only one script for language:\t"
1335                             + languagesForBase);
1336                     }
1337                 }
1338             }
1339         }
1340     }
1341 
TestCompleteness()1342     public void TestCompleteness() {
1343         if (SUPPLEMENTAL.getSkippedElements().size() > 0) {
1344             logln("SupplementalDataInfo API doesn't support: "
1345                 + SUPPLEMENTAL.getSkippedElements().toString());
1346         }
1347     }
1348 
1349     // these are settings for exceptional cases we want to allow
1350     private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW = new TreeSet<>(
1351         Arrays.asList("ILS", "NZD", "PGK", "TWD"));
1352 
1353     // ok since there is no problem with confusion
1354     private static final Set<String> OK_TO_NOT_HAVE_OLD = new TreeSet<>(
1355         Arrays.asList("ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM",
1356             "FRF", "GRD", "IEP", "ITL", "LUF", "MTL", "MTP", "NLG",
1357             "PTE", "YUM", "ARA", "BAD", "BGL", "BOP", "BRC", "BRN",
1358             "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE", "HRD",
1359             "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI",
1360             "PES", "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD",
1361             "YUN", "ZRZ", "GWE"));
1362 
1363     private static final Date LIMIT_FOR_NEW_CURRENCY = new Date(
1364         new Date().getYear() - 5, 1, 1);
1365     private static final Date NOW = new Date();
1366 
1367     private Matcher oldMatcher = Pattern.compile(
1368         "\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE)
1369         .matcher("");
1370     private Matcher newMatcher = Pattern.compile("\\bnew\\b",
1371         Pattern.CASE_INSENSITIVE).matcher("");
1372 
1373     /**
1374      * Test that access to currency info in supplemental data is ok. At this
1375      * point just a simple test.
1376      *
1377      * @param args
1378      */
TestCurrency()1379     public void TestCurrency() {
1380         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
1381         Set<String> currencyCodes = STANDARD_CODES
1382             .getGoodAvailableCodes("currency");
1383         Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes = Relation
1384             .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(),
1385                 TreeSet.class);
1386         Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes = Relation
1387             .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(),
1388                 TreeSet.class);
1389         Set<String> territoriesWithoutModernCurrencies = new TreeSet<>(
1390             STANDARD_CODES.getGoodAvailableCodes("territory"));
1391         Map<String, Date> currencyFirstValid = new TreeMap<>();
1392         Map<String, Date> currencyLastValid = new TreeMap<>();
1393         territoriesWithoutModernCurrencies.remove("ZZ");
1394 
1395         for (String territory : STANDARD_CODES
1396             .getGoodAvailableCodes("territory")) {
1397             /* "EU" behaves like a country for purposes of this test */
1398             if ((SUPPLEMENTAL.getContained(territory) != null)
1399                 && !territory.equals("EU")) {
1400                 territoriesWithoutModernCurrencies.remove(territory);
1401                 continue;
1402             }
1403             Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL
1404                 .getCurrencyDateInfo(territory);
1405             if (currencyInfo == null) {
1406                 continue; // error, but will pick up below.
1407             }
1408             for (CurrencyDateInfo dateInfo : currencyInfo) {
1409                 final String currency = dateInfo.getCurrency();
1410                 final Date start = dateInfo.getStart();
1411                 final Date end = dateInfo.getEnd();
1412                 if (dateInfo.getErrors().length() != 0) {
1413                     logln("parsing " + territory + "\t" + dateInfo.toString()
1414                     + "\t" + dateInfo.getErrors());
1415                 }
1416                 Date firstValue = currencyFirstValid.get(currency);
1417                 if (firstValue == null || firstValue.compareTo(start) < 0) {
1418                     currencyFirstValid.put(currency, start);
1419                 }
1420                 Date lastValue = currencyLastValid.get(currency);
1421                 if (lastValue == null || lastValue.compareTo(end) > 0) {
1422                     currencyLastValid.put(currency, end);
1423                 }
1424                 if (start.compareTo(NOW) < 0 && end.compareTo(NOW) >= 0) { // Non-tender
1425                     // is
1426                     // OK...
1427                     modernCurrencyCodes.put(currency,
1428                         new Pair<>(territory,
1429                             dateInfo));
1430                     territoriesWithoutModernCurrencies.remove(territory);
1431                 } else {
1432                     nonModernCurrencyCodes.put(currency,
1433                         new Pair<>(territory,
1434                             dateInfo));
1435                 }
1436                 logln(territory
1437                     + "\t"
1438                     + dateInfo.toString()
1439                     + "\t"
1440                     + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME,
1441                         currency));
1442             }
1443         }
1444         // fix up
1445         nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet());
1446         Relation<String, String> isoCurrenciesToCountries = Relation.of(
1447             new TreeMap<String, Set<String>>(), TreeSet.class)
1448             .addAllInverted(isoCodes.getCountryToCodes());
1449         // now print error messages
1450         logln("Modern Codes: " + modernCurrencyCodes.size() + "\t"
1451             + modernCurrencyCodes);
1452         Set<String> missing = new TreeSet<>(
1453             isoCurrenciesToCountries.keySet());
1454         missing.removeAll(modernCurrencyCodes.keySet());
1455         if (missing.size() != 0) {
1456             errln("Missing codes compared to ISO: " + missing.toString());
1457         }
1458 
1459         for (String currency : modernCurrencyCodes.keySet()) {
1460             Set<Pair<String, CurrencyDateInfo>> data = modernCurrencyCodes
1461                 .getAll(currency);
1462             final String name = testInfo.getEnglish().getName(
1463                 CLDRFile.CURRENCY_NAME, currency);
1464 
1465             Set<String> isoCountries = isoCurrenciesToCountries
1466                 .getAll(currency);
1467             if (isoCountries == null) {
1468                 isoCountries = new TreeSet<>();
1469             }
1470 
1471             TreeSet<String> cldrCountries = new TreeSet<>();
1472             for (Pair<String, CurrencyDateInfo> x : data) {
1473                 cldrCountries.add(x.getFirst());
1474             }
1475             if (!isoCountries.equals(cldrCountries)) {
1476                 if (!logKnownIssue("cldrbug:10765", "Missing codes compared to ISO: " + missing.toString())) {
1477 
1478                     errln("Mismatch between ISO and Cldr modern currencies for "
1479                         + currency + "\tISO:" + isoCountries + "\tCLDR:"
1480                         + cldrCountries);
1481                     showCountries("iso-cldr", isoCountries, cldrCountries, missing);
1482                     showCountries("cldr-iso", cldrCountries, isoCountries, missing);
1483                 }
1484             }
1485 
1486             if (oldMatcher.reset(name).find()) {
1487                 errln("Has 'old' in name but still used " + "\t" + currency
1488                     + "\t" + name + "\t" + data);
1489             }
1490             if (newMatcher.reset(name).find()
1491                 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
1492                 // find the first use. If older than 5 years, flag as error
1493                 if (currencyFirstValid.get(currency).compareTo(
1494                     LIMIT_FOR_NEW_CURRENCY) < 0) {
1495                     errln("Has 'new' in name but used since "
1496                         + CurrencyDateInfo.formatDate(currencyFirstValid
1497                             .get(currency))
1498                         + "\t" + currency + "\t"
1499                         + name + "\t" + data);
1500                 } else {
1501                     logln("Has 'new' in name but used since "
1502                         + CurrencyDateInfo.formatDate(currencyFirstValid
1503                             .get(currency))
1504                         + "\t" + currency + "\t"
1505                         + name + "\t" + data);
1506                 }
1507             }
1508         }
1509         logln("Non-Modern Codes (with dates): " + nonModernCurrencyCodes.size()
1510         + "\t" + nonModernCurrencyCodes);
1511         for (String currency : nonModernCurrencyCodes.keySet()) {
1512             final String name = testInfo.getEnglish().getName(
1513                 CLDRFile.CURRENCY_NAME, currency);
1514             if (newMatcher.reset(name).find()
1515                 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
1516                 logln("Has 'new' in name but NOT used since "
1517                     + CurrencyDateInfo.formatDate(currencyLastValid
1518                         .get(currency))
1519                     + "\t" + currency + "\t" + name
1520                     + "\t" + nonModernCurrencyCodes.getAll(currency));
1521             } else if (!oldMatcher.reset(name).find()
1522                 && !OK_TO_NOT_HAVE_OLD.contains(currency)) {
1523                 logln("Doesn't have 'old' or date range in name but NOT used since "
1524                     + CurrencyDateInfo.formatDate(currencyLastValid
1525                         .get(currency))
1526                     + "\t"
1527                     + currency
1528                     + "\t"
1529                     + name
1530                     + "\t" + nonModernCurrencyCodes.getAll(currency));
1531                 for (Pair<String, CurrencyDateInfo> pair : nonModernCurrencyCodes
1532                     .getAll(currency)) {
1533                     final String territory = pair.getFirst();
1534                     Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL
1535                         .getCurrencyDateInfo(territory);
1536                     for (CurrencyDateInfo dateInfo : currencyInfo) {
1537                         if (dateInfo.getEnd().compareTo(NOW) < 0) {
1538                             continue;
1539                         }
1540                         logln("\tCurrencies used instead: "
1541                             + territory
1542                             + "\t"
1543                             + dateInfo
1544                             + "\t"
1545                             + testInfo.getEnglish().getName(
1546                                 CLDRFile.CURRENCY_NAME,
1547                                 dateInfo.getCurrency()));
1548 
1549                     }
1550                 }
1551 
1552             }
1553         }
1554         Set<String> remainder = new TreeSet<>();
1555         remainder.addAll(currencyCodes);
1556         remainder.removeAll(nonModernCurrencyCodes.keySet());
1557         // TODO make this an error, except for allowed exceptions.
1558         logln("Currencies without Territories: " + remainder);
1559         if (territoriesWithoutModernCurrencies.size() != 0) {
1560             errln("Modern territory missing currency: "
1561                 + territoriesWithoutModernCurrencies);
1562         }
1563     }
1564 
showCountries(final String title, Set<String> isoCountries, Set<String> cldrCountries, Set<String> missing)1565     private void showCountries(final String title, Set<String> isoCountries,
1566         Set<String> cldrCountries, Set<String> missing) {
1567         missing.clear();
1568         missing.addAll(isoCountries);
1569         missing.removeAll(cldrCountries);
1570         for (String country : missing) {
1571             logln("\t\tExtra in " + title + "\t" + country + " - "
1572                 + getRegionName(country));
1573         }
1574     }
1575 
TestCurrencyDecimalPlaces()1576     public void TestCurrencyDecimalPlaces() {
1577         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
1578         Relation<String, IsoCurrencyParser.Data> codeList = isoCodes
1579             .getCodeList();
1580         Set<String> currencyCodes = STANDARD_CODES
1581             .getGoodAvailableCodes("currency");
1582         for (String cc : currencyCodes) {
1583             Set<IsoCurrencyParser.Data> d = codeList.get(cc);
1584             if (d != null) {
1585                 for (IsoCurrencyParser.Data x : d) {
1586                     CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc);
1587                     if (cni.digits != x.getMinorUnit()) {
1588                         logln("Mismatch between ISO/CLDR for decimal places for currency => " + cc +
1589                             ". ISO = " + x.getMinorUnit() + " CLDR = " + cni.digits);
1590                     }
1591                 }
1592             }
1593         }
1594     }
1595 
1596     /**
1597      * Verify that we have a default script for every CLDR base language
1598      */
TestDefaultScripts()1599     public void TestDefaultScripts() {
1600         SupplementalDataInfo supp = SUPPLEMENTAL;
1601         Map<String, String> likelyData = supp.getLikelySubtags();
1602         Map<String, String> baseToDefaultContentScript = new HashMap<>();
1603         for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) {
1604             String script = locale.getScript();
1605             if (!script.isEmpty() && locale.getCountry().isEmpty()) {
1606                 baseToDefaultContentScript.put(locale.getLanguage(), script);
1607             }
1608         }
1609         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
1610             if ("root".equals(locale)) {
1611                 continue;
1612             }
1613             CLDRLocale loc = CLDRLocale.getInstance(locale);
1614             String baseLanguage = loc.getLanguage();
1615             String defaultScript = supp.getDefaultScript(baseLanguage);
1616 
1617             String defaultContentScript = baseToDefaultContentScript
1618                 .get(baseLanguage);
1619             if (defaultContentScript != null) {
1620                 assertEquals(loc + " defaultContentScript = default",
1621                     defaultScript, defaultContentScript);
1622             }
1623             String likely = likelyData.get(baseLanguage);
1624             String likelyScript = likely == null ? null : CLDRLocale
1625                 .getInstance(likely).getScript();
1626             Map<Type, BasicLanguageData> scriptInfo = supp
1627                 .getBasicLanguageDataMap(baseLanguage);
1628             if (scriptInfo == null) {
1629                 errln(loc + ": has no BasicLanguageData");
1630             } else {
1631                 BasicLanguageData data = scriptInfo.get(Type.primary);
1632                 if (data == null) {
1633                     data = scriptInfo.get(Type.secondary);
1634                 }
1635                 if (data == null) {
1636                     errln(loc + ": has no scripts in BasicLanguageData");
1637                 } else if (!data.getScripts().contains(defaultScript)) {
1638                     errln(loc + ": " + defaultScript
1639                         + " not in BasicLanguageData " + data.getScripts());
1640                 }
1641             }
1642 
1643             assertEquals(loc + " likely = default", defaultScript, likelyScript);
1644 
1645             assertNotNull(loc + ": needs default script", defaultScript);
1646 
1647             if (!loc.getScript().isEmpty()) {
1648                 if (!loc.getScript().equals(defaultScript)) {
1649                     assertNotEquals(locale
1650                         + ": only include script if not default",
1651                         loc.getScript(), defaultScript);
1652                 }
1653             }
1654 
1655         }
1656     }
1657 
/** How {@code errOrLog} should report a message. */
enum CoverageIssue {
    /** Report through {@code logln}. */
    log,
    /** Report through {@code warnln}. */
    warn,
    /** Report as an error, subject to the known-issue handling in {@code errOrLog}. */
    error
}
1661 
TestPluralCompleteness()1662     public void TestPluralCompleteness() {
1663         // Set<String> cardinalLocales = new
1664         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
1665         // Set<String> ordinalLocales = new
1666         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal));
1667         // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals =
1668         // PluralRulesFactory.getLocaleToSamplePatterns();
1669         // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales();
1670         // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale,
1671         // type).keySet());
1672         // Map<ULocale, PluralRules> overrideCardinals =
1673         // PluralRulesFactory.getPluralOverrides();
1674         // Set<ULocale> overrideCardinalLocales = new
1675         // HashSet<ULocale>(overrideCardinals.keySet());
1676 
1677         Set<String> testLocales = STANDARD_CODES.getLocaleCoverageLocales(
1678             Organization.google, EnumSet.of(Level.MODERN));
1679         Set<String> allLocales = testInfo.getCldrFactory().getAvailable();
1680         LanguageTagParser ltp = new LanguageTagParser();
1681         for (String locale : allLocales) {
1682             // the only known case where plural rules depend on region or script
1683             // is pt_PT
1684             if (locale.equals("root")) {
1685                 continue;
1686             }
1687             ltp.set(locale);
1688             if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) {
1689                 continue;
1690             }
1691             CoverageIssue needsCoverage = testLocales.contains(locale)
1692                 ? CoverageIssue.error
1693                     : CoverageIssue.log;
1694             CoverageIssue needsCoverage2 = needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage;
1695 
1696             //            if (logKnownIssue("Cldrbug:8809", "Missing plural rules/samples be and ga locales")) {
1697             //                if (locale.equals("be") || locale.equals("ga")) {
1698             //                    needsCoverage = CoverageIssue.warn;
1699             //                }
1700             //            }
1701             PluralRulesFactory prf = PluralRulesFactory
1702                 .getInstance(CLDRConfig.getInstance()
1703                     .getSupplementalDataInfo());
1704 
1705             for (PluralType type : PluralType.values()) {
1706                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale,
1707                     false);
1708                 if (pluralInfo == null) {
1709                     errOrLog(needsCoverage, locale + "\t" + type + " \tmissing plural rules", "Cldrbug:7839", "Missing plural data for modern locales");
1710                     continue;
1711                 }
1712                 Set<Count> counts = pluralInfo.getCounts();
1713                 // if (counts.size() == 1) {
1714                 // continue; // skip checking samples
1715                 // }
1716                 HashSet<String> samples = new HashSet<>();
1717                 EnumSet<Count> countsWithNoSamples = EnumSet
1718                     .noneOf(Count.class);
1719                 Relation<String, Count> samplesToCounts = Relation.of(
1720                     new HashMap(), LinkedHashSet.class);
1721                 Set<Count> countsFound = prf.getSampleCounts(locale,
1722                     type.standardType);
1723                 StringBuilder failureCases = new StringBuilder();
1724                 for (Count count : counts) {
1725                     String pattern = PluralRulesFactory.getSamplePattern(locale, type.standardType, count);
1726                     final String rangeLine = getRangeLine(count, pluralInfo.getPluralRules(), pattern);
1727                     failureCases.append('\n').append(locale).append('\t').append(type).append('\t').append(rangeLine);
1728                     if (countsFound == null || !countsFound.contains(count)) {
1729                         countsWithNoSamples.add(count);
1730                     } else {
1731                         samplesToCounts.put(pattern, count);
1732                         logln(locale + "\t" + type + "\t" + count + "\t"
1733                             + pattern);
1734                     }
1735                 }
1736                 if (!countsWithNoSamples.isEmpty()) {
1737                     errOrLog(needsCoverage, locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples,
1738                         "cldrbug:7075", "Missing ordinal minimal pairs");
1739                     errOrLog(needsCoverage2, failureCases.toString());
1740                 }
1741                 for (Entry<String, Set<Count>> entry : samplesToCounts
1742                     .keyValuesSet()) {
1743                     if (entry.getValue().size() != 1) {
1744                         errOrLog(needsCoverage, locale + "\t" + type + "\t duplicate samples: " + entry.getValue()
1745                         + " => «" + entry.getKey() + "»", "cldrbug:7119", "Some duplicate minimal pairs");
1746                         errOrLog(needsCoverage2, failureCases.toString());
1747                     }
1748                 }
1749             }
1750         }
1751     }
1752 
errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment)1753     public void errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment) {
1754         switch (causeError) {
1755         case error:
1756             if (logTicket == null) {
1757                 errln(message);
1758                 break;
1759             }
1760             logKnownIssue(logTicket, logComment);
1761             // fall through
1762         case warn:
1763             warnln(message);
1764             break;
1765         case log:
1766             logln(message);
1767             break;
1768         }
1769     }
1770 
errOrLog(CoverageIssue causeError, String message)1771     public void errOrLog(CoverageIssue causeError, String message) {
1772         errOrLog(causeError, message, null, null);
1773     }
1774 
TestNumberingSystemDigits()1775     public void TestNumberingSystemDigits() {
1776 
1777         // Don't worry about digits from supplemental planes yet ( ICU can't
1778         // handle them anyways )
1779         // hanidec is the only known non codepoint order numbering system
1780         // TODO: Fix so that it works properly on non-BMP digit strings.
1781         String[] knownExceptions = { "brah", "cakm", "hanidec", "osma", "shrd",
1782             "sora", "takr" };
1783         List<String> knownExceptionList = Arrays.asList(knownExceptions);
1784         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
1785             if (knownExceptionList.contains(ns)) {
1786                 continue;
1787             }
1788             String digits = SUPPLEMENTAL.getDigits(ns);
1789             int previousChar = 0;
1790             int ch;
1791 
1792             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
1793                 ch = digits.codePointAt(i);
1794                 if (i > 0 && ch != previousChar + 1) {
1795                     errln("Digits for numbering system "
1796                         + ns
1797                         + " are not in code point order. Previous char = U+"
1798                         + Utility.hex(previousChar, 4)
1799                         + " Current char = U+" + Utility.hex(ch, 4));
1800                     break;
1801                 }
1802                 previousChar = ch;
1803             }
1804         }
1805     }
1806 
TestNumberingSystemDigitCompleteness()1807     public void TestNumberingSystemDigitCompleteness() {
1808         List<Integer> unicodeDigits = new ArrayList<>();
1809         for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) {
1810             if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) {
1811                 unicodeDigits.add(Integer.valueOf(cp));
1812             }
1813         }
1814 
1815         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
1816             String digits = SUPPLEMENTAL.getDigits(ns);
1817             int ch;
1818 
1819             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
1820                 ch = digits.codePointAt(i);
1821                 unicodeDigits.remove(Integer.valueOf(ch));
1822             }
1823         }
1824 
1825         if (unicodeDigits.size() > 0) {
1826             for (Integer i : unicodeDigits) {
1827                 errln("Unicode digit: " + UCharacter.getName(i) + " is not in any numbering system. Script = "
1828                     + UScript.getShortName(UScript.getScript(i)));
1829             }
1830         }
1831     }
1832 
TestMetazones()1833     public void TestMetazones() {
1834         Date goalMin = new Date(70, 0, 1);
1835         Date goalMax = new Date(300, 0, 2);
1836         ImmutableSet<String> knownTZWithoutMetazone = ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov");
1837         for (String timezoneRaw : TimeZone.getAvailableIDs()) {
1838             String timezone = TimeZone.getCanonicalID(timezoneRaw);
1839             String region = TimeZone.getRegion(timezone);
1840             if (!timezone.equals(timezoneRaw) || "001".equals(region)) {
1841                 continue;
1842             }
1843             if (knownTZWithoutMetazone.contains(timezone)) {
1844                 continue;
1845             }
1846             final Set<MetaZoneRange> ranges = SUPPLEMENTAL
1847                 .getMetaZoneRanges(timezone);
1848 
1849             if (assertNotNull("metazones for " + timezone, ranges)) {
1850                 long min = Long.MAX_VALUE;
1851                 long max = Long.MIN_VALUE;
1852                 for (MetaZoneRange range : ranges) {
1853                     if (range.dateRange.from != DateRange.START_OF_TIME) {
1854                         min = Math.min(min, range.dateRange.from);
1855                     }
1856                     if (range.dateRange.to != DateRange.END_OF_TIME) {
1857                         max = Math.max(max, range.dateRange.to);
1858                     }
1859                 }
1860                 assertRelation(timezone + " has metazone before 1970?", true,
1861                     goalMin, LEQ, new Date(min));
1862                 assertRelation(timezone
1863                     + " has metazone until way in the future?", true,
1864                     goalMax, GEQ, new Date(max));
1865             }
1866         }
1867         com.google.common.collect.Interners i;
1868     }
1869 
Test9924()1870     public void Test9924() {
1871         Boolean b = org.unicode.cldr.unittest.TestSupplementalInfo.LOCALES_FIXED;
1872         PopulationData zhCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(b ? "zh" : "zh_Hans", "CN");
1873         PopulationData yueCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN");
1874         assertTrue("yue*10 < zh", yueCNData.getPopulation() < zhCNData.getPopulation());
1875     }
1876 
Test10765()1877     public void Test10765() { //
1878         Set<String> surveyToolLanguages = SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool
1879         Set<String> mainLanguages = new TreeSet<>();
1880         LanguageTagParser ltp = new LanguageTagParser();
1881         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
1882             mainLanguages.add(ltp.set(locale).getLanguage());
1883         }
1884         // add special codes we want to see anyway
1885         mainLanguages.add("und");
1886         mainLanguages.add("mul");
1887         mainLanguages.add("zxx");
1888 
1889         if (!mainLanguages.containsAll(surveyToolLanguages)) {
1890             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale
1891             Set<String> temp = new TreeSet<>(surveyToolLanguages);
1892             temp.removeAll(mainLanguages);
1893             Set<String> modern = new TreeSet<>();
1894             Set<String> comprehensive = new TreeSet<>();
1895             for (String lang : temp) {
1896                 Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang));
1897                 if (level.compareTo(Level.MODERN) <= 0) {
1898                     modern.add(lang);
1899                 } else {
1900                     comprehensive.add(lang);
1901                 }
1902             }
1903             warnln("«Modern» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(modern));
1904             logln("«Comprehensive» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(comprehensive));
1905         }
1906         if (!surveyToolLanguages.containsAll(mainLanguages)) {
1907             mainLanguages.removeAll(surveyToolLanguages);
1908             // TODO: See https://unicode-org.atlassian.net/browse/CLDR-14974
1909             // Currently there is a requirement that all locales in main/* are in attributeValueValidity.xml
1910             assertEquals("main/* languages missing from <variable id='$language'/> in attributeValueValidity.xml",
1911                 Collections.EMPTY_SET, mainLanguages);
1912         }
1913     }
1914 
getNames(Set<String> temp)1915     private Set<String> getNames(Set<String> temp) {
1916         Set<String> tempNames = new TreeSet<>();
1917         for (String langCode : temp) {
1918             tempNames.add(testInfo.getEnglish().getName(CLDRFile.LANGUAGE_NAME, langCode) + " (" + langCode + ")");
1919         }
1920         return tempNames;
1921     }
1922 
TestGrammarInfo()1923     public void TestGrammarInfo() {
1924         final Logger logger = getLogger();
1925         Multimap<String,String> allValues = TreeMultimap.create();
1926         for (String locale : SUPPLEMENTAL.hasGrammarInfo()) {
1927             if (locale.contentEquals("tr")) {
1928                 int debug = 0;
1929             }
1930             GrammarInfo grammarInfo = SUPPLEMENTAL.getGrammarInfo(locale);
1931             for (GrammaticalTarget target : GrammaticalTarget.values()) {
1932                 for (GrammaticalFeature feature : GrammaticalFeature.values()) {
1933                     Collection<String> general = grammarInfo.get(target, feature, GrammaticalScope.general);
1934                     for (GrammaticalScope scope : GrammaticalScope.values()) {
1935                         Collection<String> units = grammarInfo.get(target, feature, scope);
1936                         allValues.putAll(target + "/" + feature + "/" + scope, units);
1937                         if (scope != GrammaticalScope.general) {
1938                             assertTrue(general + " > " + scope + " " + units, general.containsAll(units));
1939                         }
1940                     }
1941                 }
1942             }
1943             logger.fine(grammarInfo.toString("\n" + locale + "\t"));
1944         }
1945         if (logger.isLoggable(java.util.logging.Level.FINE)) {  // if level is at least FINE
1946             logger.fine("");
1947             for (Entry<String, Collection<String>> entry : allValues.asMap().entrySet()) {
1948                 logger.fine(entry.getKey() + "\t" + Joiner.on(", ").join(entry.getValue()));
1949             }
1950         }
1951     }
1952 }
1953