• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.Collection;
6 import java.util.Collections;
7 import java.util.Date;
8 import java.util.EnumMap;
9 import java.util.EnumSet;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.LinkedHashMap;
14 import java.util.LinkedHashSet;
15 import java.util.List;
16 import java.util.Locale;
17 import java.util.Map;
18 import java.util.Map.Entry;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.TreeSet;
22 import java.util.logging.Logger;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25 
26 import org.unicode.cldr.draft.ScriptMetadata;
27 import org.unicode.cldr.test.CoverageLevel2;
28 import org.unicode.cldr.tool.LikelySubtags;
29 import org.unicode.cldr.tool.PluralMinimalPairs;
30 import org.unicode.cldr.tool.PluralRulesFactory;
31 import org.unicode.cldr.util.Builder;
32 import org.unicode.cldr.util.CLDRConfig;
33 import org.unicode.cldr.util.CLDRFile;
34 import org.unicode.cldr.util.CLDRFile.WinningChoice;
35 import org.unicode.cldr.util.CLDRLocale;
36 import org.unicode.cldr.util.CldrUtility;
37 import org.unicode.cldr.util.GrammarInfo;
38 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
39 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
40 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
41 import org.unicode.cldr.util.Iso639Data;
42 import org.unicode.cldr.util.Iso639Data.Scope;
43 import org.unicode.cldr.util.IsoCurrencyParser;
44 import org.unicode.cldr.util.LanguageTagCanonicalizer;
45 import org.unicode.cldr.util.LanguageTagParser;
46 import org.unicode.cldr.util.Level;
47 import org.unicode.cldr.util.Organization;
48 import org.unicode.cldr.util.Pair;
49 import org.unicode.cldr.util.PluralRanges;
50 import org.unicode.cldr.util.PreferredAndAllowedHour;
51 import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle;
52 import org.unicode.cldr.util.StandardCodes;
53 import org.unicode.cldr.util.StandardCodes.CodeType;
54 import org.unicode.cldr.util.StandardCodes.LstrType;
55 import org.unicode.cldr.util.SupplementalDataInfo;
56 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
57 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
58 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle;
59 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
60 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo;
61 import org.unicode.cldr.util.SupplementalDataInfo.DateRange;
62 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange;
63 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
64 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
65 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
66 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
67 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
68 import org.unicode.cldr.util.SupplementalDataInfo.SampleList;
69 import org.unicode.cldr.util.Validity;
70 import org.unicode.cldr.util.Validity.Status;
71 
72 import com.google.common.base.Joiner;
73 import com.google.common.collect.ImmutableSet;
74 import com.google.common.collect.Multimap;
75 import com.google.common.collect.TreeMultimap;
76 import com.ibm.icu.impl.Relation;
77 import com.ibm.icu.impl.Row;
78 import com.ibm.icu.impl.Row.R2;
79 import com.ibm.icu.impl.Row.R3;
80 import com.ibm.icu.impl.Utility;
81 import com.ibm.icu.impl.number.DecimalQuantity;
82 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
83 import com.ibm.icu.lang.UCharacter;
84 import com.ibm.icu.lang.UCharacterEnums;
85 import com.ibm.icu.lang.UScript;
86 import com.ibm.icu.text.PluralRules;
87 import com.ibm.icu.text.PluralRules.DecimalQuantitySamples;
88 import com.ibm.icu.text.PluralRules.DecimalQuantitySamplesRange;
89 import com.ibm.icu.text.PluralRules.SampleType;
90 import com.ibm.icu.text.StringTransform;
91 import com.ibm.icu.text.UnicodeSet;
92 import com.ibm.icu.util.Output;
93 import com.ibm.icu.util.TimeZone;
94 import com.ibm.icu.util.ULocale;
95 
96 public class TestSupplementalInfo extends TestFmwkPlus {
97     static CLDRConfig testInfo = CLDRConfig.getInstance();
98 
99     private static final StandardCodes STANDARD_CODES = StandardCodes.make();
100 
101     private static final SupplementalDataInfo SUPPLEMENTAL = testInfo
102         .getSupplementalDataInfo();
103 
main(String[] args)104     public static void main(String[] args) {
105         new TestSupplementalInfo().run(args);
106     }
107 
TestPluralSampleOrder()108     public void TestPluralSampleOrder() {
109         HashSet<PluralInfo> seen = new HashSet<>();
110         for (String locale : SUPPLEMENTAL.getPluralLocales()) {
111             if (locale.equals("root")) {
112                 continue;
113             }
114             PluralInfo pi = SUPPLEMENTAL.getPlurals(locale);
115             if (seen.contains(pi)) {
116                 continue;
117             }
118             seen.add(pi);
119             for (SampleType s : SampleType.values()) {
120                 for (Count c : pi.getCounts(s)) {
121                     DecimalQuantitySamples sSamples = pi.getPluralRules()
122                         .getDecimalSamples(c.toString(), s);
123                     if (sSamples == null) {
124                         errln(locale + " no sample for " + c);
125                         continue;
126                     }
127                     if (s == SampleType.DECIMAL) {
128                         continue; // skip
129                     }
130                     DecimalQuantitySamplesRange lastSample = null;
131                     for (DecimalQuantitySamplesRange sample : sSamples.getSamples()) {
132                         if (lastSample != null) {
133                             if (compare(lastSample.start,sample.start) > 0) {
134                                 errln(locale + ":" + c + ": out of order with "
135                                     + lastSample + " > " + sample);
136                             } else if (false) {
137                                 logln(locale + ":" + c + ": in order with "
138                                     + lastSample + " < " + sample);
139                             }
140                         }
141                         lastSample = sample;
142                     }
143                 }
144             }
145         }
146     }
147 
compare(DecimalQuantity me, DecimalQuantity other)148     public static int compare(DecimalQuantity me, DecimalQuantity other) {
149         // We place exponent notation samples entirely after ones without exponent
150         if (me.getExponent() != other.getExponent()) {
151             return me.getExponent() < other.getExponent() ? -1 : 1;
152         }
153 
154         return (int) (me.toDouble() - other.toDouble());
155     }
156 
TestPluralRanges()157     public void TestPluralRanges() {
158         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
159         Set<String> localesToTest = new TreeSet<>(
160             SUPPLEMENTAL.getPluralRangesLocales());
161         for (String locale : StandardCodes.make().getLocaleCoverageLocales(
162             "google")) { // superset
163             if (locale.equals("*") || locale.contains("_")) {
164                 continue;
165             }
166             localesToTest.add(locale);
167         }
168         Set<String> modernLocales = StandardCodes.make()
169             .getLocaleCoverageLocales(Organization.cldr,
170                 EnumSet.of(Level.MODERN));
171 
172         Output<DecimalQuantity> maxSample = new Output<>();
173         Output<DecimalQuantity> minSample = new Output<>();
174 
175         for (String locale : localesToTest) {
176             final String templateLine = "Template for " + ULocale.getDisplayName(locale, "en") + " (" + locale + ") translators to fix:";
177             PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
178             Set<Count> counts = pluralInfo.getCounts();
179 
180             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(new ULocale(locale).toString());
181 
182             // check that there are no null values
183             PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
184             if (pluralRanges == null) {
185                 if (!modernLocales.contains(locale)) {
186                     logln("Missing plural ranges for " + locale);
187                 } else {
188                     errOrLog(CoverageIssue.error, locale + "\tMissing plural ranges", "Cldrbug:7839", "Missing plural data for modern locales");
189                     StringBuilder failureCases = new StringBuilder(templateLine);
190                     for (Count start : counts) {
191                         for (Count end : counts) {
192                             pluralInfo.rangeExists(start, end, minSample, maxSample);
193                             final String rangeLine = getRangeLine(start, end, null, maxSample, minSample, samplePatterns);
194                             failureCases.append("\n" + locale + "\t" + rangeLine);
195                         }
196                     }
197                     errOrLog(CoverageIssue.warn, failureCases.toString());
198                 }
199                 continue;
200             }
201             EnumSet<Count> found = EnumSet.noneOf(Count.class);
202             for (Count count : Count.values()) {
203                 if (pluralRanges.isExplicitlySet(count)
204                     && !counts.contains(count)) {
205                     assertTrue(
206                         locale
207                         + "\t pluralRanges categories must be valid for locale:\t"
208                         + count + " must be in " + counts,
209                         !pluralRanges.isExplicitlySet(count));
210                 }
211                 for (Count end : Count.values()) {
212                     Count result = pluralRanges.getExplicit(count, end);
213                     if (result != null) {
214                         found.add(result);
215                     }
216                 }
217             }
218 
219             // check empty range results
220             if (found.isEmpty()) {
221                 errOrLog(CoverageIssue.error, "Empty range results for " + locale, "Cldrbug:7839", "Missing plural data for modern locales");
222             } else {
223                 if (samplePatterns == null) {
224                     errOrLog(CoverageIssue.error, locale + "\tMissing sample patterns", "Cldrbug:7839", "Missing plural data for modern locales");
225                 } else {
226                     for (Count result : found) {
227                         String samplePattern = samplePatterns.get(
228                             PluralRules.PluralType.CARDINAL, result);
229                         if (samplePattern != null && !samplePattern.contains("{0}")) {
230                             errln("Plural Ranges cannot have results that don't use {0} in samples: "
231                                 + locale
232                                 + ", "
233                                 + result
234                                 + "\t«"
235                                 + samplePattern + "»");
236                         }
237                     }
238                 }
239                 if (isVerbose()) {
240                     logln("Range results for " + locale + ":\t" + found);
241                 }
242             }
243 
244             // check for missing values
245             boolean failure = false;
246             StringBuilder failureCases = new StringBuilder(templateLine);
247             for (Count start : counts) {
248                 for (Count end : counts) {
249                     boolean needsValue = pluralInfo.rangeExists(start, end,
250                         minSample, maxSample);
251                     Count explicitValue = pluralRanges.getExplicit(start, end);
252                     final String rangeLine = getRangeLine(start, end, explicitValue, maxSample, minSample, samplePatterns);
253                     failureCases.append("\n" + locale + "\t" + rangeLine);
254                     if (needsValue && explicitValue == null) {
255                         errOrLog(CoverageIssue.error, locale + "\tNo explicit value for range: "
256                             + rangeLine,
257                             "Cldrbug:7839", "Missing plural data for modern locales");
258                         failure = true;
259                         failureCases.append("\tError — need explicit result");
260                     } else if (!needsValue && explicitValue != null) {
261                         errOrLog(CoverageIssue.error, locale + "\tDoesn't need explicit value, but has one: "
262                             + PluralRanges.showRange(start, end, explicitValue),
263                             "Cldrbug:7839", "Missing plural data for modern locales");
264                         failureCases.append("\tUnnecessary");
265                         failure = true;
266                     } else {
267                         failureCases.append("\tOK");
268                     }
269                 }
270             }
271             if (failure) {
272                 errOrLog(CoverageIssue.warn, failureCases.toString());
273             }
274         }
275     }
276 
getRangeLine(Count start, Count end, Count result, Output<DecimalQuantity> maxSample, Output<DecimalQuantity> minSample, PluralMinimalPairs samplePatterns)277     private String getRangeLine(Count start, Count end, Count result,
278         Output<DecimalQuantity> maxSample, Output<DecimalQuantity> minSample,
279         PluralMinimalPairs samplePatterns) {
280         final String range = minSample + "–" + maxSample;
281         String example = range;
282         if (samplePatterns != null) {
283             example = "";
284             if (result != null) {
285                 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result);
286                 example += "«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»";
287             } else {
288                 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) {
289                     String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c);
290                     example += c + ":«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»" + "?\tOR ";
291                 }
292                 example += " …";
293             }
294         }
295         return start + "\t" + end + "\t" + (result == null ? "?" : result.toString()) + "\t" + example;
296     }
297 
getRangeLine(Count count, PluralRules pluralRules, String pattern)298     private String getRangeLine(Count count, PluralRules pluralRules, String pattern) {
299         String sample = "?";
300         DecimalQuantitySamples exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER);
301         if (exampleList == null) {
302             exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL);
303         }
304         DecimalQuantity sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList);
305         sample = sampleDecimal.toString();
306 
307         String example = pattern == null ? "NO-SAMPLE!" : "«" + pattern.replace("{0}", sample) + "»";
308         return count + "\t" + example;
309     }
310 
TestPluralSamples()311     public void TestPluralSamples() {
312         String[][] test = { { "en", "ordinal", "1", "one" },
313             { "en", "ordinal", "2", "two" },
314             { "en", "ordinal", "3", "few" },
315             { "en", "ordinal", "4", "other" },
316             { "sl", "cardinal", "2", "two" }, };
317         for (String[] row : test) {
318             checkPluralSamples(row);
319         }
320     }
321 
TestPluralSamples2()322     public void TestPluralSamples2() {
323         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
324         for (String locale : prf.getLocales()) {
325             if (locale.equals("und")) {
326                 continue;
327             }
328             if (locale.equals("pl")) {
329                 int debug = 0;
330             }
331             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale);
332             for (PluralRules.PluralType type : PluralRules.PluralType.values()) {
333                 PluralInfo rules = SUPPLEMENTAL.getPlurals(
334                     SupplementalDataInfo.PluralType.fromStandardType(type),
335                     locale.toString());
336                 if (rules.getCounts().size() == 1) {
337                     continue; // don't require rules for unary cases
338                 }
339                 Multimap<String, Count> sampleToCount = TreeMultimap.create();
340 
341                 for (Count count : rules.getCounts()) {
342                     String sample = samplePatterns.get(type, count);
343                     if (sample == null) {
344                         errOrLog(CoverageIssue.error, locale + "\t" + type + " \tmissing samples for " + count, "cldrbug:7075",
345                             "Missing ordinal minimal pairs");
346                     } else {
347                         sampleToCount.put(sample, count);
348                         PluralRules pRules = rules.getPluralRules();
349                         double unique = pRules.getUniqueKeywordValue(count
350                             .toString());
351                         if (unique == PluralRules.NO_UNIQUE_VALUE
352                             && !sample.contains("{0}")) {
353                             errln("Missing {0} in sample: " + locale + ", " + type + ", " + count + " «" + sample + "»");
354                         }
355                     }
356                 }
357                 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) {
358                     if (entry.getValue().size() > 1) {
359                         errln("Colliding minimal pair samples: " + locale + ", " + type + ", " + entry.getValue() + " «" + entry.getKey() + "»");
360                     }
361                 }
362             }
363         }
364     }
365 
TestCldrScriptCodes()366     public void TestCldrScriptCodes() {
367         Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes();
368 
369         Set<String> unicodeScripts = ScriptMetadata.getScripts();
370         assertRelation("getCLDRScriptCodes contains Unicode Scripts", true, codes, CONTAINS_ALL, unicodeScripts);
371 
372         ImmutableSet<String> allSpecials = ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz");
373         assertRelation("getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials);
374 
375         ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore");
376         assertRelation("getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos);
377 
378         Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script);
379         for (Entry<Status, Set<String>> e : scripts.entrySet()) {
380             switch (e.getKey()) {
381             case regular:
382             case special:
383             case unknown:
384                 assertRelation("getCLDRScriptCodes contains " + e.getKey(), true, codes, CONTAINS_ALL, e.getValue());
385                 break;
386             default:
387                 break; // do nothin
388             }
389         }
390 
391         ImmutableSet<String> variants = ImmutableSet.of("Cyrs", "Geok", "Latf", "Latg", "Syre", "Syrj", "Syrn");
392         assertRelation("getCLDRScriptCodes contains variants", false, codes, CONTAINS_SOME, variants);
393     }
394 
checkPluralSamples(String... row)395     public void checkPluralSamples(String... row) {
396         PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(
397             PluralType.valueOf(row[1]), row[0]);
398         Count count = pluralInfo.getCount(DecimalQuantity_DualStorageBCD.fromExponentString(row[2]));
399         assertEquals(String.join(", ", row),
400             Count.valueOf(row[3]), count);
401     }
402 
TestPluralLocales()403     public void TestPluralLocales() {
404         // get the unique rules
405         for (PluralType type : PluralType.values()) {
406             Relation<PluralInfo, String> pluralsToLocale = Relation.of(
407                 new HashMap<PluralInfo, Set<String>>(), TreeSet.class);
408             for (String locale : new TreeSet<>(
409                 SUPPLEMENTAL.getPluralLocales(type))) {
410                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale);
411                 pluralsToLocale.put(pluralInfo, locale);
412             }
413 
414             String[][] equivalents = { { "mo", "ro" }, { "tl", "fil" },
415                 { "he", "iw" }, { "in", "id" }, { "jw", "jv" },
416                 { "ji", "yi" }, { "sh", "sr" }, };
417             for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale
418                 .keyValuesSet()) {
419                 PluralInfo pluralInfo2 = pluralInfoEntry.getKey();
420                 Set<String> locales = pluralInfoEntry.getValue();
421                 // check that equivalent locales are either both in or both out
422                 for (String[] row : equivalents) {
423                     assertEquals(
424                         type + " must be equivalent: " + Arrays.asList(row),
425                         locales.contains(row[0]), locales.contains(row[1]));
426                 }
427                 // check that no rules contain 'within'
428                 for (Count count : pluralInfo2.getCounts()) {
429                     String rule = pluralInfo2.getRule(count);
430                     if (rule == null) {
431                         continue;
432                     }
433                     assertFalse(
434                         "Rule '" + rule + "' for " + Arrays.asList(locales)
435                         + " doesn't contain 'within'",
436                         rule.contains("within"));
437                 }
438             }
439         }
440     }
441 
TestDigitPluralCases()442     public void TestDigitPluralCases() {
443         String[][] tests = {
444             { "en", "one", "1", "1" },
445             { "en", "one", "2", "" },
446             { "en", "one", "3", "" },
447             { "en", "one", "4", "" },
448             { "en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, …" },
449             { "en", "other", "2", "10-99, 10.0, 10.1, 10.2, …" },
450             { "en", "other", "3", "100-999, 100.0, 100.1, 100.2, …" },
451             { "en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, …" },
452             { "hr", "one", "1", "1, 0.1, 2.10, 1.1, …" },
453             { "hr", "one", "2",
454             "21, 31, 41, 51, 61, 71, …, 10.1, 12.10, 11.1, …" },
455             { "hr", "one", "3",
456             "101, 121, 131, 141, 151, 161, …, 100.1, 102.10, 101.1, …" },
457             { "hr", "one", "4",
458             "1001, 1021, 1031, 1041, 1051, 1061, …, 1000.1, 1002.10, 1001.1, …" },
459             { "hr", "few", "1", "2-4, 0.2, 0.3, 0.4, …" },
460             { "hr", "few", "2",
461             "22-24, 32-34, 42-44, …, 10.2, 10.3, 10.4, …" },
462             { "hr", "few", "3",
463             "102-104, 122-124, 132-134, …, 100.2, 100.3, 100.4, …" },
464             { "hr", "few", "4",
465             "1002-1004, 1022-1024, 1032-1034, …, 1000.2, 1000.3, 1000.4, …" },
466             { "hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, …" },
467             { "hr", "other", "2",
468             "10-20, 25-30, 35-40, …, 10.0, 10.5, 10.6, …" },
469             { "hr", "other", "3",
470             "100, 105-120, 125-130, 135-140, …, 100.0, 100.5, 100.6, …" },
471             { "hr", "other", "4",
472             "1000, 1005-1020, 1025-1030, 1035-1040, …, 1000.0, 1000.5, 1000.6, …" }, };
473         for (String[] row : tests) {
474             PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
475             SampleList uset = plurals.getSamples9999(Count.valueOf(row[1]),
476                 Integer.parseInt(row[2]));
477             assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3],
478                 uset.toString());
479         }
480     }
481 
TestDigitPluralCompleteness()482     public void TestDigitPluralCompleteness() {
483         String[][] exceptionStrings = {
484             // defaults
485             { "*", "zero", "0,00,000,0000" },
486             { "*", "one", "0" },
487             { "*", "two", "0,00,000,0000" },
488             { "*", "few", "0,00,000,0000" },
489             { "*", "many", "0,00,000,0000" },
490             { "*", "other", "0,00,000,0000" },
491             // others
492             { "mo", "other", "00,000,0000" }, //
493             { "ro", "other", "00,000,0000" }, //
494             { "cs", "few", "0" }, // j in 2..4
495             { "sk", "few", "0" }, // j in 2..4
496             { "da", "one", "0" }, // j is 1 or t is not 0 and n within 0..2
497             { "is", "one", "0,00,000,0000" }, // j is 1 or f is 1
498             { "sv", "one", "0" }, // j is 1
499             { "he", "two", "0" }, // j is 2
500             { "ru", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
501             // is not 11
502             { "uk", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
503             // is not 11
504             { "bs", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
505             // is not 11 or f mod 10 is
506             // 1 and f mod 100 is not 11
507             { "hr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
508             // is not 11 or f mod 10 is
509             // 1 and f mod 100 is not 11
510             { "sh", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
511             // is not 11 or f mod 10 is
512             // 1 and f mod 100 is not 11
513             { "sr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
514             // is not 11 or f mod 10 is
515             // 1 and f mod 100 is not 11
516             { "mk", "one", "0,00,000,0000" }, // j mod 10 is 1 or f mod 10
517             // is 1
518             { "sl", "one", "0,000,0000" }, // j mod 100 is 1
519             { "sl", "two", "0,000,0000" }, // j mod 100 is 2
520             { "he", "many", "00,000,0000" }, // j not in 0..10 and j mod 10
521             // is 0
522             { "tzm", "one", "0,00" }, // n in 0..1 or n in 11..99
523             { "gd", "one", "0,00" }, // n in 1,11
524             { "gd", "two", "0,00" }, // n in 2,12
525             { "shi", "few", "0,00" }, // n in 2..10
526             { "gd", "few", "0,00" }, // n in 3..10,13..19
527             { "ga", "few", "0" }, // n in 3..6
528             { "ga", "many", "0,00" }, // n in 7..10
529             { "ar", "zero", "0" }, // n is 0
530             { "cy", "zero", "0" }, // n is 0
531             { "ksh", "zero", "0" }, // n is 0
532             { "lag", "zero", "0" }, // n is 0
533             { "pt", "one", "0" }, // i = 1 and v = 0 or i = 0 and t = 1
534             { "pt_PT", "one", "0" }, // n = 1 and v = 0
535             { "ar", "two", "0" }, // n is 2
536             { "cy", "two", "0" }, // n is 2
537             { "ga", "two", "0" }, // n is 2
538             { "iu", "two", "0" }, // n is 2
539             { "naq", "two", "0" }, // n is 2
540             { "se", "two", "0" }, // n is 2
541             { "sma", "two", "0" }, // n is 2
542             { "smi", "two", "0" }, // n is 2
543             { "smj", "two", "0" }, // n is 2
544             { "smn", "two", "0" }, // n is 2
545             { "sms", "two", "0" }, // n is 2
546             { "cy", "few", "0" }, // n is 3
547             { "cy", "many", "0" }, // n is 6
548             { "br", "many", "" }, // n is not 0 and n mod 1000000 is 0
549             { "gv", "one", "0,00,000,0000" }, // n mod 10 is 1
550             { "be", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
551             // is not 11
552             { "lv", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
553             // is not 11 or v is 2 and f
554             // mod 10 is 1 and f mod 100
555             // is not 11 or v is not 2
556             // and f mod 10 is 1
557             { "br", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
558             // not in 11,71,91
559             { "lt", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
560             // not in 11..19
561             { "fil", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v =
562             // 0 and i % 10 != 4,6,9 or
563             // v != 0 and f % 10 !=
564             // 4,6,9
565             { "tl", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v =
566             // 0 and i % 10 != 4,6,9 or
567             // v != 0 and f % 10 !=
568             // 4,6,9
569             { "dsb", "one", "0,00,000,0000" }, // v = 0 and i % 100 = 1 or f
570             // % 100 = 1
571             {"kw", "many", "00,000,0000"},  // n != 1 and n % 100 = 1,21,41,61,81
572             {"kw", "zero", "0"},    // n = 0
573             {"mt", "two", "0"},
574             {"fr", "many", ""},    // e is special
575             {"ca", "many", ""},    // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
576             {"es", "many", ""},    // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
577             {"it", "many", ""},    // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
578             {"pt", "many", ""},    // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
579             {"pt_PT", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
580         };
581         // parse out the exceptions
582         Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<>();
583         Relation<Count, Integer> fallback = Relation.of(
584             new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class);
585         for (String[] row : exceptionStrings) {
586             Relation<Count, Integer> countToDigits;
587             if (row[0].equals("*")) {
588                 countToDigits = fallback;
589             } else {
590                 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
591                 countToDigits = exceptions.get(plurals);
592                 if (countToDigits == null) {
593                     exceptions.put(
594                         plurals,
595                         countToDigits = Relation.of(
596                             new EnumMap<Count, Set<Integer>>(
597                                 Count.class),
598                             TreeSet.class));
599                 }
600             }
601             Count c = Count.valueOf(row[1]);
602             for (String digit : row[2].split(",")) {
603                 // "99" is special, just to have the result be non-empty
604                 countToDigits.put(c, digit.length());
605             }
606         }
607         Set<PluralInfo> seen = new HashSet<>();
608         Set<String> sorted = new TreeSet<>(
609             SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
610         Relation<String, String> ruleToExceptions = Relation.of(
611             new TreeMap<String, Set<String>>(), TreeSet.class);
612 
613         for (String locale : sorted) {
614             PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale);
615             if (seen.contains(plurals)) { // skip identicals
616                 continue;
617             }
618             Relation<Count, Integer> countToDigits = exceptions.get(plurals);
619             if (countToDigits == null) {
620                 countToDigits = fallback;
621             }
622             for (Count c : plurals.getCounts()) {
623                 List<String> compose = new ArrayList<>();
624                 boolean needLine = false;
625                 Set<Integer> digitSet = countToDigits.get(c);
626                 if (digitSet == null) {
627                     digitSet = fallback.get(c);
628                 }
629                 for (int digits = 1; digits < 5; ++digits) {
630                     boolean expected = digitSet.contains(digits);
631                     boolean hasSamples = plurals.hasSamples(c, digits);
632                     if (hasSamples) {
633                         compose.add(Utility.repeat("0", digits));
634                     }
635                     if (!assertEquals(locale + ", " + digits + ", " + c,
636                         expected, hasSamples)) {
637                         needLine = true;
638                     }
639                 }
640                 if (needLine) {
641                     String countRules = plurals.getPluralRules().getRules(
642                         c.toString());
643                     ruleToExceptions.put(countRules == null ? "" : countRules,
644                         "{\"" + locale + "\", \"" + c + "\", \""
645                             + Joiner.on(",").join(compose)
646                             + "\"},");
647                 }
648             }
649         }
650         if (!ruleToExceptions.isEmpty()) {
651             System.out
652             .println("To fix the above, review the following, then replace in TestDigitPluralCompleteness");
653             for (Entry<String, String> entry : ruleToExceptions.entrySet()) {
654                 System.out.println(entry.getValue() + "\t// " + entry.getKey());
655             }
656         }
657     }
658 
TestLikelyCode()659     public void TestLikelyCode() {
660         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
661         String[][] tests = { { "it_AQ", "it_Latn_AQ" },
662             { "it_Arab", "it_Arab_IT" }, { "az_Cyrl", "az_Cyrl_AZ" }, };
663         for (String[] pair : tests) {
664             String newMax = LikelySubtags.maximize(pair[0], likely);
665             assertEquals("Likely", pair[1], newMax);
666         }
667 
668     }
669 
TestLikelySubtagCompleteness()670     public void TestLikelySubtagCompleteness() {
671         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
672 
673         for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) {
674             if (!likely.containsKey(language)) {
675                 logln("WARNING: No likely subtag for CLDR language code ("
676                     + language + ")");
677             }
678         }
679         for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) {
680             if (!likely.containsKey("und_" + script)
681                 && !script.equals("Latn")
682                 && !script.equals("Zinh")
683                 && !script.equals("Zyyy")
684                 && ScriptMetadata.getInfo(script) != null
685                 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION
686                 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) {
687                 errln("No likely subtag for CLDR script code (und_" + script
688                     + ")");
689             }
690         }
691 
692     }
693 
TestEquivalentLocales()694     public void TestEquivalentLocales() {
695         Set<Set<String>> seen = new HashSet<>();
696         Set<String> toTest = new TreeSet<>(testInfo.getCldrFactory()
697             .getAvailable());
698         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet());
699         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values());
700         toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales());
701         LanguageTagParser ltp = new LanguageTagParser();
702         main: for (String locale : toTest) {
703             if (locale.startsWith("und") || locale.equals("root")) {
704                 continue;
705             }
706             Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale);
707             if (seen.contains(s)) {
708                 continue;
709             }
710 
711             List<String> ss = new ArrayList<>(s);
712             String last = ss.get(ss.size() - 1);
713             ltp.set(last);
714             if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) {
715                 continue; // skip variants for now.
716             }
717             String language = ltp.getLanguage();
718             String script = ltp.getScript();
719             String region = ltp.getRegion();
720             if (!script.isEmpty() && !region.isEmpty()) {
721                 String noScript = ltp.setScript("").toString();
722                 String noRegion = ltp.setScript(script).setRegion("")
723                     .toString();
724                 switch (s.size()) {
725                 case 1: // ok if already maximized and strange script/country,
726                     // eg it_Arab_JA
727                     continue main;
728                 case 2: // ok if adds default country/script, eg {en_Cyrl,
729                     // en_Cyrl_US} or {en_GB, en_Latn_GB}
730                     String first = ss.get(0);
731                     if (first.equals(noScript) || first.equals(noRegion)) {
732                         continue main;
733                     }
734                     break;
735                 case 3: // ok if different script in different country, eg
736                     // {az_IR, az_Arab, az_Arab_IR}
737                     if (noScript.equals(ss.get(0))
738                         && noRegion.equals(ss.get(1))) {
739                         continue main;
740                     }
741                     break;
742                 case 4: // ok if all combinations, eg {en, en_US, en_Latn,
743                     // en_Latn_US}
744                     if (language.equals(ss.get(0))
745                         && noScript.equals(ss.get(1))
746                         && noRegion.equals(ss.get(2))) {
747                         continue main;
748                     }
749                     break;
750                 }
751             }
752             errln("Strange size or composition:\t" + s + " \t"
753                 + showLocaleParts(s));
754             seen.add(s);
755         }
756     }
757 
showLocaleParts(Set<String> s)758     private String showLocaleParts(Set<String> s) {
759         LanguageTagParser ltp = new LanguageTagParser();
760         Set<String> b = new LinkedHashSet<>();
761         for (String ss : s) {
762             ltp.set(ss);
763             addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b);
764             addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b);
765             addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b);
766         }
767         return Joiner.on("; ").join(b);
768     }
769 
addName(int languageName, String code, Set<String> b)770     private void addName(int languageName, String code, Set<String> b) {
771         if (code.isEmpty()) {
772             return;
773         }
774         String name = testInfo.getEnglish().getName(languageName, code);
775         if (!code.equals(name)) {
776             b.add(code + "=" + name);
777         }
778     }
779 
TestDefaultScriptCompleteness()780     public void TestDefaultScriptCompleteness() {
781         Relation<String, String> scriptToBase = Relation.of(
782             new LinkedHashMap<String, Set<String>>(), TreeSet.class);
783         main: for (String locale : testInfo.getCldrFactory()
784             .getAvailableLanguages()) {
785             if (!locale.contains("_") && !"root".equals(locale)) {
786                 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale);
787                 if (defaultScript != null) {
788                     continue;
789                 }
790                 CLDRFile cldrFile = testInfo.getCLDRFile(locale,
791                     false);
792                 UnicodeSet set = cldrFile.getExemplarSet("",
793                     WinningChoice.NORMAL);
794                 for (String s : set) {
795                     int script = UScript.getScript(s.codePointAt(0));
796                     if (script != UScript.UNKNOWN && script != UScript.COMMON
797                         && script != UScript.INHERITED) {
798                         scriptToBase.put(UScript.getShortName(script), locale);
799                         continue main;
800                     }
801                 }
802                 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale);
803             }
804         }
805         if (scriptToBase.size() != 0) {
806             for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) {
807                 errln("Default Scripts missing:\t" + entry.getKey() + "\t"
808                     + entry.getValue());
809             }
810         }
811     }
812 
TestTimeData()813     public void TestTimeData() {
814         Map<String, PreferredAndAllowedHour> timeData = SUPPLEMENTAL
815             .getTimeData();
816         Set<String> regionsSoFar = new HashSet<>();
817         Set<String> current24only = new HashSet<>();
818         Set<String> current12preferred = new HashSet<>();
819 
820         boolean haveWorld = false;
821 
822         ImmutableSet<HourStyle> oldSchool = ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k));
823 
824         for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) {
825             String region = e.getKey();
826             if (region.equals("001")) {
827                 haveWorld = true;
828             }
829             regionsSoFar.add(region);
830             PreferredAndAllowedHour preferredAndAllowedHour = e.getValue();
831             assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred);
832 
833             // find first h or H
834             HourStyle found = null;
835 
836             for (HourStyle item : preferredAndAllowedHour.allowed) {
837                 if (oldSchool.contains(item)) {
838                     found = item;
839                     if (item != preferredAndAllowedHour.preferred) {
840                         String message = "Inconsistent values for " + region + ": preferred=" + preferredAndAllowedHour.preferred
841                             + " but that isn't the first " + oldSchool + " in allowed: " + preferredAndAllowedHour.allowed;
842                         //if (!logKnownIssue("cldrbug:11448", message)) {
843                         errln(message);
844                         //}
845                     }
846                     break;
847                 }
848             }
849             if (found == null) {
850                 errln(region + ": preferred " + preferredAndAllowedHour.preferred
851                     + " not in " + preferredAndAllowedHour.allowed);
852             }
853 //            final HourStyle firstAllowed = preferredAndAllowedHour.allowed.iterator().next();
854 //            if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.h
855 //                || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.hb
856 //                || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed == HourStyle.H) {
857 //                errln(region + ": allowed " + preferredAndAllowedHour.allowed
858 //                    + " starts with preferred " + preferredAndAllowedHour.preferred);
859 //            } else if (isVerbose()) {
860 //                logln(region + ": allowed " + preferredAndAllowedHour.allowed
861 //                    + " starts with preferred " + preferredAndAllowedHour.preferred);
862 //            }
863             // for (HourStyle c : preferredAndAllowedHour.allowed) {
864             // if (!PreferredAndAllowedHour.HOURS.contains(c)) {
865             // errln(region + ": illegal character in " +
866             // preferredAndAllowedHour.allowed + ". It contains " + c
867             // + " which is not in " + PreferredAndAllowedHour.HOURS);
868             // }
869             // }
870             if (!preferredAndAllowedHour.allowed.contains(HourStyle.h)
871                 && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) {
872                 current24only.add(region);
873             }
874             if (preferredAndAllowedHour.preferred == HourStyle.h) {
875                 current12preferred.add(region);
876             }
877         }
878         Set<String> missing = new TreeSet<>(
879             STANDARD_CODES.getGoodAvailableCodes(CodeType.territory));
880         missing.removeAll(regionsSoFar);
881         for (Iterator<String> it = missing.iterator(); it.hasNext();) {
882             if (!StandardCodes.isCountry(it.next())) {
883                 it.remove();
884             }
885         }
886 
887         // if we don't have 001, then we can't miss any regions
888         if (!missing.isEmpty()) {
889             if (haveWorld) {
890                 logln("Implicit regions: " + missing);
891             } else {
892                 errln("Missing regions: " + missing);
893             }
894         }
895 
896         // The feedback gathered from our translators is that the following use
897         // 24 hour time ONLY:
898         Set<String> only24lang = new TreeSet<>(
899             Arrays.asList(("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, "
900                 + "fr, gl, he, is, id, it, no, pt, ro, ru, sr, sk, sl, sv, tr, hy")
901                 .split(",\\s*")));
902         // With the new preferences, this is changed
903         Set<String> only24region = new TreeSet<>();
904         Set<String> either24or12region = new TreeSet<>();
905 
906         // get all countries where official or de-facto official
907         // add them two one of two lists, based on the above list of languages
908         for (String language : SUPPLEMENTAL
909             .getLanguagesForTerritoriesPopulationData()) {
910             boolean a24lang = only24lang.contains(language);
911             for (String region : SUPPLEMENTAL
912                 .getTerritoriesForPopulationData(language)) {
913                 PopulationData pop = SUPPLEMENTAL
914                     .getLanguageAndTerritoryPopulationData(language, region);
915                 if (pop.getOfficialStatus().compareTo(
916                     OfficialStatus.de_facto_official) < 0) {
917                     continue;
918                 }
919                 if (a24lang) {
920                     only24region.add(region);
921                 } else {
922                     either24or12region.add(region);
923                 }
924             }
925         }
926         // if we have a case like CA, where en uses 12/24 but fr uses 24, remove
927         // it for safety
928         only24region.removeAll(either24or12region);
929         // There are always exceptions... Remove SM (San Marino) and VA (Vatican),
930         // since they allows 12/24 but the de facto langauge is Italian.
931         only24region.remove("SM");
932         only24region.remove("VA");
933         // also remove all the regions where 'h' is preferred
934         only24region.removeAll(current12preferred);
935         // now verify
936         if (!current24only.containsAll(only24region)) {
937             Set<String> missing24only = new TreeSet<>(only24region);
938             missing24only.removeAll(current24only);
939 
940             errln("24-hour-only doesn't include needed items:\n"
941                 + " add "
942                 + CldrUtility.join(missing24only, " ")
943                 + "\n\t\t"
944                 + CldrUtility.join(missing24only, "\n\t\t",
945                     new NameCodeTransform(testInfo.getEnglish(),
946                         CLDRFile.TERRITORY_NAME)));
947         }
948     }
949 
950     public static class NameCodeTransform implements StringTransform {
951         private final CLDRFile file;
952         private final int codeType;
953 
NameCodeTransform(CLDRFile file, int code)954         public NameCodeTransform(CLDRFile file, int code) {
955             this.file = file;
956             this.codeType = code;
957         }
958 
959         @Override
transform(String code)960         public String transform(String code) {
961             return file.getName(codeType, code) + " [" + code + "]";
962         }
963     }
964 
TestAliases()965     public void TestAliases() {
966         StandardCodes.make();
967         Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes
968             .getLStreg();
969         Map<String, Map<String, R2<List<String>, String>>> aliases = SUPPLEMENTAL
970             .getLocaleAliasInfo();
971 
972         for (Entry<String, Map<String, R2<List<String>, String>>> typeMap : aliases
973             .entrySet()) {
974             String type = typeMap.getKey();
975             Map<String, R2<List<String>, String>> codeReplacement = typeMap
976                 .getValue();
977 
978             Map<String, Map<String, String>> bcp47DataTypeData = bcp47Data
979                 .get(type.equals("territory") ? "region" : type);
980             if (bcp47DataTypeData == null) {
981                 logln("skipping BCP47 test for " + type);
982             } else {
983                 for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData
984                     .entrySet()) {
985                     String code = codeData.getKey();
986                     if (codeReplacement.containsKey(code)
987                         || codeReplacement.containsKey(code
988                             .toUpperCase(Locale.ENGLISH))) {
989                         continue;
990                         // TODO, check the value
991                     }
992                     Map<String, String> data = codeData.getValue();
993                     if (data.containsKey("Deprecated")
994                         && SUPPLEMENTAL.getCLDRLanguageCodes().contains(
995                             code)) {
996                         errln("supplementalMetadata.xml: alias is missing <languageAlias type=\""
997                             + code + "\" ... /> " + "\t" + data);
998                     }
999                 }
1000             }
1001 
1002             Set<R3<String, List<String>, List<String>>> failures = new LinkedHashSet<>();
1003             Set<String> nullReplacements = new TreeSet<>();
1004             for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement
1005                 .entrySet()) {
1006                 String code = codeRep.getKey();
1007                 List<String> replacements = codeRep.getValue().get0();
1008                 if (replacements == null) {
1009                     nullReplacements.add(code);
1010                     continue;
1011                 }
1012                 Set<String> fixedReplacements = new LinkedHashSet<>();
1013                 for (String replacement : replacements) {
1014                     R2<List<String>, String> newReplacement = codeReplacement
1015                         .get(replacement);
1016                     if (newReplacement != null) {
1017                         List<String> list = newReplacement.get0();
1018                         if (list != null) {
1019                             fixedReplacements.addAll(list);
1020                         }
1021                     } else {
1022                         fixedReplacements.add(replacement);
1023                     }
1024                 }
1025                 List<String> fixedList = new ArrayList<>(
1026                     fixedReplacements);
1027                 if (!replacements.equals(fixedList)) {
1028                     R3<String, List<String>, List<String>> row = Row.of(code,
1029                         replacements, fixedList);
1030                     System.out.println(row.toString());
1031                     failures.add(row);
1032                 }
1033             }
1034 
1035             if (failures.size() != 0) {
1036                 for (R3<String, List<String>, List<String>> item : failures) {
1037                     String code = item.get0();
1038                     List<String> oldReplacement = item.get1();
1039                     List<String> newReplacement = item.get2();
1040 
1041                     errln(code + "\t=>\t" + oldReplacement + "\tshould be:\n\t"
1042                         + "<" + type + "Alias type=\"" + code
1043                         + "\" replacement=\""
1044                         + Joiner.on(" ").join(newReplacement)
1045                         + "\" reason=\"XXX\"/> <!-- YYY -->\n");
1046                 }
1047             }
1048             if (nullReplacements.size() != 0) {
1049                 logln("No Replacements\t" + type + "\t" + nullReplacements);
1050             }
1051         }
1052     }
1053 
1054     static final List<String> oldRegions = Arrays
1055         .asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU"
1056             .split(", "));
1057 
TestTerritoryContainment()1058     public void TestTerritoryContainment() {
1059         Relation<String, String> map = SUPPLEMENTAL
1060             .getTerritoryToContained(ContainmentStyle.all);
1061         Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore();
1062         Set<String> mapItems = new LinkedHashSet<>();
1063         // get all the items
1064         for (String item : map.keySet()) {
1065             mapItems.add(item);
1066             mapItems.addAll(map.getAll(item));
1067         }
1068         Map<String, Map<String, String>> bcp47RegionData = StandardCodes
1069             .getLStreg().get("region");
1070 
1071         // verify that all regions are covered
1072         Set<String> bcp47Regions = new LinkedHashSet<>(
1073             bcp47RegionData.keySet());
1074         bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the
1075         // unknown region...
1076         for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext();) {
1077             String region = it.next();
1078             Map<String, String> data = bcp47RegionData.get(region);
1079             if (data.containsKey("Deprecated")) {
1080                 logln("Removing deprecated " + region);
1081                 it.remove();
1082             }
1083             if ("Private use".equals(data.get("Description"))) {
1084                 it.remove();
1085             }
1086         }
1087 
1088         if (!mapItems.equals(bcp47Regions)) {
1089             mapItems.removeAll(oldRegions);
1090             errlnDiff("containment items not in bcp47 regions: ", mapItems,
1091                 bcp47Regions);
1092             errlnDiff("bcp47 regions not in containment items: ", bcp47Regions,
1093                 mapItems);
1094         }
1095 
1096         // verify that everything in the containment core can be reached
1097         // downwards from 001.
1098 
1099         Map<String, Integer> from001 = getRecursiveContainment("001", map,
1100             new LinkedHashMap<String, Integer>(), 1);
1101         from001.put("001", 0);
1102         Set<String> keySet = from001.keySet();
1103         for (String region : keySet) {
1104             logln(Utility.repeat("\t", from001.get(region)) + "\t" + region
1105                 + "\t" + getRegionName(region));
1106         }
1107 
1108         // Populate mapItems with the core containment
1109         mapItems.clear();
1110         for (String item : mapCore.keySet()) {
1111             mapItems.add(item);
1112             mapItems.addAll(mapCore.getAll(item));
1113         }
1114 
1115         if (!mapItems.equals(keySet)) {
1116             errlnDiff(
1117                 "containment core items that can't be reached from 001: ",
1118                 mapItems, keySet);
1119         }
1120     }
1121 
errlnDiff(String title, Set<String> mapItems, Set<String> keySet)1122     private void errlnDiff(String title, Set<String> mapItems,
1123         Set<String> keySet) {
1124         Set<String> diff = new LinkedHashSet<>(mapItems);
1125         diff.removeAll(keySet);
1126         if (diff.size() != 0) {
1127             errln(title + diff);
1128         }
1129     }
1130 
getRegionName(String region)1131     private String getRegionName(String region) {
1132         return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region);
1133     }
1134 
getRecursiveContainment(String region, Relation<String, String> map, Map<String, Integer> result, int depth)1135     private Map<String, Integer> getRecursiveContainment(String region,
1136         Relation<String, String> map, Map<String, Integer> result, int depth) {
1137         Set<String> contained = map.getAll(region);
1138         if (contained == null) {
1139             return result;
1140         }
1141         for (String item : contained) {
1142             if (result.containsKey(item)) {
1143                 logln("Duplicate containment " + item + "\t"
1144                     + getRegionName(item));
1145                 continue;
1146             }
1147             result.put(item, depth);
1148             getRecursiveContainment(item, map, result, depth + 1);
1149         }
1150         return result;
1151     }
1152 
TestMacrolanguages()1153     public void TestMacrolanguages() {
1154         Set<String> languageCodes = STANDARD_CODES
1155             .getAvailableCodes("language");
1156         Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement = SUPPLEMENTAL
1157             .getLocaleAliasInfo();
1158         Map<String, R2<List<String>, String>> tagToReplacement = typeToTagToReplacement
1159             .get("language");
1160 
1161         Relation<String, String> replacementToReplaced = Relation.of(
1162             new TreeMap<String, Set<String>>(), TreeSet.class);
1163         for (String language : tagToReplacement.keySet()) {
1164             List<String> replacements = tagToReplacement.get(language).get0();
1165             if (replacements != null) {
1166                 replacementToReplaced.putAll(replacements, language);
1167             }
1168         }
1169         replacementToReplaced.freeze();
1170 
1171         Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes
1172             .getLStreg();
1173         Map<String, Map<String, String>> lstregLanguageInfo = lstreg
1174             .get("language");
1175 
1176         Relation<Scope, String> scopeToCodes = Relation.of(
1177             new TreeMap<Scope, Set<String>>(), TreeSet.class);
1178         // the invariant is that every macrolanguage has exactly 1 encompassed
1179         // language that maps to it
1180 
1181         main: for (String language : Builder.with(new TreeSet<String>())
1182             .addAll(languageCodes).addAll(Iso639Data.getAvailable()).get()) {
1183             if (language.equals("no") || language.equals("sh"))
1184                 continue; // special cases
1185             Scope languageScope = getScope(language, lstregLanguageInfo);
1186             if (languageScope == Scope.Macrolanguage) {
1187                 if (Iso639Data.getHeirarchy(language) != null) {
1188                     continue main; // is real family
1189                 }
1190                 Set<String> replacements = replacementToReplaced
1191                     .getAll(language);
1192                 if (replacements == null || replacements.size() == 0) {
1193                     scopeToCodes.put(languageScope, language);
1194                 } else {
1195                     // it still might be bad, if we don't have a mapping to a
1196                     // regular language
1197                     for (String replacement : replacements) {
1198                         Scope replacementScope = getScope(replacement,
1199                             lstregLanguageInfo);
1200                         if (replacementScope == Scope.Individual) {
1201                             continue main;
1202                         }
1203                     }
1204                     scopeToCodes.put(languageScope, language);
1205                 }
1206             }
1207         }
1208         // now show the items we found
1209         for (Scope scope : scopeToCodes.keySet()) {
1210             for (String language : scopeToCodes.getAll(scope)) {
1211                 String name = testInfo.getEnglish().getName(language);
1212                 if (name == null || name.equals(language)) {
1213                     Set<String> set = Iso639Data.getNames(language);
1214                     if (set != null) {
1215                         name = set.iterator().next();
1216                     } else {
1217                         Map<String, String> languageInfo = lstregLanguageInfo
1218                             .get(language);
1219                         if (languageInfo != null) {
1220                             name = languageInfo.get("Description");
1221                         }
1222                     }
1223                 }
1224                 errln(scope + "\t" + language + "\t" + name + "\t"
1225                     + Iso639Data.getType(language));
1226             }
1227         }
1228     }
1229 
getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo)1230     private Scope getScope(String language,
1231         Map<String, Map<String, String>> lstregLanguageInfo) {
1232         Scope languageScope = Iso639Data.getScope(language);
1233         Map<String, String> languageInfo = lstregLanguageInfo.get(language);
1234         if (languageInfo == null) {
1235             // System.out.println("Couldn't get lstreg info for " + language);
1236         } else {
1237             String lstregScope = languageInfo.get("Scope");
1238             if (lstregScope != null) {
1239                 Scope scope2 = Scope.fromString(lstregScope);
1240                 if (languageScope != scope2) {
1241                     // System.out.println("Mismatch in scope between LSTR and ISO 639:\t"
1242                     // + scope2 + "\t" +
1243                     // languageScope);
1244                     languageScope = scope2;
1245                 }
1246             }
1247         }
1248         return languageScope;
1249     }
1250 
    // Switch read by TestPopulation: when true, each language tag from the
    // territory population data is compared with its canonical form; when
    // false, the older base/script invariant checks are run instead.
    static final boolean LOCALES_FIXED = true;
1252 
TestPopulation()1253     public void TestPopulation() {
1254         Set<String> languages = SUPPLEMENTAL
1255             .getLanguagesForTerritoriesPopulationData();
1256         Relation<String, String> baseToLanguages = Relation.of(
1257             new TreeMap<String, Set<String>>(), TreeSet.class);
1258         LanguageTagParser ltp = new LanguageTagParser();
1259         LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(false);
1260 
1261         for (String language : languages) {
1262             if (LOCALES_FIXED) {
1263                 String canonicalForm = ltc.transform(language);
1264                 if (!assertEquals("Canonical form", canonicalForm, language)) {
1265                     int debug = 0;
1266                 }
1267             }
1268 
1269             String base = ltp.set(language).getLanguage();
1270             String script = ltp.getScript();
1271             baseToLanguages.put(base, language);
1272 
1273             // add basic data, basically just for wo!
1274             // if there are primary scripts, they must include script (if not
1275             // empty)
1276             Set<String> primaryScripts = Collections.emptySet();
1277             Set<String> secondaryScripts = Collections.emptySet();
1278             Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL
1279                 .getBasicLanguageDataMap(base);
1280             if (basicData != null) {
1281                 BasicLanguageData s = basicData
1282                     .get(BasicLanguageData.Type.primary);
1283                 if (s != null) {
1284                     primaryScripts = s.getScripts();
1285                 }
1286                 s = basicData.get(BasicLanguageData.Type.secondary);
1287                 if (s != null) {
1288                     secondaryScripts = s.getScripts();
1289                 }
1290             }
1291 
1292             // do some consistency tests; if there is a script, it must be in
1293             // primaryScripts or secondaryScripts
1294             if (!script.isEmpty() && !primaryScripts.contains(script) && !secondaryScripts.contains(script)) {
1295                 errln(base + ": Script found in territory data (" + script
1296                     + ") is not in primary scripts :\t" + primaryScripts
1297                     + " and not in secondary scripts :\t" + secondaryScripts);
1298             }
1299 
1300             // if there are multiple primary scripts, they will be in
1301             // baseToLanguages
1302             if (primaryScripts.size() > 1) {
1303                 for (String script2 : primaryScripts) {
1304                     baseToLanguages.put(base, base + "_" + script2);
1305                 }
1306             }
1307         }
1308 
1309         if (!LOCALES_FIXED) {
1310             // the invariants are that if we have a base, we must not have a script.
1311             // and if we don't have a base, we must have two items
1312             for (String base : baseToLanguages.keySet()) {
1313                 Set<String> languagesForBase = baseToLanguages.getAll(base);
1314                 if (languagesForBase.contains(base)) {
1315                     if (languagesForBase.size() > 1) {
1316                         errln("Cannot have base alone with other scripts:\t"
1317                             + languagesForBase);
1318                     }
1319                 } else {
1320                     if (languagesForBase.size() == 1) {
1321                         errln("Cannot have only one script for language:\t"
1322                             + languagesForBase);
1323                     }
1324                 }
1325             }
1326         }
1327     }
1328 
TestCompleteness()1329     public void TestCompleteness() {
1330         if (SUPPLEMENTAL.getSkippedElements().size() > 0) {
1331             logln("SupplementalDataInfo API doesn't support: "
1332                 + SUPPLEMENTAL.getSkippedElements().toString());
1333         }
1334     }
1335 
    // these are settings for exceptional cases we want to allow
    // Currency codes that TestCurrency exempts from its "'new' in name" checks.
    private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW = new TreeSet<>(
        Arrays.asList("ILS", "NZD", "PGK", "TWD"));

    // ok since there is no problem with confusion
    // Non-modern currency codes for which TestCurrency does not log the
    // "Doesn't have 'old' or date range in name" message.
    private static final Set<String> OK_TO_NOT_HAVE_OLD = new TreeSet<>(
        Arrays.asList("ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM",
            "FRF", "GRD", "IEP", "ITL", "LUF", "MTL", "MTP", "NLG",
            "PTE", "YUM", "ARA", "BAD", "BGL", "BOP", "BRC", "BRN",
            "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE", "HRD",
            "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI",
            "PES", "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD",
            "YUN", "ZRZ", "GWE"));

    // Cut-off used by TestCurrency for names containing "new": a first-use date
    // before this limit is reported as an error. Built with the deprecated
    // Date(year, month, date) constructor, whose year is an offset from 1900
    // (matching getYear()) and whose month is 0-based, so this is 1 February of
    // the year five years before class initialization, in the default time zone.
    // NOTE(review): month index 1 is February - confirm January was not intended.
    private static final Date LIMIT_FOR_NEW_CURRENCY = new Date(
        new Date().getYear() - 5, 1, 1);
    // Time of class initialization; TestCurrency uses it to decide whether a
    // currency date range is still current.
    private static final Date NOW = new Date();

    // Finds the whole word "old" or a parenthesized "dddd-dddd" year range
    // (ASCII hyphen only), ignoring case. Reused via reset(), so it holds state
    // per test instance.
    private Matcher oldMatcher = Pattern.compile(
        "\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE)
        .matcher("");
    // Finds the whole word "new", ignoring case. Reused via reset() as well.
    private Matcher newMatcher = Pattern.compile("\\bnew\\b",
        Pattern.CASE_INSENSITIVE).matcher("");
1359 
1360     /**
1361      * Test that access to currency info in supplemental data is ok. At this
1362      * point just a simple test.
1363      *
1364      * @param args
1365      */
TestCurrency()1366     public void TestCurrency() {
1367         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
1368         Set<String> currencyCodes = STANDARD_CODES
1369             .getGoodAvailableCodes("currency");
1370         Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes = Relation
1371             .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(),
1372                 TreeSet.class);
1373         Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes = Relation
1374             .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(),
1375                 TreeSet.class);
1376         Set<String> territoriesWithoutModernCurrencies = new TreeSet<>(
1377             STANDARD_CODES.getGoodAvailableCodes("territory"));
1378         Map<String, Date> currencyFirstValid = new TreeMap<>();
1379         Map<String, Date> currencyLastValid = new TreeMap<>();
1380         territoriesWithoutModernCurrencies.remove("ZZ");
1381 
1382         for (String territory : STANDARD_CODES
1383             .getGoodAvailableCodes("territory")) {
1384             /* "EU" behaves like a country for purposes of this test */
1385             if ((SUPPLEMENTAL.getContained(territory) != null)
1386                 && !territory.equals("EU")) {
1387                 territoriesWithoutModernCurrencies.remove(territory);
1388                 continue;
1389             }
1390             Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL
1391                 .getCurrencyDateInfo(territory);
1392             if (currencyInfo == null) {
1393                 continue; // error, but will pick up below.
1394             }
1395             for (CurrencyDateInfo dateInfo : currencyInfo) {
1396                 final String currency = dateInfo.getCurrency();
1397                 final Date start = dateInfo.getStart();
1398                 final Date end = dateInfo.getEnd();
1399                 if (dateInfo.getErrors().length() != 0) {
1400                     logln("parsing " + territory + "\t" + dateInfo.toString()
1401                     + "\t" + dateInfo.getErrors());
1402                 }
1403                 Date firstValue = currencyFirstValid.get(currency);
1404                 if (firstValue == null || firstValue.compareTo(start) < 0) {
1405                     currencyFirstValid.put(currency, start);
1406                 }
1407                 Date lastValue = currencyLastValid.get(currency);
1408                 if (lastValue == null || lastValue.compareTo(end) > 0) {
1409                     currencyLastValid.put(currency, end);
1410                 }
1411                 if (start.compareTo(NOW) < 0 && end.compareTo(NOW) >= 0) { // Non-tender
1412                     // is
1413                     // OK...
1414                     modernCurrencyCodes.put(currency,
1415                         new Pair<>(territory,
1416                             dateInfo));
1417                     territoriesWithoutModernCurrencies.remove(territory);
1418                 } else {
1419                     nonModernCurrencyCodes.put(currency,
1420                         new Pair<>(territory,
1421                             dateInfo));
1422                 }
1423                 logln(territory
1424                     + "\t"
1425                     + dateInfo.toString()
1426                     + "\t"
1427                     + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME,
1428                         currency));
1429             }
1430         }
1431         // fix up
1432         nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet());
1433         Relation<String, String> isoCurrenciesToCountries = Relation.of(
1434             new TreeMap<String, Set<String>>(), TreeSet.class)
1435             .addAllInverted(isoCodes.getCountryToCodes());
1436         // now print error messages
1437         logln("Modern Codes: " + modernCurrencyCodes.size() + "\t"
1438             + modernCurrencyCodes);
1439         Set<String> missing = new TreeSet<>(
1440             isoCurrenciesToCountries.keySet());
1441         missing.removeAll(modernCurrencyCodes.keySet());
1442         if (missing.size() != 0) {
1443             errln("Missing codes compared to ISO: " + missing.toString());
1444         }
1445 
1446         for (String currency : modernCurrencyCodes.keySet()) {
1447             Set<Pair<String, CurrencyDateInfo>> data = modernCurrencyCodes
1448                 .getAll(currency);
1449             final String name = testInfo.getEnglish().getName(
1450                 CLDRFile.CURRENCY_NAME, currency);
1451 
1452             Set<String> isoCountries = isoCurrenciesToCountries
1453                 .getAll(currency);
1454             if (isoCountries == null) {
1455                 isoCountries = new TreeSet<>();
1456             }
1457 
1458             TreeSet<String> cldrCountries = new TreeSet<>();
1459             for (Pair<String, CurrencyDateInfo> x : data) {
1460                 cldrCountries.add(x.getFirst());
1461             }
1462             if (!isoCountries.equals(cldrCountries)) {
1463                 if (!logKnownIssue("cldrbug:10765", "Missing codes compared to ISO: " + missing.toString())) {
1464 
1465                     errln("Mismatch between ISO and Cldr modern currencies for "
1466                         + currency + "\tISO:" + isoCountries + "\tCLDR:"
1467                         + cldrCountries);
1468                     showCountries("iso-cldr", isoCountries, cldrCountries, missing);
1469                     showCountries("cldr-iso", cldrCountries, isoCountries, missing);
1470                 }
1471             }
1472 
1473             if (oldMatcher.reset(name).find()) {
1474                 errln("Has 'old' in name but still used " + "\t" + currency
1475                     + "\t" + name + "\t" + data);
1476             }
1477             if (newMatcher.reset(name).find()
1478                 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
1479                 // find the first use. If older than 5 years, flag as error
1480                 if (currencyFirstValid.get(currency).compareTo(
1481                     LIMIT_FOR_NEW_CURRENCY) < 0) {
1482                     errln("Has 'new' in name but used since "
1483                         + CurrencyDateInfo.formatDate(currencyFirstValid
1484                             .get(currency))
1485                         + "\t" + currency + "\t"
1486                         + name + "\t" + data);
1487                 } else {
1488                     logln("Has 'new' in name but used since "
1489                         + CurrencyDateInfo.formatDate(currencyFirstValid
1490                             .get(currency))
1491                         + "\t" + currency + "\t"
1492                         + name + "\t" + data);
1493                 }
1494             }
1495         }
1496         logln("Non-Modern Codes (with dates): " + nonModernCurrencyCodes.size()
1497         + "\t" + nonModernCurrencyCodes);
1498         for (String currency : nonModernCurrencyCodes.keySet()) {
1499             final String name = testInfo.getEnglish().getName(
1500                 CLDRFile.CURRENCY_NAME, currency);
1501             if (name == null) {
1502                 errln("No English name for currency " + currency);
1503                 continue;
1504             }
1505             if (newMatcher.reset(name).find()
1506                 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
1507                 logln("Has 'new' in name but NOT used since "
1508                     + CurrencyDateInfo.formatDate(currencyLastValid
1509                         .get(currency))
1510                     + "\t" + currency + "\t" + name
1511                     + "\t" + nonModernCurrencyCodes.getAll(currency));
1512             } else if (!oldMatcher.reset(name).find()
1513                 && !OK_TO_NOT_HAVE_OLD.contains(currency)) {
1514                 logln("Doesn't have 'old' or date range in name but NOT used since "
1515                     + CurrencyDateInfo.formatDate(currencyLastValid
1516                         .get(currency))
1517                     + "\t"
1518                     + currency
1519                     + "\t"
1520                     + name
1521                     + "\t" + nonModernCurrencyCodes.getAll(currency));
1522                 for (Pair<String, CurrencyDateInfo> pair : nonModernCurrencyCodes
1523                     .getAll(currency)) {
1524                     final String territory = pair.getFirst();
1525                     Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL
1526                         .getCurrencyDateInfo(territory);
1527                     for (CurrencyDateInfo dateInfo : currencyInfo) {
1528                         if (dateInfo.getEnd().compareTo(NOW) < 0) {
1529                             continue;
1530                         }
1531                         logln("\tCurrencies used instead: "
1532                             + territory
1533                             + "\t"
1534                             + dateInfo
1535                             + "\t"
1536                             + testInfo.getEnglish().getName(
1537                                 CLDRFile.CURRENCY_NAME,
1538                                 dateInfo.getCurrency()));
1539 
1540                     }
1541                 }
1542 
1543             }
1544         }
1545         Set<String> remainder = new TreeSet<>();
1546         remainder.addAll(currencyCodes);
1547         remainder.removeAll(nonModernCurrencyCodes.keySet());
1548         // TODO make this an error, except for allowed exceptions.
1549         logln("Currencies without Territories: " + remainder);
1550         if (territoriesWithoutModernCurrencies.size() != 0) {
1551             errln("Modern territory missing currency: "
1552                 + territoriesWithoutModernCurrencies);
1553         }
1554     }
1555 
showCountries(final String title, Set<String> isoCountries, Set<String> cldrCountries, Set<String> missing)1556     private void showCountries(final String title, Set<String> isoCountries,
1557         Set<String> cldrCountries, Set<String> missing) {
1558         missing.clear();
1559         missing.addAll(isoCountries);
1560         missing.removeAll(cldrCountries);
1561         for (String country : missing) {
1562             logln("\t\tExtra in " + title + "\t" + country + " - "
1563                 + getRegionName(country));
1564         }
1565     }
1566 
TestCurrencyDecimalPlaces()1567     public void TestCurrencyDecimalPlaces() {
1568         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
1569         Relation<String, IsoCurrencyParser.Data> codeList = isoCodes
1570             .getCodeList();
1571         Set<String> currencyCodes = STANDARD_CODES
1572             .getGoodAvailableCodes("currency");
1573         for (String cc : currencyCodes) {
1574             Set<IsoCurrencyParser.Data> d = codeList.get(cc);
1575             if (d != null) {
1576                 for (IsoCurrencyParser.Data x : d) {
1577                     CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc);
1578                     if (cni.digits != x.getMinorUnit()) {
1579                         logln("Mismatch between ISO/CLDR for decimal places for currency => " + cc +
1580                             ". ISO = " + x.getMinorUnit() + " CLDR = " + cni.digits);
1581                     }
1582                 }
1583             }
1584         }
1585     }
1586 
1587     /**
1588      * Verify that we have a default script for every CLDR base language
1589      */
TestDefaultScripts()1590     public void TestDefaultScripts() {
1591         SupplementalDataInfo supp = SUPPLEMENTAL;
1592         Map<String, String> likelyData = supp.getLikelySubtags();
1593         Map<String, String> baseToDefaultContentScript = new HashMap<>();
1594         for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) {
1595             String script = locale.getScript();
1596             if (!script.isEmpty() && locale.getCountry().isEmpty()) {
1597                 baseToDefaultContentScript.put(locale.getLanguage(), script);
1598             }
1599         }
1600         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
1601             if ("root".equals(locale)) {
1602                 continue;
1603             }
1604             CLDRLocale loc = CLDRLocale.getInstance(locale);
1605             String baseLanguage = loc.getLanguage();
1606             String defaultScript = supp.getDefaultScript(baseLanguage);
1607 
1608             String defaultContentScript = baseToDefaultContentScript
1609                 .get(baseLanguage);
1610             if (defaultContentScript != null) {
1611                 assertEquals(loc + " defaultContentScript = default",
1612                     defaultScript, defaultContentScript);
1613             }
1614             String likely = likelyData.get(baseLanguage);
1615             String likelyScript = likely == null ? null : CLDRLocale
1616                 .getInstance(likely).getScript();
1617             Map<Type, BasicLanguageData> scriptInfo = supp
1618                 .getBasicLanguageDataMap(baseLanguage);
1619             if (scriptInfo == null) {
1620                 errln(loc + ": has no BasicLanguageData");
1621             } else {
1622                 BasicLanguageData data = scriptInfo.get(Type.primary);
1623                 if (data == null) {
1624                     data = scriptInfo.get(Type.secondary);
1625                 }
1626                 if (data == null) {
1627                     errln(loc + ": has no scripts in BasicLanguageData");
1628                 } else if (!data.getScripts().contains(defaultScript)) {
1629                     errln(loc + ": " + defaultScript
1630                         + " not in BasicLanguageData - check <languages> in supplementalData.xml and language_script_raw.txt  " + data.getScripts());
1631                 }
1632             }
1633 
1634             assertEquals(loc + " likely = default", defaultScript, likelyScript);
1635 
1636             assertNotNull(loc + ": needs default script", defaultScript);
1637 
1638             if (!loc.getScript().isEmpty()) {
1639                 if (!loc.getScript().equals(defaultScript)) {
1640                     assertNotEquals(locale
1641                         + ": only include script if not default",
1642                         loc.getScript(), defaultScript);
1643                 }
1644             }
1645 
1646         }
1647     }
1648 
1649     enum CoverageIssue {
1650         log, warn, error
1651     }
1652 
TestPluralCompleteness()1653     public void TestPluralCompleteness() {
1654         // Set<String> cardinalLocales = new
1655         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
1656         // Set<String> ordinalLocales = new
1657         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal));
1658         // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals =
1659         // PluralRulesFactory.getLocaleToSamplePatterns();
1660         // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales();
1661         // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale,
1662         // type).keySet());
1663         // Map<ULocale, PluralRules> overrideCardinals =
1664         // PluralRulesFactory.getPluralOverrides();
1665         // Set<ULocale> overrideCardinalLocales = new
1666         // HashSet<ULocale>(overrideCardinals.keySet());
1667 
1668         Set<String> testLocales = STANDARD_CODES.getLocaleCoverageLocales(
1669             Organization.google, EnumSet.of(Level.MODERN));
1670         Set<String> allLocales = testInfo.getCldrFactory().getAvailable();
1671         LanguageTagParser ltp = new LanguageTagParser();
1672         for (String locale : allLocales) {
1673             // the only known case where plural rules depend on region or script
1674             // is pt_PT
1675             if (locale.equals("root")) {
1676                 continue;
1677             }
1678             ltp.set(locale);
1679             if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) {
1680                 continue;
1681             }
1682             CoverageIssue needsCoverage = testLocales.contains(locale)
1683                 ? CoverageIssue.error
1684                     : CoverageIssue.log;
1685             CoverageIssue needsCoverage2 = needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage;
1686 
1687             //            if (logKnownIssue("Cldrbug:8809", "Missing plural rules/samples be and ga locales")) {
1688             //                if (locale.equals("be") || locale.equals("ga")) {
1689             //                    needsCoverage = CoverageIssue.warn;
1690             //                }
1691             //            }
1692             PluralRulesFactory prf = PluralRulesFactory
1693                 .getInstance(CLDRConfig.getInstance()
1694                     .getSupplementalDataInfo());
1695 
1696             for (PluralType type : PluralType.values()) {
1697                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale,
1698                     false);
1699                 if (pluralInfo == null) {
1700                     errOrLog(needsCoverage, locale + "\t" + type + " \tmissing plural rules", "Cldrbug:7839", "Missing plural data for modern locales");
1701                     continue;
1702                 }
1703                 Set<Count> counts = pluralInfo.getCounts();
1704                 // if (counts.size() == 1) {
1705                 // continue; // skip checking samples
1706                 // }
1707                 HashSet<String> samples = new HashSet<>();
1708                 EnumSet<Count> countsWithNoSamples = EnumSet
1709                     .noneOf(Count.class);
1710                 Relation<String, Count> samplesToCounts = Relation.of(
1711                     new HashMap(), LinkedHashSet.class);
1712                 Set<Count> countsFound = prf.getSampleCounts(locale,
1713                     type.standardType);
1714                 StringBuilder failureCases = new StringBuilder();
1715                 for (Count count : counts) {
1716                     String pattern = PluralRulesFactory.getSamplePattern(locale, type.standardType, count);
1717                     final String rangeLine = getRangeLine(count, pluralInfo.getPluralRules(), pattern);
1718                     failureCases.append('\n').append(locale).append('\t').append(type).append('\t').append(rangeLine);
1719                     if (countsFound == null || !countsFound.contains(count)) {
1720                         countsWithNoSamples.add(count);
1721                     } else {
1722                         samplesToCounts.put(pattern, count);
1723                         logln(locale + "\t" + type + "\t" + count + "\t"
1724                             + pattern);
1725                     }
1726                 }
1727                 if (!countsWithNoSamples.isEmpty()) {
1728                     errOrLog(needsCoverage, locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples,
1729                         "cldrbug:7075", "Missing ordinal minimal pairs");
1730                     errOrLog(needsCoverage2, failureCases.toString());
1731                 }
1732                 for (Entry<String, Set<Count>> entry : samplesToCounts
1733                     .keyValuesSet()) {
1734                     if (entry.getValue().size() != 1) {
1735                         errOrLog(needsCoverage, locale + "\t" + type + "\t duplicate samples: " + entry.getValue()
1736                         + " => «" + entry.getKey() + "»", "cldrbug:7119", "Some duplicate minimal pairs");
1737                         errOrLog(needsCoverage2, failureCases.toString());
1738                     }
1739                 }
1740             }
1741         }
1742     }
1743 
errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment)1744     public void errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment) {
1745         switch (causeError) {
1746         case error:
1747             if (logTicket == null) {
1748                 errln(message);
1749                 break;
1750             }
1751             logKnownIssue(logTicket, logComment);
1752             // fall through
1753         case warn:
1754             warnln(message);
1755             break;
1756         case log:
1757             logln(message);
1758             break;
1759         }
1760     }
1761 
errOrLog(CoverageIssue causeError, String message)1762     public void errOrLog(CoverageIssue causeError, String message) {
1763         errOrLog(causeError, message, null, null);
1764     }
1765 
TestNumberingSystemDigits()1766     public void TestNumberingSystemDigits() {
1767 
1768         // Don't worry about digits from supplemental planes yet ( ICU can't
1769         // handle them anyways )
1770         // hanidec is the only known non codepoint order numbering system
1771         // TODO: Fix so that it works properly on non-BMP digit strings.
1772         String[] knownExceptions = { "brah", "cakm", "hanidec", "osma", "shrd",
1773             "sora", "takr" };
1774         List<String> knownExceptionList = Arrays.asList(knownExceptions);
1775         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
1776             if (knownExceptionList.contains(ns)) {
1777                 continue;
1778             }
1779             String digits = SUPPLEMENTAL.getDigits(ns);
1780             int previousChar = 0;
1781             int ch;
1782 
1783             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
1784                 ch = digits.codePointAt(i);
1785                 if (i > 0 && ch != previousChar + 1) {
1786                     errln("Digits for numbering system "
1787                         + ns
1788                         + " are not in code point order. Previous char = U+"
1789                         + Utility.hex(previousChar, 4)
1790                         + " Current char = U+" + Utility.hex(ch, 4));
1791                     break;
1792                 }
1793                 previousChar = ch;
1794             }
1795         }
1796     }
1797 
TestNumberingSystemDigitCompleteness()1798     public void TestNumberingSystemDigitCompleteness() {
1799         List<Integer> unicodeDigits = new ArrayList<>();
1800         for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) {
1801             if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) {
1802                 unicodeDigits.add(Integer.valueOf(cp));
1803             }
1804         }
1805 
1806         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
1807             String digits = SUPPLEMENTAL.getDigits(ns);
1808             int ch;
1809 
1810             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
1811                 ch = digits.codePointAt(i);
1812                 unicodeDigits.remove(Integer.valueOf(ch));
1813             }
1814         }
1815 
1816         if (unicodeDigits.size() > 0) {
1817             for (Integer i : unicodeDigits) {
1818                 errln("Unicode digit: " + UCharacter.getName(i) + " is not in any numbering system. Script = "
1819                     + UScript.getShortName(UScript.getScript(i)));
1820             }
1821         }
1822     }
1823 
TestMetazones()1824     public void TestMetazones() {
1825         Date goalMin = new Date(70, 0, 1);
1826         Date goalMax = new Date(300, 0, 2);
1827         ImmutableSet<String> knownTZWithoutMetazone = ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov");
1828         for (String timezoneRaw : TimeZone.getAvailableIDs()) {
1829             String timezone = TimeZone.getCanonicalID(timezoneRaw);
1830             String region = TimeZone.getRegion(timezone);
1831             if (!timezone.equals(timezoneRaw) || "001".equals(region)) {
1832                 continue;
1833             }
1834             if (knownTZWithoutMetazone.contains(timezone)) {
1835                 continue;
1836             }
1837             final Set<MetaZoneRange> ranges = SUPPLEMENTAL
1838                 .getMetaZoneRanges(timezone);
1839 
1840             if (assertNotNull("metazones for " + timezone, ranges)) {
1841                 long min = Long.MAX_VALUE;
1842                 long max = Long.MIN_VALUE;
1843                 for (MetaZoneRange range : ranges) {
1844                     if (range.dateRange.from != DateRange.START_OF_TIME) {
1845                         min = Math.min(min, range.dateRange.from);
1846                     }
1847                     if (range.dateRange.to != DateRange.END_OF_TIME) {
1848                         max = Math.max(max, range.dateRange.to);
1849                     }
1850                 }
1851                 assertRelation(timezone + " has metazone before 1970?", true,
1852                     goalMin, LEQ, new Date(min));
1853                 assertRelation(timezone
1854                     + " has metazone until way in the future?", true,
1855                     goalMax, GEQ, new Date(max));
1856             }
1857         }
1858         com.google.common.collect.Interners i;
1859     }
1860 
Test9924()1861     public void Test9924() {
1862         Boolean b = org.unicode.cldr.unittest.TestSupplementalInfo.LOCALES_FIXED;
1863         PopulationData zhCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(b ? "zh" : "zh_Hans", "CN");
1864         PopulationData yueCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN");
1865         assertTrue("yue*10 < zh", yueCNData.getPopulation() < zhCNData.getPopulation());
1866     }
1867 
Test10765()1868     public void Test10765() { //
1869         Set<String> surveyToolLanguages = SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool
1870         Set<String> mainLanguages = new TreeSet<>();
1871         LanguageTagParser ltp = new LanguageTagParser();
1872         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
1873             mainLanguages.add(ltp.set(locale).getLanguage());
1874         }
1875         // add special codes we want to see anyway
1876         mainLanguages.add("und");
1877         mainLanguages.add("mul");
1878         mainLanguages.add("zxx");
1879 
1880         if (!mainLanguages.containsAll(surveyToolLanguages)) {
1881             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale
1882             Set<String> temp = new TreeSet<>(surveyToolLanguages);
1883             temp.removeAll(mainLanguages);
1884             Set<String> modern = new TreeSet<>();
1885             Set<String> comprehensive = new TreeSet<>();
1886             for (String lang : temp) {
1887                 Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang));
1888                 if (level.compareTo(Level.MODERN) <= 0) {
1889                     modern.add(lang);
1890                 } else {
1891                     comprehensive.add(lang);
1892                 }
1893             }
1894             warnln("«Modern» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(modern));
1895             logln("«Comprehensive» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(comprehensive));
1896         }
1897         if (!surveyToolLanguages.containsAll(mainLanguages)) {
1898             mainLanguages.removeAll(surveyToolLanguages);
1899             // TODO: See https://unicode-org.atlassian.net/browse/CLDR-14974
1900             // Currently there is a requirement that all locales in main/* are in attributeValueValidity.xml
1901             assertEquals("main/* languages missing from <variable id='$language'/> in attributeValueValidity.xml",
1902                 Collections.EMPTY_SET, mainLanguages);
1903         }
1904     }
1905 
getNames(Set<String> temp)1906     private Set<String> getNames(Set<String> temp) {
1907         Set<String> tempNames = new TreeSet<>();
1908         for (String langCode : temp) {
1909             tempNames.add(testInfo.getEnglish().getName(CLDRFile.LANGUAGE_NAME, langCode) + " (" + langCode + ")");
1910         }
1911         return tempNames;
1912     }
1913 
TestGrammarInfo()1914     public void TestGrammarInfo() {
1915         final Logger logger = getLogger();
1916         Multimap<String,String> allValues = TreeMultimap.create();
1917         for (String locale : SUPPLEMENTAL.hasGrammarInfo()) {
1918             if (locale.contentEquals("tr")) {
1919                 int debug = 0;
1920             }
1921             GrammarInfo grammarInfo = SUPPLEMENTAL.getGrammarInfo(locale);
1922             for (GrammaticalTarget target : GrammaticalTarget.values()) {
1923                 for (GrammaticalFeature feature : GrammaticalFeature.values()) {
1924                     Collection<String> general = grammarInfo.get(target, feature, GrammaticalScope.general);
1925                     for (GrammaticalScope scope : GrammaticalScope.values()) {
1926                         Collection<String> units = grammarInfo.get(target, feature, scope);
1927                         allValues.putAll(target + "/" + feature + "/" + scope, units);
1928                         if (scope != GrammaticalScope.general) {
1929                             assertTrue(general + " > " + scope + " " + units, general.containsAll(units));
1930                         }
1931                     }
1932                 }
1933             }
1934             logger.fine(grammarInfo.toString("\n" + locale + "\t"));
1935         }
1936         if (logger.isLoggable(java.util.logging.Level.FINE)) {  // if level is at least FINE
1937             logger.fine("");
1938             for (Entry<String, Collection<String>> entry : allValues.asMap().entrySet()) {
1939                 logger.fine(entry.getKey() + "\t" + Joiner.on(", ").join(entry.getValue()));
1940             }
1941         }
1942     }
1943 }
1944