• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004, International Business Machines Corporation and        *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.test;
8 
9 import static org.unicode.cldr.util.PathUtilities.getNormalizedPath;
10 
11 import java.io.File;
12 import java.io.IOException;
13 import java.io.PrintWriter;
14 import java.util.Arrays;
15 import java.util.Calendar;
16 import java.util.Collection;
17 import java.util.Date;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.Iterator;
21 import java.util.LinkedHashSet;
22 import java.util.List;
23 import java.util.Map;
24 import java.util.Set;
25 import java.util.TreeMap;
26 import java.util.TreeSet;
27 
28 import org.unicode.cldr.draft.FileUtilities;
29 import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType;
30 import org.unicode.cldr.util.CLDRFile;
31 import org.unicode.cldr.util.CLDRPaths;
32 import org.unicode.cldr.util.CldrUtility;
33 import org.unicode.cldr.util.Factory;
34 import org.unicode.cldr.util.LanguageTagParser;
35 import org.unicode.cldr.util.PathUtilities;
36 import org.unicode.cldr.util.SimpleFactory;
37 import org.unicode.cldr.util.StandardCodes;
38 import org.unicode.cldr.util.TimezoneFormatter;
39 import org.unicode.cldr.util.XPathParts;
40 import org.xml.sax.SAXException;
41 
42 import com.ibm.icu.dev.test.TestFmwk;
43 import com.ibm.icu.text.BreakIterator;
44 import com.ibm.icu.text.DecimalFormat;
45 import com.ibm.icu.text.NumberFormat;
46 import com.ibm.icu.text.UTF16;
47 import com.ibm.icu.text.UnicodeSet;
48 import com.ibm.icu.util.ULocale;
49 
50 /**
51  * Initial version of CLDR tests. Each test is named TestXXX. To run all the tests, use the options
52  * <blockquote>-nothrow</blockquote>
53  * To run a particular set of tests, include their names, like
54  * <blockquote>-nothrow TestForIllegalAttributeValues TestMinimalLocalization</blockquote>
55  * To show more information (logln), add -verbose
56  * <p>
57  * There are some environment variables that can be used with the test. <br>
58  * -DSHOW_FILES=<anything> shows all create/open of files. <br>
59  * -DXML_MATCH=<regular expression> skips all locales that don't match the regular expression <br>
60  * -DXML_MAIN_DIR=<filesystem directory> resets to a different main directory (eg not cldr/common/main. For example,
61  * some of the tools generate into a locale directory like -DXML_MAIN_DIR=C:\Unicode-CVS2\cldr\common\gen\main\ so this
62  * can be used to check that directory. <br>
63  * -DXML_SKIP_DRAFT=<anything> skips draft locales (the property only needs to be set; its value is not examined)
64  */
65 public class CLDRTest extends TestFmwk {
66     /**
67      * Settings read from system properties in main(), plus state shared by the tests.
68      */
69     private static String MATCH; // locale filter: system property XML_MATCH, or ".*" when unset
70     private static String MAIN_DIR; // data directory: system property XML_MAIN_DIR, or CLDRPaths.MAIN_DIRECTORY when unset
71     private static boolean SKIP_DRAFT; // true when system property XML_SKIP_DRAFT is set to anything
72     private Set<String> locales; // cldrFactory.getAvailable(), set in the constructor
73     private Set<String> languageLocales; // cldrFactory.getAvailableLanguages(), set in the constructor
74     private Factory cldrFactory; // built in the constructor from MAIN_DIR and MATCH
75     private CLDRFile resolvedRoot; // resolved "root" file, loaded in the constructor
76     private CLDRFile resolvedEnglish; // resolved "en" file, loaded in the constructor
77     private final UnicodeSet commonAndInherited = new UnicodeSet(
78         "[[:script=common:][:script=inherited:][:alphabetic=false:]]"); // added to every locale's exemplar set by getFixedExemplarSet
79     private static final String[] WIDTHS = { "narrow", "wide", "abbreviated", "short" }; // NOTE(review): no use visible in this part of the file
80     private static final String[] MONTHORDAYS = { "day", "month" }; // NOTE(review): no use visible in this part of the file
81     private Map<String, String> localeNameCache = new HashMap<>(); // locale ID -> English name, filled by getLocaleName
82     private CLDRFile english = null; // resolved "en" file, loaded lazily by the name helpers
83 
84     private Set<String> surveyInfo = new TreeSet<>(); // lines collected by tests; written to surveyInfo.txt by TestZZZZHack
85 
86     /**
87      * TestFmwk boilerplate
88      */
main(String[] args)89     public static void main(String[] args) throws Exception {
90         MATCH = System.getProperty("XML_MATCH");
91         if (MATCH == null)
92             MATCH = ".*";
93         else
94             System.out.println("Resetting MATCH:" + MATCH);
95         MAIN_DIR = System.getProperty("XML_MAIN_DIR");
96         if (MAIN_DIR == null)
97             MAIN_DIR = CLDRPaths.MAIN_DIRECTORY;
98         else
99             System.out.println("Resetting MAIN_DIR:" + MAIN_DIR);
100         SKIP_DRAFT = System.getProperty("XML_SKIP_DRAFT") != null;
101         if (SKIP_DRAFT) System.out.println("Skipping Draft locales");
102 
103         double deltaTime = System.currentTimeMillis();
104         new CLDRTest().run(args);
105         deltaTime = System.currentTimeMillis() - deltaTime;
106         System.out.println("Seconds: " + deltaTime / 1000);
107 
108     }
109 
TestZZZZHack()110     public void TestZZZZHack() throws IOException {
111         // hack to get file written at the end of run.
112         PrintWriter surveyFile = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "surveyInfo.txt");
113         for (String s : surveyInfo) {
114             surveyFile.println(s);
115         }
116         surveyFile.close();
117     }
118 
119     /**
120      * TestFmwk boilerplate
121      */
CLDRTest()122     public CLDRTest() throws SAXException, IOException {
123         // TODO parameterize the directory and filter
124         cldrFactory = Factory.make(MAIN_DIR, MATCH);
125         // CLDRKey.main(new String[]{"-mde.*"});
126         locales = cldrFactory.getAvailable();
127         languageLocales = cldrFactory.getAvailableLanguages();
128         resolvedRoot = cldrFactory.make("root", true);
129         /*
130          * PrintWriter out = FileUtilities.openUTF8Writer(Utility.GEN_DIRECTORY + "resolved/", "root.xml");
131          * CLDRFile temp = (CLDRFile) resolvedRoot.clone();
132          * temp.write(out);
133          * out.close();
134          */
135         resolvedEnglish = cldrFactory.make("en", true);
136     }
137 
138     /**
139      * Check to make sure that the currency formats are kosher.
140      */
TestCurrencyFormats()141     public void TestCurrencyFormats() {
142         // String decimal = "//ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat[@type=\"standard\"]/";
143         // String currency = "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/";
144         for (String locale : locales) {
145             boolean isPOSIX = locale.indexOf("POSIX") >= 0;
146             logln("Testing: " + locale);
147             CLDRFile item = cldrFactory.make(locale, false);
148             for (String xpath : item) {
149                 NumericType type = NumericType.getNumericType(xpath);
150                 if (type == NumericType.NOT_NUMERIC) continue;
151                 String value = item.getStringValue(xpath);
152                 // at this point, we only have currency formats
153                 String pattern = DisplayAndInputProcessor.getCanonicalPattern(value, type, isPOSIX);
154                 if (!pattern.equals(value)) {
155                     String draft = "";
156                     if (item.getFullXPath(xpath).indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
157                     assertEquals(getLocaleAndName(locale) + draft + " " + type + " pattern incorrect", pattern, value);
158                 }
159             }
160         }
161     }
162 
163     /**
164      * Internal class
165      */
166     private static class ValueCount {
167         int count = 1;
168         String value;
169         String fullxpath;
170     }
171 
172     /**
173      * Verify that if all the children of a language locale do not have the same value for the same key.
174      */
TestCommonChildren()175     public void TestCommonChildren() {
176         if (disableUntilLater("TestCommonChildren")) return;
177 
178         Map<String, ValueCount> currentValues = new TreeMap<>();
179         Set<String> okValues = new TreeSet<>();
180 
181         for (String parent : languageLocales) {
182             logln("Testing: " + parent);
183             currentValues.clear();
184             okValues.clear();
185             Set<String> availableWithParent = cldrFactory.getAvailableWithParent(parent, true);
186             for (String locale : availableWithParent) {
187                 logln("\tTesting: " + locale);
188                 CLDRFile item = cldrFactory.make(locale, false);
189                 // Walk through all the xpaths, adding to currentValues
190                 // Whenever two values for the same xpath are different, we remove from currentValues, and add to
191                 // okValues
192                 for (String xpath : item) {
193                     if (okValues.contains(xpath)) continue;
194                     if (xpath.startsWith("//ldml/identity/")) continue; // skip identity elements
195                     String v = item.getStringValue(xpath);
196                     ValueCount last = currentValues.get(xpath);
197                     if (last == null) {
198                         ValueCount vc = new ValueCount();
199                         vc.value = v;
200                         vc.fullxpath = item.getFullXPath(xpath);
201                         currentValues.put(xpath, vc);
202                     } else if (v.equals(last.value)) {
203                         last.count++;
204                     } else {
205                         okValues.add(xpath);
206                         currentValues.remove(xpath);
207                     }
208                 }
209                 // at the end, only the keys left in currentValues are (possibly) faulty
210                 // they are actually bad IFF either
211                 // (a) the count is equal to the total (thus all children are the same), or
212                 // (b) their value is the same as the parent's resolved value (thus all children are the same or the
213                 // same
214                 // as the inherited parent value).
215             }
216             if (currentValues.size() == 0) continue;
217             int size = availableWithParent.size();
218             CLDRFile parentCLDR = cldrFactory.make(parent, true);
219             for (String xpath : currentValues.keySet()) {
220                 ValueCount vc = currentValues.get(xpath);
221                 if (vc.count == size || (vc.value.equals(parentCLDR.getStringValue(xpath))
222                     && vc.fullxpath.equals(parentCLDR.getStringValue(xpath)))) {
223                     String draft = "";
224                     if (vc.fullxpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
225                     String count = (vc.count == size ? "" : vc.count + "/") + size;
226                     warnln(getLocaleAndName(parent) + draft +
227                         "\tall children (" + count + ") have same value for:\t"
228                         + xpath + ";\t" + vc.value);
229                 }
230             }
231         }
232     }
233 
234     static String[] EXEMPLAR_SKIPS = { "/hourFormat", "/exemplarCharacters", "/pattern", "/localizedPatternChars" };
235 
236     /**
237      * Check that the exemplars include all characters in the data.
238      */
TestThatExemplarsContainAll()239     public void TestThatExemplarsContainAll() {
240         UnicodeSet allExemplars = new UnicodeSet();
241         if (disableUntilLater("TestThatExemplarsContainAll")) return;
242         Set<String> counts = new TreeSet<>();
243         int totalCount = 0;
244         UnicodeSet localeMissing = new UnicodeSet();
245         for (String locale : locales) {
246             if (locale.equals("root")) continue;
247             CLDRFile resolved = cldrFactory.make(locale, false); // FIX LATER
248             UnicodeSet exemplars = getFixedExemplarSet(locale, resolved);
249             CLDRFile plain = cldrFactory.make(locale, false);
250             int count = 0;
251             localeMissing.clear();
252             file: for (String xpath : plain) {
253                 for (int i = 0; i < EXEMPLAR_SKIPS.length; ++i) {
254                     if (xpath.indexOf(EXEMPLAR_SKIPS[i]) > 0) continue file; // skip some items.
255                 }
256                 if (SKIP_DRAFT) {
257                     String fullxpath = plain.getFullXPath(xpath);
258                     if (fullxpath.indexOf("[@draft=\"unconfirmed\"") > 0) continue;
259                 }
260                 if (xpath.startsWith("//ldml/posix/messages")) continue;
261                 String value = plain.getStringValue(xpath);
262                 allExemplars.addAll(value);
263                 if (!exemplars.containsAll(value)) {
264                     count++;
265                     UnicodeSet missing = new UnicodeSet().addAll(value).removeAll(exemplars);
266                     localeMissing.addAll(missing);
267                     logln(getLocaleAndName(locale) + "\t" + xpath + "\t<" + value + "> contains " + missing
268                         + ", not in exemplars");
269                     surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' contains characters "
270                         + missing.toPattern(false) + ", which are not in exemplars");
271                 }
272             }
273             NumberFormat nf = new DecimalFormat("000");
274             if (count != 0) {
275                 totalCount += count;
276                 counts.add(nf.format(count) + "\t" + getLocaleAndName(locale) + "\t" + localeMissing);
277             }
278             if (localeMissing.size() != 0) {
279                 errln(getLocaleAndName(locale) + "\t uses " + localeMissing + ", not in exemplars");
280             }
281         }
282         for (String c : counts) {
283             logln(c);
284         }
285         logln("Total Count: " + totalCount);
286         System.out.println("All exemplars: " + allExemplars.toPattern(true));
287     }
288 
289     // Get Date-Time in milliseconds
getDateTimeinMillis(int year, int month, int date)290     private static long getDateTimeinMillis(int year, int month, int date) {
291         Calendar cal = Calendar.getInstance();
292         cal.set(year, month, date);
293         return cal.getTimeInMillis();
294     }
295 
296     static final long disableDate = getDateTimeinMillis(2005, 6 - 1, 3);
297 
298     /**
299      *
300      */
disableUntilLater(String string)301     private boolean disableUntilLater(String string) {
302         if (new Date().getTime() >= disableDate) return false;
303         warnln("Disabling " + string + " until " + new Date(disableDate));
304         return true;
305     }
306 
307     /**
308      * Internal
309      */
getFixedExemplarSet(String locale, CLDRFile cldrfile)310     private UnicodeSet getFixedExemplarSet(String locale, CLDRFile cldrfile) {
311         UnicodeSet exemplars = getExemplarSet(cldrfile, "");
312         if (exemplars.size() == 0) {
313             errln(getLocaleAndName(locale) + " has empty exemplar set");
314         }
315         exemplars.addAll(getExemplarSet(cldrfile, "standard"));
316         UnicodeSet auxiliary = getExemplarSet(cldrfile, "auxiliary");
317         if (exemplars.containsSome(auxiliary)) {
318             errln(getLocaleAndName(locale) + "Auxiliary & main exemplars should be disjoint, but overlap with " +
319                 new UnicodeSet(exemplars).retainAll(auxiliary) +
320                 ": change auxiliary to " + auxiliary.removeAll(exemplars));
321         }
322         exemplars.addAll(auxiliary);
323         exemplars.addAll(commonAndInherited);
324         return exemplars;
325     }
326 
327     /**
328      * @return Gets an exemplar set. Also verifies that the set contains no properties.
329      */
getExemplarSet(CLDRFile cldrfile, String type)330     public UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) {
331         if (type.length() != 0) type = "[@type=\"" + type + "\"]";
332         String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type);
333         if (v == null) return new UnicodeSet();
334         String pattern = v;
335         if (pattern.indexOf("[:") >= 0 || pattern.indexOf("\\p{") > 0) {
336             errln(getLocaleName(cldrfile.getLocaleID()) + " exemplar pattern contains property: " + pattern);
337         }
338         try {
339             UnicodeSet result = new UnicodeSet(v, UnicodeSet.CASE);
340             result.remove(0x20);
341             return result;
342         } catch (RuntimeException e) {
343             e.printStackTrace();
344             errln(getLocaleAndName(cldrfile.getLocaleID()) + " has illegal exemplar set: <" + v + ">");
345             return new UnicodeSet();
346         }
347         // if (type.length() != 0) System.out.println("fetched set for " + type);
348     }
349 
getLocaleAndName(String locale)350     public String getLocaleAndName(String locale) {
351         return locale + " (" + getLocaleName(locale) + ")";
352     }
353 
354     /**
355      * @return the ID plus its localization (for language, script, and territory IDs only)
356      */
getIDAndLocalization(String id)357     public String getIDAndLocalization(String id) {
358         return id + " " + getLocalization(id);
359     }
360 
361     /**
362      * @return the localization (for language, script, and territory IDs only)
363      */
getLocalization(String id)364     public String getLocalization(String id) {
365         if (english == null) english = cldrFactory.make("en", true);
366         if (id.length() == 0) return "?";
367         // pick on basis of case
368         char ch = id.charAt(0);
369         if ('a' <= ch && ch <= 'z') return getName(english, "languages/language", id);
370         if (id.length() == 4 && 'A' <= ch && ch <= 'Z') return getName(english, "scripts/script", id);
371         return getName(english, "territories/territory", id);
372     }
373 
374     /**
375      * Internal
376      */
getIDAndLocalization(Set<String> missing)377     private String getIDAndLocalization(Set<String> missing) {
378         StringBuffer buffer = new StringBuffer();
379         for (String next : missing) {
380             if (buffer.length() != 0) buffer.append("; ");
381             buffer.append(getIDAndLocalization(next));
382         }
383         return buffer.toString();
384     }
385 
getLocaleName(String locale)386     public String getLocaleName(String locale) {
387         String name = localeNameCache.get(locale);
388         if (name != null) return name;
389         if (english == null) english = cldrFactory.make("en", true);
390         String result = english.getName(locale);
391         /*
392          * Collection c = Utility.splitList(locale, '_', false, null);
393          * String[] pieces = new String[c.size()];
394          * c.toArray(pieces);
395          * int i = 0;
396          * String result = getName(english, "languages/language", pieces[i++]);
397          * if (pieces[i].length() == 0) return result;
398          * if (pieces[i].length() == 4) {
399          * result += " " + getName(english, "scripts/script", pieces[i++]);
400          * }
401          * if (pieces[i].length() == 0) return result;
402          * result += " " + getName(english, "territories/territory", pieces[i++]);
403          * if (pieces[i].length() == 0) return result;
404          * result += " " + getName(english, "variant/variants", pieces[i++]);
405          */
406         localeNameCache.put(locale, result);
407         return result;
408     }
409 
410     /**
411      * Internal
412      */
getName(CLDRFile english, String kind, String type)413     private String getName(CLDRFile english, String kind, String type) {
414         String v = english.getStringValue("//ldml/localeDisplayNames/" + kind + "[@type=\"" + type + "\"]");
415         if (v == null) return "<" + type + ">";
416         return v;
417     }
418 
419     /**
420      * Make sure we are only using attribute values that are in RFC3066bis, the Olson database (with aliases removed)
421      * or ISO 4217
422      *
423      * @throws IOException
424      */
TestForIllegalAttributeValues()425     public void TestForIllegalAttributeValues() {
426         // check for illegal attribute values that are not in the DTD
427         Map<String, Set<String>> result = new TreeMap<>();
428         Map<String, Set<String>> totalResult = new TreeMap<>();
429         for (String locale : locales) {
430             logln("Testing: " + locale);
431             CLDRFile item = cldrFactory.make(locale, false);
432             result.clear();
433             Set<String> xpathFailures = null; // don't collect
434             // XPathParts parts;
435             // String xpath;
436             // CLDRFile.StringValue value;
437             // String element;
438             // Map attributes;
439             checkAttributeValidity(item, result, xpathFailures);
440 
441             // now show
442             //String localeName = getLocaleAndName(locale);
443             for (Iterator<String> it3 = result.keySet().iterator(); it3.hasNext();) {
444                 String code = it3.next();
445                 Set<String> avalues = result.get(code);
446                 errln(getLocaleAndName(locale) + "\tillegal attribute value for " + code + ", value:\t" + show(avalues));
447                 Set<String> totalvalues = totalResult.get(code);
448                 if (totalvalues == null) totalResult.put(code, totalvalues = new TreeSet<>());
449                 totalvalues.addAll(avalues);
450             }
451         }
452         for (Iterator<String> it3 = totalResult.keySet().iterator(); it3.hasNext();) {
453             String code = it3.next();
454             Set<String> avalues = totalResult.get(code);
455             errln("All illegal attribute values for " + code + ", value:\t" + show(avalues));
456         }
457     }
458 
459     /**
460      * Tests whether the display names have any collisions, e.g. if in the fully resolved
461      * locale $ is used for both USD and UAD.
462      *
463      */
TestDisplayNameCollisions()464     public void TestDisplayNameCollisions() {
465         if (disableUntilLater("TestDisplayNameCollisions")) return;
466 
467         Map<String, String>[] maps = new HashMap[CLDRFile.LIMIT_TYPES];
468         for (int i = 0; i < maps.length; ++i) {
469             maps[i] = new HashMap<>();
470         }
471         Set<String> collisions = new TreeSet<>();
472         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
473             String locale = it.next();
474             CLDRFile item = cldrFactory.make(locale, true);
475             for (int i = 0; i < maps.length; ++i) {
476                 maps[i].clear();
477             }
478             collisions.clear();
479 
480             for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
481                 String xpath = it2.next();
482                 int nameType = CLDRFile.getNameType(xpath);
483                 if (nameType < 0) continue;
484                 String value = item.getStringValue(xpath);
485                 String xpath2 = maps[nameType].get(value);
486                 if (xpath2 == null) {
487                     maps[nameType].put(value, xpath);
488                     continue;
489                 }
490                 collisions.add(CLDRFile.getNameTypeName(nameType) + "\t" + value + "\t" + xpath + "\t" + xpath2);
491                 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is a duplicate of what is in " + xpath2);
492             }
493             String name = getLocaleAndName(locale) + "\t";
494             for (Iterator<String> it2 = collisions.iterator(); it2.hasNext();) {
495                 errln(name + it2.next());
496             }
497         }
498     }
499 
500     /**
501      * Checks the validity of attributes, based on StandardCodes.
502      * The invalid codes are added to badCodes, and the failing xpaths are added to xpathFailures.
503      *
504      * @param item
505      * @param badCodes
506      * @param xpathFailures
507      */
checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures)508     public static void checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures) {
509         for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
510             String xpath = it2.next();
511             XPathParts parts = XPathParts.getFrozenInstance(item.getFullXPath(xpath));
512             for (int i = 0; i < parts.size(); ++i) {
513                 if (parts.getAttributeCount(i) == 0) {
514                     continue;
515                 }
516                 String element = parts.getElement(i);
517                 Map<String, String> attributes = parts.getAttributes(i);
518                 for (Iterator<String> it3 = attributes.keySet().iterator(); it3.hasNext();) {
519                     String attribute = it3.next();
520                     String avalue = attributes.get(attribute);
521                     checkValidity(xpath, element, attribute, avalue, badCodes, xpathFailures);
522                 }
523             }
524         }
525     }
526 
527     /**
528      * Internal
529      */
show(Collection<String> avalues)530     private String show(Collection<String> avalues) {
531         StringBuffer result = new StringBuffer("{");
532         boolean first = true;
533         for (Iterator<String> it3 = avalues.iterator(); it3.hasNext();) {
534             if (first)
535                 first = false;
536             else
537                 result.append(", ");
538             result.append(it3.next().toString());
539         }
540         result.append("}");
541         return result.toString();
542     }
543 
544     /**
545      * Internal function
546      */
checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, Set<String> xpathsFailing)547     private static void checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results,
548         Set<String> xpathsFailing) {
549         StandardCodes codes = StandardCodes.make();
550         if (attribute.equals("type")) {
551             boolean checkReplacements = xpath.indexOf("/identity") < 0;
552             if (element.equals("currency"))
553                 checkCodes(xpath, "currency", avalue, codes, results, xpathsFailing, checkReplacements);
554             else if (element.equals("script"))
555                 checkCodes(xpath, "script", avalue, codes, results, xpathsFailing, checkReplacements);
556             else if (element.equals("territory"))
557                 checkCodes(xpath, "territory", avalue, codes, results, xpathsFailing, checkReplacements);
558             else if (element.equals("language"))
559                 checkCodes(xpath, "language", avalue, codes, results, xpathsFailing, checkReplacements);
560             else if (element.equals("zone"))
561                 checkCodes(xpath, "tzid", avalue, codes, results, xpathsFailing, checkReplacements);
562         }
563     }
564 
565     /**
566      * Internal function
567      *
568      * @param checkReplacements
569      *            TODO
570      */
571     private static void checkCodes(String xpath, String code, String avalue, StandardCodes codes, Map<String, Set<String>> results,
572         Set<String> xpathFailures, boolean checkReplacements) {
573         // ok if code is found AND it has no replacement
574         if (codes.getData(code, avalue) != null
575             && (!checkReplacements || codes.getReplacement(code, avalue) == null)) return;
576 
577         if (xpathFailures != null) xpathFailures.add(xpath);
578         if (results == null) return;
579         Set<String> s = results.get(code);
580         if (s == null) {
581             s = new TreeSet<>();
582             results.put(code, s);
583         }
584         s.add(avalue);
585     }
586 
587     /**
588      * Verify that a small set of locales (currently just English) has everything translated.
589      *
590      * @throws IOException
591      */
592     public void TestCompleteLocales() {
593         // just test English for now
594         if (english == null) english = cldrFactory.make("en", true);
595         checkTranslatedCodes(english);
596     }
597 
598     /**
599      * Tests that the file contains codes for all main display name ids: language, script, territory, tzid, currency.
600      */
601     private void checkTranslatedCodes(CLDRFile cldrfile) {
602         StandardCodes codes = StandardCodes.make();
603         checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency", "/displayName");
604         // can't check timezones for English.
605         // checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone", "");
606         checkTranslatedCode(cldrfile, codes, "language", "//ldml/localeDisplayNames/languages/language", "");
607         checkTranslatedCode(cldrfile, codes, "script", "//ldml/localeDisplayNames/scripts/script", "");
608         checkTranslatedCode(cldrfile, codes, "territory", "//ldml/localeDisplayNames/territories/territory", "");
609         checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant", "");
610     }
611 
612     /**
613      * @param codes
614      * @param type
615      * @param prefix
616      * @param postfix
617      *            TODO
618      */
619     private void checkTranslatedCode(CLDRFile cldrfile, StandardCodes codes, String type, String prefix, String postfix) {
620 
621         // TODO, expand to other languages
622         Map<String, Set<String>> completionExceptions = new HashMap<>();
623         Set<String> scriptExceptions = new HashSet<>();
624         scriptExceptions.add("Cham");
625         scriptExceptions.add("Thai");
626         scriptExceptions.add("Toto");
627         completionExceptions.put("script", scriptExceptions);
628 
629         Set<String> codeItems = codes.getGoodAvailableCodes(type);
630         int count = 0;
631         Set<String> exceptions = completionExceptions.get(type);
632         for (String code : codeItems) {
633             String rfcname = codes.getData(type, code);
634             // if (rfcname.equals("ZZ")) continue;
635             ++count;
636             if (rfcname.equals("PRIVATE USE")) continue;
637             String fullFragment = prefix + "[@type=\"" + code + "\"]" + postfix;
638             String v = cldrfile.getStringValue(fullFragment);
639             if (v == null) {
640                 errln("Missing translation for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">");
641                 continue;
642             }
643             String translation = v;
644             if (translation.equals(code)) {
645                 if (exceptions != null && exceptions.contains(code)) continue;
646                 errln("Translation = code for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">");
647                 continue;
648             }
649         }
650         logln("Total " + type + ":\t" + count);
651     }
652 
653     // <territoryContainment><group type="001" contains="002 009 019 142 150"/>
654     // <languageData><language type="af" scripts="Latn" territories="ZA"/>
655     void getSupplementalData(Map<String, Set<String>> language_scripts, Map<String, Set<String>> language_territories,
656         Map<String, Set<String>> group_territory,
657         Map<String, Set<String>> territory_currencies, Map<String, Map<String, String>> aliases) {
658 
659         boolean SHOW = false;
660         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
661         CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
662         for (Iterator<String> it = supp.iterator(); it.hasNext();) {
663             String path = it.next();
664             try {
665                 XPathParts parts = XPathParts.getFrozenInstance(supp.getFullXPath(path));
666                 Map<String, String> m;
667                 String type = "";
668                 if (aliases != null && parts.findElement("alias") >= 0) {
669                     m = parts.findAttributes(type = "languageAlias");
670                     if (m == null) m = parts.findAttributes(type = "territoryAlias");
671                     if (m != null) {
672                         Map top = aliases.get(type);
673                         if (top == null) {
674                             aliases.put(type, top = new TreeMap());
675                         }
676                         top.put(m.get("type"), m.get("replacement"));
677                     }
678                 }
679                 if (territory_currencies != null) {
680                     m = parts.findAttributes("region");
681                     if (m != null) {
682                         String region = m.get("iso3166");
683                         Set s = territory_currencies.get(region);
684                         if (s == null) {
685                             territory_currencies.put(region, s = new LinkedHashSet());
686                         }
687                         m = parts.findAttributes("currency");
688                         if (m == null) {
689                             warnln("missing currency for region: " + path);
690                             continue;
691                         }
692                         String currency = m.get("iso4217");
693                         s.add(currency);
694                         m = parts.findAttributes("alternate");
695                         String alternate = m == null ? null : (String) m.get("iso4217");
696                         if (alternate != null) {
697                             s.add(alternate);
698                         }
699                         continue;
700                     }
701                 }
702                 m = parts.findAttributes("group");
703                 if (m != null) {
704                     if (group_territory == null) continue;
705                     type = m.get("type");
706                     String contains = m.get("contains");
707                     group_territory.put(type, new TreeSet(CldrUtility.splitList(contains, ' ', true)));
708                     continue;
709                 }
710                 m = parts.findAttributes("language");
711                 if (m == null) continue;
712                 String language = m.get("type");
713                 String scripts = m.get("scripts");
714                 if (scripts == null)
715                     language_scripts.put(language, new TreeSet<String>());
716                 else {
717                     language_scripts.put(language, new TreeSet<>(CldrUtility.splitList(scripts, ' ', true)));
718                     if (SHOW)
719                         System.out.println(getIDAndLocalization(language) + "\t\t"
720                             + getIDAndLocalization(language_scripts.get(language)));
721                 }
722                 String territories = m.get("territories");
723                 if (territories == null)
724                     language_territories.put(language, new TreeSet<String>());
725                 else {
726                     language_territories.put(language, new TreeSet<>(CldrUtility.splitList(territories, ' ', true)));
727                     if (SHOW)
728                         System.out.println(getIDAndLocalization(language) + "\t\t"
729                             + getIDAndLocalization(language_territories.get(language)));
730                 }
731             } catch (RuntimeException e) {
732                 throw (IllegalArgumentException) new IllegalArgumentException("Failure with: " + path).initCause(e);
733             }
734         }
735     }
736 
737     /**
738      * Verify that the minimal localizations are present.
739      */
TestMinimalLocalization()740     public void TestMinimalLocalization() throws IOException {
741         if (disableUntilLater("TestMinimalLocalization")) return;
742 
743         boolean testDraft = false;
744         Map<String, Set<String>> language_scripts = new HashMap<>();
745         Map<String, Set<String>> language_territories = new HashMap<>();
746         getSupplementalData(language_scripts, language_territories, null, null, null);
747         LanguageTagParser localIDParser = new LanguageTagParser();
748         // see http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/design/minimal_requirements.htm
749         int[] failureCount = new int[1];
750         int[] warningCount = new int[1];
751         for (Iterator<String> it = languageLocales.iterator(); it.hasNext();) {
752             String locale = it.next();
753             if (locale.equals("root")) continue;
754             // if (!locale.equals("zh_Hant")) continue;
755 
756             CLDRFile item = cldrFactory.make(locale, true);
757             if (!testDraft && item.isDraft()) {
758                 logln(getLocaleAndName(locale) + "\tskipping draft");
759                 continue;
760             }
761             UnicodeSet exemplars = getFixedExemplarSet(locale, item);
762             CLDRFile missing = SimpleFactory.makeFile(locale);
763             failureCount[0] = 0;
764             warningCount[0] = 0;
765             localIDParser.set(locale);
766             String language = localIDParser.getLanguage();
767             logln("Testing: " + locale);
768             // languages
769             Set<String> languages = new TreeSet<>(CldrUtility.MINIMUM_LANGUAGES);
770             languages.add(language);
771             // LANGUAGE_NAME = 0, SCRIPT_NAME = 1, TERRITORY_NAME = 2, VARIANT_NAME = 3,
772             // CURRENCY_NAME = 4, CURRENCY_SYMBOL = 5, TZID = 6
773 
774             checkForItems(item, languages, CLDRFile.LANGUAGE_NAME, missing, failureCount, null);
775 
776             /*
777              * checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency");
778              * checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone");
779              * checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant");
780              */
781 
782             Set<String> scripts = new TreeSet<>();
783             scripts.add("Latn");
784             Set<String> others = language_scripts.get(language);
785             if (others != null) scripts.addAll(others);
786             checkForItems(item, scripts, CLDRFile.SCRIPT_NAME, missing, failureCount, null);
787 
788             Set<String> countries = new TreeSet<>(CldrUtility.MINIMUM_TERRITORIES);
789             others = language_territories.get(language);
790             if (others != null) countries.addAll(others);
791             checkForItems(item, countries, CLDRFile.TERRITORY_NAME, missing, failureCount, null);
792 
793             Set<String> currencies = new TreeSet<>();
794             StandardCodes sc = StandardCodes.make();
795             for (Iterator<String> it2 = countries.iterator(); it2.hasNext();) {
796                 String country = it2.next();
797                 Set<String> countryCurrencies = sc.getMainCurrencies(country);
798                 if (countryCurrencies == null) {
799                     errln("Internal Error: no currencies for " + country + ", locale: " + locale);
800                 } else {
801                     currencies.addAll(countryCurrencies);
802                 }
803             }
804             checkForItems(item, currencies, CLDRFile.CURRENCY_NAME, missing, failureCount, null);
805             checkForItems(item, currencies, CLDRFile.CURRENCY_SYMBOL, missing, failureCount, exemplars);
806 
807             // context=format and width=wide; context=stand-alone & width=abbreviated
808             Set<String> months = new TreeSet<>();
809             for (int i = 1; i <= 12; ++i)
810                 months.add(i + "");
811             Set<String> days = new TreeSet<>(Arrays.asList(new String[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" }));
812             for (int i = -7; i < 0; ++i) {
813                 checkForItems(item, (i < -4 ? months : days), i, missing, failureCount, null);
814             }
815 
816             String filename = "missing_" + locale + ".xml";
817             if (failureCount[0] > 0 || warningCount[0] > 0) {
818                 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "missing/", filename);
819                 missing.write(out);
820                 out.close();
821                 // String s = getIDAndLocalization(missing);
822                 String message = "missing localizations, creating file"
823                     + getNormalizedPath(CLDRPaths.GEN_DIRECTORY, "missing", filename);
824                 if (failureCount[0] > 0)
825                     warnln(getLocaleAndName(locale) + "\t" + message);
826                 else
827                     logln(getLocaleAndName(locale) + "\tpossibly " + message);
828             } else {
829                 new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).delete();
830             }
831         }
832     }
833 
834     /**
835      * Internal
836      */
getDateKey(String monthOrDay, String width, String code)837     private String getDateKey(String monthOrDay, String width, String code) {
838         // String context = width.equals("narrow") ? "format" : "stand-alone";
839         return "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/"
840             + monthOrDay + "s/" + monthOrDay + "Context[@type=\"format\"]/"
841             + monthOrDay + "Width[@type=\"" + width + "\"]/" + monthOrDay
842             + "[@type=\"" + code + "\"]";
843     }
844 
845     /**
846      * Internal
847      */
getDateKey(int type, String code)848     private String getDateKey(int type, String code) {
849         // type is 6..4 for months abbrev..narrow, 3..0 for days short..narrow
850         int monthOrDayType = 0, widthType = type;
851         if (type >= 4) {
852             monthOrDayType = 1;
853             widthType -= 4;
854         }
855         return getDateKey(MONTHORDAYS[monthOrDayType], WIDTHS[widthType], code);
856     }
857 
858     /**
859      * @param item
860      * @param codes
861      * @param missing
862      * @param exemplarTest
863      *            TODO
864      *            TODO
865      */
checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], UnicodeSet exemplarTest)866     private void checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[],
867         UnicodeSet exemplarTest) {
868         // check codes
869         for (Iterator<String> it2 = codes.iterator(); it2.hasNext();) {
870             String code = it2.next();
871             String key;
872             if (type >= 0) {
873                 key = CLDRFile.getKey(type, code);
874             } else {
875                 key = getDateKey(-type - 1, code);
876             }
877             String v = item.getStringValue(key);
878             String rootValue = resolvedRoot.getStringValue(key);
879             if (v == null || v.equals(rootValue) && (exemplarTest == null || !exemplarTest.containsAll(rootValue))) {
880                 String englishValue = resolvedEnglish.getStringValue(key);
881                 String transValue;
882                 if (englishValue != null) {
883                     transValue = englishValue;
884                 } else {
885                     transValue = code;
886                 }
887                 missing.add(key, "TODO " + transValue);
888                 failureCount[0]++;
889             } else {
890                 logln("\t" + code + "\t" + v);
891             }
892         }
893     }
894 
895     /*
896      * void showTestStr() {
897      * LocaleIDParser lparser = new LocaleIDParser();
898      * Collection s = split(teststr,',', true, new ArrayList());
899      * for (Iterator it = s.iterator(); it.hasNext();) {
900      * String item = (String)it.next();
901      * lparser.set(item.replace('?', '_'));
902      * String region = lparser.getRegion();
903      * System.out.print(item.replace('?', '-') + " (" + getLocalization(region) + "), ");
904      * //System.out.print(getLocalization(region) + ", ");
905      * }
906      * }
907      * static String teststr =
908      * "en?AG, en?AI, en?AS, en?AU, en?IN, en?BB, en?BE, en?BM, en?BN, en?BS, en?BW, en?BZ, en?CA, en?CK, en?CM, en?DM, en?ER, en?ET, en?FJ, en?FK, en?FM, en?GB, en?GD, en?GH, en?GI, en?GM, en?GU, en?GY, en?HK, en?IE, en?IL, en?IO, en?JM, en?KE, en?KI, en?KN, en?KY, en?LC, en?LR, en?LS, en?MH, en?MP, en?MS, en?MT, en?MU, en?MW, en?NA, en?NF, en?NG, en?NR, en?NU, en?NZ, en?PG, en?PH, en?PK, en?PN, en?PR, en?PW, en?RW, en?SB, en?SC, en?SG, en?SH, en?SL, en?SO, en?SZ, en?TC, en?TK, en?TO, en?TT, en?UG, en?UM, en?US, en?VC, en?VG, en?VI, en?VU, en?WS, en?ZA, en?ZM, en?ZW"
909      * ;
910      */
911 
912     CldrUtility.CollectionTransform EnglishName = new CldrUtility.CollectionTransform() {
913         @Override
914         public Object transform(Object source) {
915             // TODO Auto-generated method stub
916             return getLocalization(source.toString()) + " (" + source + ")";
917         }
918     };
919 
920     CldrUtility.CollectionTransform EnglishCurrencyName = new CldrUtility.CollectionTransform() {
921         @Override
922         public Object transform(Object source) {
923             if (english == null) english = cldrFactory.make("en", true);
924             return english.getName("currency", source.toString()) + " (" + source + ")";
925         }
926     };
927 
928     /**
929      * Tests that the supplemental data is well-formed.
930      *
931      */
TestSupplementalData()932     public void TestSupplementalData() {
933         Map<String, Set<String>> language_scripts = new TreeMap<>();
934         Map<String, Set<String>> language_territories = new TreeMap<>();
935         Map<String, Set<String>> groups = new TreeMap<>();
936         Map<String, Set<String>> territory_currencies = new TreeMap<>();
937         Map<String, Map<String, String>> aliases = new TreeMap<>();
938         getSupplementalData(language_scripts, language_territories, groups, territory_currencies, aliases);
939         Set<String> sTerritories = new TreeSet<>();
940         for (Iterator<Set<String>> it = language_territories.values().iterator(); it.hasNext();) {
941             sTerritories.addAll(it.next());
942         }
943         StandardCodes sc = StandardCodes.make();
944         Set<String> fullTerritories = sc.getAvailableCodes("territory");
945         Set<String> fullLanguages = sc.getAvailableCodes("language");
946 
947         Set<String> allLanguages = new TreeSet<>(language_scripts.keySet());
948         allLanguages.addAll(language_territories.keySet());
949         for (Iterator<String> it = allLanguages.iterator(); it.hasNext();) {
950             Object language = it.next();
951             Set<String> scripts = language_scripts.get(language);
952             Set<String> territories = language_territories.get(language);
953             logln(EnglishName.transform(language)
954                 + " scripts: " + EnglishName.transform(scripts)
955                 + " territories: " + EnglishName.transform(territories));
956         }
957 
958         Map<String, String> changedLanguage = new TreeMap<>();
959         for (Iterator<String> it = fullLanguages.iterator(); it.hasNext();) {
960             String code = it.next();
961             List<String> data = sc.getFullData("language", code);
962             if (data.size() < 3) {
963                 System.out.println("data problem: " + data);
964                 continue;
965             }
966             String replacement = data.get(2);
967             if (!replacement.equals("")) {
968                 if (!replacement.equals("--")) changedLanguage.put(code, replacement);
969                 continue;
970             }
971         }
972 
973         // remove private use, deprecated, groups
974         Set<String> standardTerritories = new TreeSet<>();
975         Map<String, String> changedTerritory = new TreeMap<>();
976         for (Iterator<String> it = fullTerritories.iterator(); it.hasNext();) {
977             String code = it.next();
978             if (code.equals("200")) continue; // || code.equals("YU") || code.equals("PZ")
979             List<String> data = sc.getFullData("territory", code);
980             if (data.get(0).equals("PRIVATE USE")) continue;
981             if (!data.get(2).equals("")) {
982                 if (!data.get(2).equals("--")) changedTerritory.put(code, data.get(2));
983                 continue;
984             }
985             standardTerritories.add(code);
986         }
987         standardTerritories.removeAll(groups.keySet());
988 
989         if (!standardTerritories.containsAll(sTerritories)) {
990             TreeSet<String> extras = new TreeSet<>(sTerritories);
991             extras.removeAll(standardTerritories);
992             errln("Supplemental Language Territories contain illegal values: " + EnglishName.transform(extras));
993         }
994         if (!sTerritories.containsAll(standardTerritories)) {
995             TreeSet<String> extras = new TreeSet<>(standardTerritories);
996             extras.removeAll(sTerritories);
997             warnln("Missing Language Territories: " + EnglishName.transform(extras));
998         }
999 
1000         // now test currencies
1001         logln("Check that no illegal territories are used");
1002         if (!standardTerritories.containsAll(territory_currencies.keySet())) {
1003             TreeSet<String> extras = new TreeSet<>(territory_currencies.keySet());
1004             extras.removeAll(fullTerritories);
1005             if (extras.size() != 0) errln("Currency info -- Illegal Territories: " + EnglishName.transform(extras));
1006             extras = new TreeSet<>(territory_currencies.keySet());
1007             extras.retainAll(fullTerritories);
1008             extras.removeAll(standardTerritories);
1009             if (extras.size() != 0) warnln("Currency info -- Archaic Territories: " + EnglishName.transform(extras));
1010         }
1011         logln("Check that no territories are missing");
1012         if (!territory_currencies.keySet().containsAll(standardTerritories)) {
1013             TreeSet<String> extras = new TreeSet<>(standardTerritories);
1014             extras.removeAll(territory_currencies.keySet());
1015             errln("Currency info -- Missing Territories: " + EnglishName.transform(extras));
1016         }
1017         Set<String> currencies = new TreeSet<>();
1018         for (Iterator<Set<String>> it = territory_currencies.values().iterator(); it.hasNext();) {
1019             currencies.addAll(it.next());
1020         }
1021         logln("Check that no illegal currencies are used");
1022         Set<String> legalCurrencies = new TreeSet<>(sc.getAvailableCodes("currency"));
1023         // first remove non-ISO
1024         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) {
1025             String code = it.next();
1026             List<String> data = sc.getFullData("currency", code);
1027             if ("X".equals(data.get(3))) it.remove();
1028         }
1029         if (!legalCurrencies.containsAll(currencies)) {
1030             TreeSet<String> extras = new TreeSet<>(currencies);
1031             extras.removeAll(legalCurrencies);
1032             errln("Currency info -- Illegal Currencies: " + EnglishCurrencyName.transform(extras));
1033         }
1034         logln("Check that there are no missing currencies");
1035         if (!currencies.containsAll(legalCurrencies)) {
1036             TreeSet<String> extras = new TreeSet<>(legalCurrencies);
1037             extras.removeAll(currencies);
1038             Map<String, Set<String>> failures = new TreeMap<>();
1039             for (Iterator<String> it = extras.iterator(); it.hasNext();) {
1040                 String code = it.next();
1041                 List<String> data = sc.getFullData("currency", code);
1042                 if (data.get(1).equals("ZZ")) continue;
1043                 String type = data.get(3) + "/" + data.get(1);
1044                 Set<String> s = failures.get(type);
1045                 if (s == null) failures.put(type, s = new TreeSet<>());
1046                 s.add(code);
1047             }
1048             for (Iterator<String> it = failures.keySet().iterator(); it.hasNext();) {
1049                 String type = it.next();
1050                 Set<String> s = failures.get(type);
1051                 warnln("Currency info -- Missing Currencies: " + type + "\t \u2192 " + EnglishCurrencyName.transform(s));
1052             }
1053         }
1054         logln("Missing English currency names");
1055         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) {
1056             String currency = it.next();
1057             String name = english.getName("currency", currency);
1058             if (name == null) {
1059                 String standardName = sc.getFullData("currency", currency).get(0);
1060                 logln("\t\t\t<currency type=\"" + currency + "\">");
1061                 logln("\t\t\t\t<displayName>" + standardName + "</displayName>");
1062                 logln("\t\t\t</currency>");
1063             }
1064         }
1065         logln("Check Aliases");
1066         for (Iterator<String> it = aliases.keySet().iterator(); it.hasNext();) {
1067             // the first part of the mapping had better not be in the standardTerritories
1068             String key = it.next();
1069             Map<String, String> submap = aliases.get(key);
1070             if (key.equals("territoryAlias")) {
1071                 checkEqual(key, submap, changedTerritory);
1072             } else if (key.equals("languageAlias")) {
1073                 for (Iterator<String> it2 = submap.keySet().iterator(); it2.hasNext();) {
1074                     String k = it2.next();
1075                     String value = submap.get(k);
1076                     if (value.indexOf("_") >= 0) it2.remove();
1077                 }
1078                 checkEqual(key, submap, changedLanguage);
1079             }
1080         }
1081     }
1082 
1083     /**
1084      *
1085      */
checkEqual(String title, Map map1, Map map2)1086     private void checkEqual(String title, Map map1, Map map2) {
1087         Set foo = new TreeSet(map1.keySet());
1088         foo.removeAll(map2.keySet());
1089         if (!foo.isEmpty()) errln("Extraneous Aliases: " + title + "\t" + foo);
1090         foo = new TreeSet(map2.keySet());
1091         foo.removeAll(map1.keySet());
1092         if (!foo.isEmpty()) errln("Missing Aliases: " + title + "\t" + foo);
1093         foo = map2.keySet();
1094         foo.retainAll(map1.keySet());
1095         for (Iterator it = foo.iterator(); it.hasNext();) {
1096             Object key = it.next();
1097             Object result1 = map1.get(key);
1098             Object result2 = map2.get(key);
1099             if (!result1.equals(result2))
1100                 errln("Missing Aliases: " + title + "\t" + key + "\t" + result1 + " != " + result2);
1101         }
1102     }
1103 
1104     /**
1105      * Test that the zone ids are well-formed.
1106      *
1107      */
TestZones()1108     public void TestZones() {
1109         StandardCodes sc = StandardCodes.make();
1110 
1111         Map<String, String> defaultNames = new TreeMap();
1112         Map<String, String> old_new = sc.getZoneLinkold_new();
1113         Set<String> core = sc.getZoneData().keySet();
1114         logln("Checking for collisions with last field");
1115         for (Iterator<String> it = core.iterator(); it.hasNext();) {
1116             String currentItem = it.next();
1117             String defaultName = TimezoneFormatter.getFallbackName(currentItem);
1118             String fullName = defaultNames.get(defaultName);
1119             if (fullName == null)
1120                 defaultNames.put(defaultName, currentItem);
1121             else {
1122                 errln("Collision between: " + currentItem + " AND " + fullName);
1123             }
1124         }
1125 
1126         logln("Checking that all links are TO canonical zones");
1127         Set<String> s = new TreeSet<>(old_new.values());
1128         s.removeAll(core);
1129         if (s.size() != 0) {
1130             errln("Links go TO zones that are not canonical! " + s);
1131         }
1132 
1133         logln("Checking that no links are FROM canonical zones");
1134         s = new TreeSet<>(core);
1135         s.retainAll(old_new.keySet());
1136         if (s.size() != 0) {
1137             errln("Links go FROM zones that are canonical! " + s);
1138         }
1139 
1140         logln("Checking that the zones with rule data are all canonical");
1141         Set<String> zonesWithRules = sc.getZone_rules().keySet();
1142         s.clear();
1143         s.addAll(zonesWithRules);
1144         s.removeAll(core);
1145         if (s.size() != 0) logln("Zones with rules that are not canonical: " + s);
1146 
1147         logln("Checking that the rule data are all canonical");
1148         s.clear();
1149         s.addAll(core);
1150         s.removeAll(zonesWithRules);
1151         s.removeAll(old_new.keySet());
1152         if (s.size() != 0) logln("Canonical zones that don't have rules or links: " + s);
1153 
1154         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
1155             String oldItem = it.next();
1156             logln("old: " + oldItem + "\tnew: " + old_new.get(oldItem));
1157         }
1158         Map<String, Set<String>> new_old = new TreeMap<>();
1159         for (Iterator<String> it = core.iterator(); it.hasNext();) {
1160             new_old.put(it.next(), new TreeSet<String>());
1161         }
1162         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
1163             String oldItem = it.next();
1164             String newItem = old_new.get(oldItem);
1165             Set<String> oldItems = new_old.get(newItem);
1166             if (oldItems == null) { // try recursing
1167                 logln("!!!!Skipping " + oldItem + " \u2192 " + newItem);
1168                 continue;
1169                 // new_old.put(oldOne, oldItems = new TreeSet());
1170             }
1171             oldItems.add(oldItem);
1172         }
1173         for (Iterator<String> it = new_old.keySet().iterator(); it.hasNext();) {
1174             String newOne = it.next();
1175             Set<String> oldItems = new_old.get(newOne);
1176             logln(newOne + "\t" + oldItems);
1177         }
1178     }
1179 
TestNarrowForms()1180     public void TestNarrowForms() {
1181         if (disableUntilLater("TestMinimalLocalization")) return;
1182 
1183         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
1184             String locale = it.next();
1185             logln("Testing: " + getLocaleAndName(locale));
1186             BreakIterator bi = BreakIterator.getCharacterInstance(new ULocale(locale));
1187             CLDRFile item = cldrFactory.make(locale, false);
1188             // Walk through all the xpaths, adding to currentValues
1189             // Whenever two values for the same xpath are different, we remove from currentValues, and add to okValues
1190             for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
1191                 String xpath = it2.next();
1192                 if (xpath.indexOf("[@type=\"narrow\"]") >= 0) {
1193                     String value = item.getStringValue(xpath);
1194                     // logln("\tTesting: " + value + "\t path: " + xpath);
1195                     int end = getXGraphemeClusterBoundary(bi, value, 0);
1196                     if (end == value.length()) continue;
1197                     errln(getLocaleAndName(locale) + "\tillegal narrow value " + value + "\t path: " + xpath);
1198                     surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is too wide for a \"narrow\" value.");
1199                 }
1200             }
1201         }
1202     }
1203 
1204     static final UnicodeSet XGRAPHEME = new UnicodeSet("[[:mark:][:grapheme_extend:]]");
1205     static final UnicodeSet DIGIT = new UnicodeSet("[:decimal_number:]");
1206 
getXGraphemeClusterBoundary(BreakIterator bi, String value, int start)1207     private int getXGraphemeClusterBoundary(BreakIterator bi, String value, int start) {
1208         if (value.length() <= 1) return 1;
1209 
1210         bi.setText(value);
1211         if (start != 0) bi.preceding(start + 1); // backup one
1212         int current = bi.next();
1213         // link any digits
1214         if (DIGIT.contains(UTF16.charAt(value, current - 1))) {
1215             current = DIGIT.findIn(value, current, true);
1216         }
1217         // continue collecting any additional characters that are M or grapheme extend
1218         return XGRAPHEME.findIn(value, current, true);
1219     }
1220 }
1221