• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004, International Business Machines Corporation and        *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.test;
8 
9 import static org.unicode.cldr.util.PathUtilities.getNormalizedPath;
10 
11 import java.io.File;
12 import java.io.IOException;
13 import java.io.PrintWriter;
14 import java.util.Arrays;
15 import java.util.Calendar;
16 import java.util.Collection;
17 import java.util.Date;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.Iterator;
21 import java.util.LinkedHashSet;
22 import java.util.List;
23 import java.util.Map;
24 import java.util.Set;
25 import java.util.TreeMap;
26 import java.util.TreeSet;
27 
28 import org.unicode.cldr.draft.FileUtilities;
29 import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType;
30 import org.unicode.cldr.util.CLDRFile;
31 import org.unicode.cldr.util.CLDRPaths;
32 import org.unicode.cldr.util.CldrUtility;
33 import org.unicode.cldr.util.Factory;
34 import org.unicode.cldr.util.LanguageTagParser;
35 import org.unicode.cldr.util.SimpleFactory;
36 import org.unicode.cldr.util.StandardCodes;
37 import org.unicode.cldr.util.TimezoneFormatter;
38 import org.unicode.cldr.util.XPathParts;
39 import org.xml.sax.SAXException;
40 
41 import com.ibm.icu.dev.test.TestFmwk;
42 import com.ibm.icu.text.BreakIterator;
43 import com.ibm.icu.text.DecimalFormat;
44 import com.ibm.icu.text.NumberFormat;
45 import com.ibm.icu.text.UTF16;
46 import com.ibm.icu.text.UnicodeSet;
47 import com.ibm.icu.util.ULocale;
48 
/**
 * Initial version of CLDR tests. Each test is named TestXXX. To run all the tests, use the options
 * <blockquote>-nothrow</blockquote>
 * To run a particular set of tests, include their names, like
 * <blockquote>-nothrow TestForIllegalAttributeValues TestMinimalLocalization</blockquote>
 * To show more information (logln), add -verbose
 * <p>
 * There are some Java system properties that can be used with the test. <br>
 * -DSHOW_FILES=&lt;anything&gt; shows all create/open of files. <br>
 * -DXML_MATCH=&lt;regular expression&gt; skips all locales that don't match the regular expression. <br>
 * -DXML_MAIN_DIR=&lt;filesystem directory&gt; resets to a different main directory (e.g. not cldr/common/main). For
 * example, some of the tools generate into a locale directory like
 * -DXML_MAIN_DIR=C:\Unicode-CVS2\cldr\common\gen\main\ so this can be used to check that directory. <br>
 * -DXML_SKIP_DRAFT=&lt;anything&gt; skips draft locales; the property only has to be set, its value is not inspected.
 */
64 public class CLDRTest extends TestFmwk {
65     /**
66      * privates
67      */
68     private static String MATCH;
69     private static String MAIN_DIR;
70     private static boolean SKIP_DRAFT;
71     private Set<String> locales;
72     private Set<String> languageLocales;
73     private Factory cldrFactory;
74     private CLDRFile resolvedRoot;
75     private CLDRFile resolvedEnglish;
76     private final UnicodeSet commonAndInherited = new UnicodeSet(
77         "[[:script=common:][:script=inherited:][:alphabetic=false:]]");
78     private static final String[] WIDTHS = { "narrow", "wide", "abbreviated", "short" };
79     private static final String[] MONTHORDAYS = { "day", "month" };
80     private Map<String, String> localeNameCache = new HashMap<>();
81     private CLDRFile english = null;
82 
83     private Set<String> surveyInfo = new TreeSet<>();
84 
85     /**
86      * TestFmwk boilerplate
87      */
main(String[] args)88     public static void main(String[] args) throws Exception {
89         MATCH = System.getProperty("XML_MATCH");
90         if (MATCH == null)
91             MATCH = ".*";
92         else
93             System.out.println("Resetting MATCH:" + MATCH);
94         MAIN_DIR = System.getProperty("XML_MAIN_DIR");
95         if (MAIN_DIR == null)
96             MAIN_DIR = CLDRPaths.MAIN_DIRECTORY;
97         else
98             System.out.println("Resetting MAIN_DIR:" + MAIN_DIR);
99         SKIP_DRAFT = System.getProperty("XML_SKIP_DRAFT") != null;
100         if (SKIP_DRAFT) System.out.println("Skipping Draft locales");
101 
102         double deltaTime = System.currentTimeMillis();
103         new CLDRTest().run(args);
104         deltaTime = System.currentTimeMillis() - deltaTime;
105         System.out.println("Seconds: " + deltaTime / 1000);
106 
107     }
108 
TestZZZZHack()109     public void TestZZZZHack() throws IOException {
110         // hack to get file written at the end of run.
111         PrintWriter surveyFile = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "surveyInfo.txt");
112         for (String s : surveyInfo) {
113             surveyFile.println(s);
114         }
115         surveyFile.close();
116     }
117 
118     /**
119      * TestFmwk boilerplate
120      */
CLDRTest()121     public CLDRTest() throws SAXException, IOException {
122         // TODO parameterize the directory and filter
123         cldrFactory = Factory.make(MAIN_DIR, MATCH);
124         // CLDRKey.main(new String[]{"-mde.*"});
125         locales = cldrFactory.getAvailable();
126         languageLocales = cldrFactory.getAvailableLanguages();
127         resolvedRoot = cldrFactory.make("root", true);
128         /*
129          * PrintWriter out = FileUtilities.openUTF8Writer(Utility.GEN_DIRECTORY + "resolved/", "root.xml");
130          * CLDRFile temp = (CLDRFile) resolvedRoot.clone();
131          * temp.write(out);
132          * out.close();
133          */
134         resolvedEnglish = cldrFactory.make("en", true);
135     }
136 
137     /**
138      * Check to make sure that the currency formats are kosher.
139      */
TestCurrencyFormats()140     public void TestCurrencyFormats() {
141         // String decimal = "//ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat[@type=\"standard\"]/";
142         // String currency = "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/";
143         for (String locale : locales) {
144             boolean isPOSIX = locale.indexOf("POSIX") >= 0;
145             logln("Testing: " + locale);
146             CLDRFile item = cldrFactory.make(locale, false);
147             for (String xpath : item) {
148                 NumericType type = NumericType.getNumericType(xpath);
149                 if (type == NumericType.NOT_NUMERIC) continue;
150                 String value = item.getStringValue(xpath);
151                 // at this point, we only have currency formats
152                 String pattern = DisplayAndInputProcessor.getCanonicalPattern(value, type, isPOSIX);
153                 if (!pattern.equals(value)) {
154                     String draft = "";
155                     if (item.getFullXPath(xpath).indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
156                     assertEquals(getLocaleAndName(locale) + draft + " " + type + " pattern incorrect", pattern, value);
157                 }
158             }
159         }
160     }
161 
162     /**
163      * Internal class
164      */
165     private static class ValueCount {
166         int count = 1;
167         String value;
168         String fullxpath;
169     }
170 
171     /**
172      * Verify that if all the children of a language locale do not have the same value for the same key.
173      */
TestCommonChildren()174     public void TestCommonChildren() {
175         if (disableUntilLater("TestCommonChildren")) return;
176 
177         Map<String, ValueCount> currentValues = new TreeMap<>();
178         Set<String> okValues = new TreeSet<>();
179 
180         for (String parent : languageLocales) {
181             logln("Testing: " + parent);
182             currentValues.clear();
183             okValues.clear();
184             Set<String> availableWithParent = cldrFactory.getAvailableWithParent(parent, true);
185             for (String locale : availableWithParent) {
186                 logln("\tTesting: " + locale);
187                 CLDRFile item = cldrFactory.make(locale, false);
188                 // Walk through all the xpaths, adding to currentValues
189                 // Whenever two values for the same xpath are different, we remove from currentValues, and add to
190                 // okValues
191                 for (String xpath : item) {
192                     if (okValues.contains(xpath)) continue;
193                     if (xpath.startsWith("//ldml/identity/")) continue; // skip identity elements
194                     String v = item.getStringValue(xpath);
195                     ValueCount last = currentValues.get(xpath);
196                     if (last == null) {
197                         ValueCount vc = new ValueCount();
198                         vc.value = v;
199                         vc.fullxpath = item.getFullXPath(xpath);
200                         currentValues.put(xpath, vc);
201                     } else if (v.equals(last.value)) {
202                         last.count++;
203                     } else {
204                         okValues.add(xpath);
205                         currentValues.remove(xpath);
206                     }
207                 }
208                 // at the end, only the keys left in currentValues are (possibly) faulty
209                 // they are actually bad IFF either
210                 // (a) the count is equal to the total (thus all children are the same), or
211                 // (b) their value is the same as the parent's resolved value (thus all children are the same or the
212                 // same
213                 // as the inherited parent value).
214             }
215             if (currentValues.size() == 0) continue;
216             int size = availableWithParent.size();
217             CLDRFile parentCLDR = cldrFactory.make(parent, true);
218             for (String xpath : currentValues.keySet()) {
219                 ValueCount vc = currentValues.get(xpath);
220                 if (vc.count == size || (vc.value.equals(parentCLDR.getStringValue(xpath))
221                     && vc.fullxpath.equals(parentCLDR.getStringValue(xpath)))) {
222                     String draft = "";
223                     if (vc.fullxpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
224                     String count = (vc.count == size ? "" : vc.count + "/") + size;
225                     warnln(getLocaleAndName(parent) + draft +
226                         "\tall children (" + count + ") have same value for:\t"
227                         + xpath + ";\t" + vc.value);
228                 }
229             }
230         }
231     }
232 
233     static String[] EXEMPLAR_SKIPS = { "/hourFormat", "/exemplarCharacters", "/pattern", "/localizedPatternChars" };
234 
235     /**
236      * Check that the exemplars include all characters in the data.
237      */
TestThatExemplarsContainAll()238     public void TestThatExemplarsContainAll() {
239         UnicodeSet allExemplars = new UnicodeSet();
240         if (disableUntilLater("TestThatExemplarsContainAll")) return;
241         Set<String> counts = new TreeSet<>();
242         int totalCount = 0;
243         UnicodeSet localeMissing = new UnicodeSet();
244         for (String locale : locales) {
245             if (locale.equals("root")) continue;
246             CLDRFile resolved = cldrFactory.make(locale, false); // FIX LATER
247             UnicodeSet exemplars = getFixedExemplarSet(locale, resolved);
248             CLDRFile plain = cldrFactory.make(locale, false);
249             int count = 0;
250             localeMissing.clear();
251             file: for (String xpath : plain) {
252                 for (int i = 0; i < EXEMPLAR_SKIPS.length; ++i) {
253                     if (xpath.indexOf(EXEMPLAR_SKIPS[i]) > 0) continue file; // skip some items.
254                 }
255                 if (SKIP_DRAFT) {
256                     String fullxpath = plain.getFullXPath(xpath);
257                     if (fullxpath.indexOf("[@draft=\"unconfirmed\"") > 0) continue;
258                 }
259                 if (xpath.startsWith("//ldml/posix/messages")) continue;
260                 String value = plain.getStringValue(xpath);
261                 allExemplars.addAll(value);
262                 if (!exemplars.containsAll(value)) {
263                     count++;
264                     UnicodeSet missing = new UnicodeSet().addAll(value).removeAll(exemplars);
265                     localeMissing.addAll(missing);
266                     logln(getLocaleAndName(locale) + "\t" + xpath + "\t<" + value + "> contains " + missing
267                         + ", not in exemplars");
268                     surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' contains characters "
269                         + missing.toPattern(false) + ", which are not in exemplars");
270                 }
271             }
272             NumberFormat nf = new DecimalFormat("000");
273             if (count != 0) {
274                 totalCount += count;
275                 counts.add(nf.format(count) + "\t" + getLocaleAndName(locale) + "\t" + localeMissing);
276             }
277             if (localeMissing.size() != 0) {
278                 errln(getLocaleAndName(locale) + "\t uses " + localeMissing + ", not in exemplars");
279             }
280         }
281         for (String c : counts) {
282             logln(c);
283         }
284         logln("Total Count: " + totalCount);
285         System.out.println("All exemplars: " + allExemplars.toPattern(true));
286     }
287 
288     // Get Date-Time in milliseconds
getDateTimeinMillis(int year, int month, int date)289     private static long getDateTimeinMillis(int year, int month, int date) {
290         Calendar cal = Calendar.getInstance();
291         cal.set(year, month, date);
292         return cal.getTimeInMillis();
293     }
294 
295     static final long disableDate = getDateTimeinMillis(2005, 6 - 1, 3);
296 
297     /**
298      *
299      */
disableUntilLater(String string)300     private boolean disableUntilLater(String string) {
301         if (new Date().getTime() >= disableDate) return false;
302         warnln("Disabling " + string + " until " + new Date(disableDate));
303         return true;
304     }
305 
306     /**
307      * Internal
308      */
getFixedExemplarSet(String locale, CLDRFile cldrfile)309     private UnicodeSet getFixedExemplarSet(String locale, CLDRFile cldrfile) {
310         UnicodeSet exemplars = getExemplarSet(cldrfile, "");
311         if (exemplars.size() == 0) {
312             errln(getLocaleAndName(locale) + " has empty exemplar set");
313         }
314         exemplars.addAll(getExemplarSet(cldrfile, "standard"));
315         UnicodeSet auxiliary = getExemplarSet(cldrfile, "auxiliary");
316         if (exemplars.containsSome(auxiliary)) {
317             errln(getLocaleAndName(locale) + "Auxiliary & main exemplars should be disjoint, but overlap with " +
318                 new UnicodeSet(exemplars).retainAll(auxiliary) +
319                 ": change auxiliary to " + auxiliary.removeAll(exemplars));
320         }
321         exemplars.addAll(auxiliary);
322         exemplars.addAll(commonAndInherited);
323         return exemplars;
324     }
325 
326     /**
327      * @return Gets an exemplar set. Also verifies that the set contains no properties.
328      */
getExemplarSet(CLDRFile cldrfile, String type)329     public UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) {
330         if (type.length() != 0) type = "[@type=\"" + type + "\"]";
331         String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type);
332         if (v == null) return new UnicodeSet();
333         String pattern = v;
334         if (pattern.indexOf("[:") >= 0 || pattern.indexOf("\\p{") > 0) {
335             errln(getLocaleName(cldrfile.getLocaleID()) + " exemplar pattern contains property: " + pattern);
336         }
337         try {
338             UnicodeSet result = new UnicodeSet(v, UnicodeSet.CASE);
339             result.remove(0x20);
340             return result;
341         } catch (RuntimeException e) {
342             e.printStackTrace();
343             errln(getLocaleAndName(cldrfile.getLocaleID()) + " has illegal exemplar set: <" + v + ">");
344             return new UnicodeSet();
345         }
346         // if (type.length() != 0) System.out.println("fetched set for " + type);
347     }
348 
getLocaleAndName(String locale)349     public String getLocaleAndName(String locale) {
350         return locale + " (" + getLocaleName(locale) + ")";
351     }
352 
353     /**
354      * @return the ID plus its localization (for language, script, and territory IDs only)
355      */
getIDAndLocalization(String id)356     public String getIDAndLocalization(String id) {
357         return id + " " + getLocalization(id);
358     }
359 
360     /**
361      * @return the localization (for language, script, and territory IDs only)
362      */
getLocalization(String id)363     public String getLocalization(String id) {
364         if (english == null) english = cldrFactory.make("en", true);
365         if (id.length() == 0) return "?";
366         // pick on basis of case
367         char ch = id.charAt(0);
368         if ('a' <= ch && ch <= 'z') return getName(english, "languages/language", id);
369         if (id.length() == 4 && 'A' <= ch && ch <= 'Z') return getName(english, "scripts/script", id);
370         return getName(english, "territories/territory", id);
371     }
372 
373     /**
374      * Internal
375      */
getIDAndLocalization(Set<String> missing)376     private String getIDAndLocalization(Set<String> missing) {
377         StringBuffer buffer = new StringBuffer();
378         for (String next : missing) {
379             if (buffer.length() != 0) buffer.append("; ");
380             buffer.append(getIDAndLocalization(next));
381         }
382         return buffer.toString();
383     }
384 
getLocaleName(String locale)385     public String getLocaleName(String locale) {
386         String name = localeNameCache.get(locale);
387         if (name != null) return name;
388         if (english == null) english = cldrFactory.make("en", true);
389         String result = english.getName(locale);
390         /*
391          * Collection c = Utility.splitList(locale, '_', false, null);
392          * String[] pieces = new String[c.size()];
393          * c.toArray(pieces);
394          * int i = 0;
395          * String result = getName(english, "languages/language", pieces[i++]);
396          * if (pieces[i].length() == 0) return result;
397          * if (pieces[i].length() == 4) {
398          * result += " " + getName(english, "scripts/script", pieces[i++]);
399          * }
400          * if (pieces[i].length() == 0) return result;
401          * result += " " + getName(english, "territories/territory", pieces[i++]);
402          * if (pieces[i].length() == 0) return result;
403          * result += " " + getName(english, "variant/variants", pieces[i++]);
404          */
405         localeNameCache.put(locale, result);
406         return result;
407     }
408 
409     /**
410      * Internal
411      */
getName(CLDRFile english, String kind, String type)412     private String getName(CLDRFile english, String kind, String type) {
413         String v = english.getStringValue("//ldml/localeDisplayNames/" + kind + "[@type=\"" + type + "\"]");
414         if (v == null) return "<" + type + ">";
415         return v;
416     }
417 
418     /**
419      * Make sure we are only using attribute values that are in RFC3066bis, the Olson database (with aliases removed)
420      * or ISO 4217
421      *
422      * @throws IOException
423      */
TestForIllegalAttributeValues()424     public void TestForIllegalAttributeValues() {
425         // check for illegal attribute values that are not in the DTD
426         Map<String, Set<String>> result = new TreeMap<>();
427         Map<String, Set<String>> totalResult = new TreeMap<>();
428         for (String locale : locales) {
429             logln("Testing: " + locale);
430             CLDRFile item = cldrFactory.make(locale, false);
431             result.clear();
432             Set<String> xpathFailures = null; // don't collect
433             // XPathParts parts;
434             // String xpath;
435             // CLDRFile.StringValue value;
436             // String element;
437             // Map attributes;
438             checkAttributeValidity(item, result, xpathFailures);
439 
440             // now show
441             //String localeName = getLocaleAndName(locale);
442             for (Iterator<String> it3 = result.keySet().iterator(); it3.hasNext();) {
443                 String code = it3.next();
444                 Set<String> avalues = result.get(code);
445                 errln(getLocaleAndName(locale) + "\tillegal attribute value for " + code + ", value:\t" + show(avalues));
446                 Set<String> totalvalues = totalResult.get(code);
447                 if (totalvalues == null) totalResult.put(code, totalvalues = new TreeSet<>());
448                 totalvalues.addAll(avalues);
449             }
450         }
451         for (Iterator<String> it3 = totalResult.keySet().iterator(); it3.hasNext();) {
452             String code = it3.next();
453             Set<String> avalues = totalResult.get(code);
454             errln("All illegal attribute values for " + code + ", value:\t" + show(avalues));
455         }
456     }
457 
458     /**
459      * Tests whether the display names have any collisions, e.g. if in the fully resolved
460      * locale $ is used for both USD and UAD.
461      *
462      */
TestDisplayNameCollisions()463     public void TestDisplayNameCollisions() {
464         if (disableUntilLater("TestDisplayNameCollisions")) return;
465 
466         Map<String, String>[] maps = new HashMap[CLDRFile.LIMIT_TYPES];
467         for (int i = 0; i < maps.length; ++i) {
468             maps[i] = new HashMap<>();
469         }
470         Set<String> collisions = new TreeSet<>();
471         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
472             String locale = it.next();
473             CLDRFile item = cldrFactory.make(locale, true);
474             for (int i = 0; i < maps.length; ++i) {
475                 maps[i].clear();
476             }
477             collisions.clear();
478 
479             for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
480                 String xpath = it2.next();
481                 int nameType = CLDRFile.getNameType(xpath);
482                 if (nameType < 0) continue;
483                 String value = item.getStringValue(xpath);
484                 String xpath2 = maps[nameType].get(value);
485                 if (xpath2 == null) {
486                     maps[nameType].put(value, xpath);
487                     continue;
488                 }
489                 collisions.add(CLDRFile.getNameTypeName(nameType) + "\t" + value + "\t" + xpath + "\t" + xpath2);
490                 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is a duplicate of what is in " + xpath2);
491             }
492             String name = getLocaleAndName(locale) + "\t";
493             for (Iterator<String> it2 = collisions.iterator(); it2.hasNext();) {
494                 errln(name + it2.next());
495             }
496         }
497     }
498 
499     /**
500      * Checks the validity of attributes, based on StandardCodes.
501      * The invalid codes are added to badCodes, and the failing xpaths are added to xpathFailures.
502      *
503      * @param item
504      * @param badCodes
505      * @param xpathFailures
506      */
checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures)507     public static void checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures) {
508         for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
509             String xpath = it2.next();
510             XPathParts parts = XPathParts.getFrozenInstance(item.getFullXPath(xpath));
511             for (int i = 0; i < parts.size(); ++i) {
512                 if (parts.getAttributeCount(i) == 0) {
513                     continue;
514                 }
515                 String element = parts.getElement(i);
516                 Map<String, String> attributes = parts.getAttributes(i);
517                 for (Iterator<String> it3 = attributes.keySet().iterator(); it3.hasNext();) {
518                     String attribute = it3.next();
519                     String avalue = attributes.get(attribute);
520                     checkValidity(xpath, element, attribute, avalue, badCodes, xpathFailures);
521                 }
522             }
523         }
524     }
525 
526     /**
527      * Internal
528      */
show(Collection<String> avalues)529     private String show(Collection<String> avalues) {
530         StringBuffer result = new StringBuffer("{");
531         boolean first = true;
532         for (Iterator<String> it3 = avalues.iterator(); it3.hasNext();) {
533             if (first)
534                 first = false;
535             else
536                 result.append(", ");
537             result.append(it3.next().toString());
538         }
539         result.append("}");
540         return result.toString();
541     }
542 
543     /**
544      * Internal function
545      */
checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, Set<String> xpathsFailing)546     private static void checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results,
547         Set<String> xpathsFailing) {
548         StandardCodes codes = StandardCodes.make();
549         if (attribute.equals("type")) {
550             boolean checkReplacements = xpath.indexOf("/identity") < 0;
551             if (element.equals("currency"))
552                 checkCodes(xpath, "currency", avalue, codes, results, xpathsFailing, checkReplacements);
553             else if (element.equals("script"))
554                 checkCodes(xpath, "script", avalue, codes, results, xpathsFailing, checkReplacements);
555             else if (element.equals("territory"))
556                 checkCodes(xpath, "territory", avalue, codes, results, xpathsFailing, checkReplacements);
557             else if (element.equals("language"))
558                 checkCodes(xpath, "language", avalue, codes, results, xpathsFailing, checkReplacements);
559             else if (element.equals("zone"))
560                 checkCodes(xpath, "tzid", avalue, codes, results, xpathsFailing, checkReplacements);
561         }
562     }
563 
564     /**
565      * Internal function
566      *
567      * @param checkReplacements
568      *            TODO
569      */
570     private static void checkCodes(String xpath, String code, String avalue, StandardCodes codes, Map<String, Set<String>> results,
571         Set<String> xpathFailures, boolean checkReplacements) {
572         // ok if code is found AND it has no replacement
573         if (codes.getData(code, avalue) != null
574             && (!checkReplacements || codes.getReplacement(code, avalue) == null)) return;
575 
576         if (xpathFailures != null) xpathFailures.add(xpath);
577         if (results == null) return;
578         Set<String> s = results.get(code);
579         if (s == null) {
580             s = new TreeSet<>();
581             results.put(code, s);
582         }
583         s.add(avalue);
584     }
585 
586     /**
587      * Verify that a small set of locales (currently just English) has everything translated.
588      *
589      * @throws IOException
590      */
591     public void TestCompleteLocales() {
592         // just test English for now
593         if (english == null) english = cldrFactory.make("en", true);
594         checkTranslatedCodes(english);
595     }
596 
597     /**
598      * Tests that the file contains codes for all main display name ids: language, script, territory, tzid, currency.
599      */
600     private void checkTranslatedCodes(CLDRFile cldrfile) {
601         StandardCodes codes = StandardCodes.make();
602         checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency", "/displayName");
603         // can't check timezones for English.
604         // checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone", "");
605         checkTranslatedCode(cldrfile, codes, "language", "//ldml/localeDisplayNames/languages/language", "");
606         checkTranslatedCode(cldrfile, codes, "script", "//ldml/localeDisplayNames/scripts/script", "");
607         checkTranslatedCode(cldrfile, codes, "territory", "//ldml/localeDisplayNames/territories/territory", "");
608         checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant", "");
609     }
610 
611     private void checkTranslatedCode(CLDRFile cldrfile, StandardCodes codes, String type, String prefix, String postfix) {
612         Map<String, Set<String>> completionExceptions = getCompletionExceptions();
613         Set<String> codeItems = codes.getGoodAvailableCodes(type);
614         int count = 0;
615         Set<String> exceptions = completionExceptions.get(type);
616         for (String code : codeItems) {
617             String rfcname = codes.getData(type, code);
618             // if (rfcname.equals("ZZ")) continue;
619             ++count;
620             if (rfcname.equals("PRIVATE USE")) continue;
621             String fullFragment = prefix + "[@type=\"" + code + "\"]" + postfix;
622             String v = cldrfile.getStringValue(fullFragment);
623             if (v == null) {
624                 errln("Missing translation for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">");
625                 continue;
626             }
627             String translation = v;
628             if (translation.equals(code)) {
629                 if (exceptions != null && exceptions.contains(code)) continue;
630                 errln("Translation = code for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">");
631                 continue;
632             }
633         }
634         logln("Total " + type + ":\t" + count);
635     }
636 
637     private Map<String, Set<String>> theCompletionExceptions = null;
638 
639     private Map<String, Set<String>> getCompletionExceptions() {
640         if (theCompletionExceptions == null) {
641             theCompletionExceptions = new HashMap<>();
642             final Set<String> scriptExceptions = new HashSet<>();
643             scriptExceptions.add("Cham");
644             scriptExceptions.add("Modi");
645             scriptExceptions.add("Thai");
646             scriptExceptions.add("Toto");
647             theCompletionExceptions.put("script", scriptExceptions);
648         }
649         return theCompletionExceptions;
650     }
651 
652     // <territoryContainment><group type="001" contains="002 009 019 142 150"/>
653     // <languageData><language type="af" scripts="Latn" territories="ZA"/>
654     void getSupplementalData(Map<String, Set<String>> language_scripts, Map<String, Set<String>> language_territories,
655         Map<String, Set<String>> group_territory,
656         Map<String, Set<String>> territory_currencies, Map<String, Map<String, String>> aliases) {
657 
658         boolean SHOW = false;
659         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
660         CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
661         for (Iterator<String> it = supp.iterator(); it.hasNext();) {
662             String path = it.next();
663             try {
664                 XPathParts parts = XPathParts.getFrozenInstance(supp.getFullXPath(path));
665                 Map<String, String> m;
666                 String type = "";
667                 if (aliases != null && parts.findElement("alias") >= 0) {
668                     m = parts.findAttributes(type = "languageAlias");
669                     if (m == null) m = parts.findAttributes(type = "territoryAlias");
670                     if (m != null) {
671                         Map top = aliases.get(type);
672                         if (top == null) {
673                             aliases.put(type, top = new TreeMap());
674                         }
675                         top.put(m.get("type"), m.get("replacement"));
676                     }
677                 }
678                 if (territory_currencies != null) {
679                     m = parts.findAttributes("region");
680                     if (m != null) {
681                         String region = m.get("iso3166");
682                         Set s = territory_currencies.get(region);
683                         if (s == null) {
684                             territory_currencies.put(region, s = new LinkedHashSet());
685                         }
686                         m = parts.findAttributes("currency");
687                         if (m == null) {
688                             warnln("missing currency for region: " + path);
689                             continue;
690                         }
691                         String currency = m.get("iso4217");
692                         s.add(currency);
693                         m = parts.findAttributes("alternate");
694                         String alternate = m == null ? null : (String) m.get("iso4217");
695                         if (alternate != null) {
696                             s.add(alternate);
697                         }
698                         continue;
699                     }
700                 }
701                 m = parts.findAttributes("group");
702                 if (m != null) {
703                     if (group_territory == null) continue;
704                     type = m.get("type");
705                     String contains = m.get("contains");
706                     group_territory.put(type, new TreeSet(CldrUtility.splitList(contains, ' ', true)));
707                     continue;
708                 }
709                 m = parts.findAttributes("language");
710                 if (m == null) continue;
711                 String language = m.get("type");
712                 String scripts = m.get("scripts");
713                 if (scripts == null)
714                     language_scripts.put(language, new TreeSet<String>());
715                 else {
716                     language_scripts.put(language, new TreeSet<>(CldrUtility.splitList(scripts, ' ', true)));
717                     if (SHOW)
718                         System.out.println(getIDAndLocalization(language) + "\t\t"
719                             + getIDAndLocalization(language_scripts.get(language)));
720                 }
721                 String territories = m.get("territories");
722                 if (territories == null)
723                     language_territories.put(language, new TreeSet<String>());
724                 else {
725                     language_territories.put(language, new TreeSet<>(CldrUtility.splitList(territories, ' ', true)));
726                     if (SHOW)
727                         System.out.println(getIDAndLocalization(language) + "\t\t"
728                             + getIDAndLocalization(language_territories.get(language)));
729                 }
730             } catch (RuntimeException e) {
731                 throw (IllegalArgumentException) new IllegalArgumentException("Failure with: " + path).initCause(e);
732             }
733         }
734     }
735 
736     /**
737      * Verify that the minimal localizations are present.
738      */
TestMinimalLocalization()739     public void TestMinimalLocalization() throws IOException {
740         if (disableUntilLater("TestMinimalLocalization")) return;
741 
742         boolean testDraft = false;
743         Map<String, Set<String>> language_scripts = new HashMap<>();
744         Map<String, Set<String>> language_territories = new HashMap<>();
745         getSupplementalData(language_scripts, language_territories, null, null, null);
746         LanguageTagParser localIDParser = new LanguageTagParser();
747         // see http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/design/minimal_requirements.htm
748         int[] failureCount = new int[1];
749         int[] warningCount = new int[1];
750         for (Iterator<String> it = languageLocales.iterator(); it.hasNext();) {
751             String locale = it.next();
752             if (locale.equals("root")) continue;
753             // if (!locale.equals("zh_Hant")) continue;
754 
755             CLDRFile item = cldrFactory.make(locale, true);
756             if (!testDraft && item.isDraft()) {
757                 logln(getLocaleAndName(locale) + "\tskipping draft");
758                 continue;
759             }
760             UnicodeSet exemplars = getFixedExemplarSet(locale, item);
761             CLDRFile missing = SimpleFactory.makeFile(locale);
762             failureCount[0] = 0;
763             warningCount[0] = 0;
764             localIDParser.set(locale);
765             String language = localIDParser.getLanguage();
766             logln("Testing: " + locale);
767             // languages
768             Set<String> languages = new TreeSet<>(CldrUtility.MINIMUM_LANGUAGES);
769             languages.add(language);
770             // LANGUAGE_NAME = 0, SCRIPT_NAME = 1, TERRITORY_NAME = 2, VARIANT_NAME = 3,
771             // CURRENCY_NAME = 4, CURRENCY_SYMBOL = 5, TZID = 6
772 
773             checkForItems(item, languages, CLDRFile.LANGUAGE_NAME, missing, failureCount, null);
774 
775             /*
776              * checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency");
777              * checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone");
778              * checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant");
779              */
780 
781             Set<String> scripts = new TreeSet<>();
782             scripts.add("Latn");
783             Set<String> others = language_scripts.get(language);
784             if (others != null) scripts.addAll(others);
785             checkForItems(item, scripts, CLDRFile.SCRIPT_NAME, missing, failureCount, null);
786 
787             Set<String> countries = new TreeSet<>(CldrUtility.MINIMUM_TERRITORIES);
788             others = language_territories.get(language);
789             if (others != null) countries.addAll(others);
790             checkForItems(item, countries, CLDRFile.TERRITORY_NAME, missing, failureCount, null);
791 
792             Set<String> currencies = new TreeSet<>();
793             StandardCodes sc = StandardCodes.make();
794             for (Iterator<String> it2 = countries.iterator(); it2.hasNext();) {
795                 String country = it2.next();
796                 Set<String> countryCurrencies = sc.getMainCurrencies(country);
797                 if (countryCurrencies == null) {
798                     errln("Internal Error: no currencies for " + country + ", locale: " + locale);
799                 } else {
800                     currencies.addAll(countryCurrencies);
801                 }
802             }
803             checkForItems(item, currencies, CLDRFile.CURRENCY_NAME, missing, failureCount, null);
804             checkForItems(item, currencies, CLDRFile.CURRENCY_SYMBOL, missing, failureCount, exemplars);
805 
806             // context=format and width=wide; context=stand-alone & width=abbreviated
807             Set<String> months = new TreeSet<>();
808             for (int i = 1; i <= 12; ++i)
809                 months.add(i + "");
810             Set<String> days = new TreeSet<>(Arrays.asList(new String[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" }));
811             for (int i = -7; i < 0; ++i) {
812                 checkForItems(item, (i < -4 ? months : days), i, missing, failureCount, null);
813             }
814 
815             String filename = "missing_" + locale + ".xml";
816             if (failureCount[0] > 0 || warningCount[0] > 0) {
817                 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "missing/", filename);
818                 missing.write(out);
819                 out.close();
820                 // String s = getIDAndLocalization(missing);
821                 String message = "missing localizations, creating file"
822                     + getNormalizedPath(CLDRPaths.GEN_DIRECTORY, "missing", filename);
823                 if (failureCount[0] > 0)
824                     warnln(getLocaleAndName(locale) + "\t" + message);
825                 else
826                     logln(getLocaleAndName(locale) + "\tpossibly " + message);
827             } else {
828                 new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).delete();
829             }
830         }
831     }
832 
833     /**
834      * Internal
835      */
getDateKey(String monthOrDay, String width, String code)836     private String getDateKey(String monthOrDay, String width, String code) {
837         // String context = width.equals("narrow") ? "format" : "stand-alone";
838         return "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/"
839             + monthOrDay + "s/" + monthOrDay + "Context[@type=\"format\"]/"
840             + monthOrDay + "Width[@type=\"" + width + "\"]/" + monthOrDay
841             + "[@type=\"" + code + "\"]";
842     }
843 
844     /**
845      * Internal
846      */
getDateKey(int type, String code)847     private String getDateKey(int type, String code) {
848         // type is 6..4 for months abbrev..narrow, 3..0 for days short..narrow
849         int monthOrDayType = 0, widthType = type;
850         if (type >= 4) {
851             monthOrDayType = 1;
852             widthType -= 4;
853         }
854         return getDateKey(MONTHORDAYS[monthOrDayType], WIDTHS[widthType], code);
855     }
856 
857     /**
858      * @param item
859      * @param codes
860      * @param missing
861      * @param exemplarTest
862      *            TODO
863      *            TODO
864      */
checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], UnicodeSet exemplarTest)865     private void checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[],
866         UnicodeSet exemplarTest) {
867         // check codes
868         for (Iterator<String> it2 = codes.iterator(); it2.hasNext();) {
869             String code = it2.next();
870             String key;
871             if (type >= 0) {
872                 key = CLDRFile.getKey(type, code);
873             } else {
874                 key = getDateKey(-type - 1, code);
875             }
876             String v = item.getStringValue(key);
877             String rootValue = resolvedRoot.getStringValue(key);
878             if (v == null || v.equals(rootValue) && (exemplarTest == null || !exemplarTest.containsAll(rootValue))) {
879                 String englishValue = resolvedEnglish.getStringValue(key);
880                 String transValue;
881                 if (englishValue != null) {
882                     transValue = englishValue;
883                 } else {
884                     transValue = code;
885                 }
886                 missing.add(key, "TODO " + transValue);
887                 failureCount[0]++;
888             } else {
889                 logln("\t" + code + "\t" + v);
890             }
891         }
892     }
893 
894     /*
895      * void showTestStr() {
896      * LocaleIDParser lparser = new LocaleIDParser();
897      * Collection s = split(teststr,',', true, new ArrayList());
898      * for (Iterator it = s.iterator(); it.hasNext();) {
899      * String item = (String)it.next();
900      * lparser.set(item.replace('?', '_'));
901      * String region = lparser.getRegion();
902      * System.out.print(item.replace('?', '-') + " (" + getLocalization(region) + "), ");
903      * //System.out.print(getLocalization(region) + ", ");
904      * }
905      * }
906      * static String teststr =
907      * "en?AG, en?AI, en?AS, en?AU, en?IN, en?BB, en?BE, en?BM, en?BN, en?BS, en?BW, en?BZ, en?CA, en?CK, en?CM, en?DM, en?ER, en?ET, en?FJ, en?FK, en?FM, en?GB, en?GD, en?GH, en?GI, en?GM, en?GU, en?GY, en?HK, en?IE, en?IL, en?IO, en?JM, en?KE, en?KI, en?KN, en?KY, en?LC, en?LR, en?LS, en?MH, en?MP, en?MS, en?MT, en?MU, en?MW, en?NA, en?NF, en?NG, en?NR, en?NU, en?NZ, en?PG, en?PH, en?PK, en?PN, en?PR, en?PW, en?RW, en?SB, en?SC, en?SG, en?SH, en?SL, en?SO, en?SZ, en?TC, en?TK, en?TO, en?TT, en?UG, en?UM, en?US, en?VC, en?VG, en?VI, en?VU, en?WS, en?ZA, en?ZM, en?ZW"
908      * ;
909      */
910 
911     CldrUtility.CollectionTransform EnglishName = new CldrUtility.CollectionTransform() {
912         @Override
913         public Object transform(Object source) {
914             // TODO Auto-generated method stub
915             return getLocalization(source.toString()) + " (" + source + ")";
916         }
917     };
918 
919     CldrUtility.CollectionTransform EnglishCurrencyName = new CldrUtility.CollectionTransform() {
920         @Override
921         public Object transform(Object source) {
922             if (english == null) english = cldrFactory.make("en", true);
923             return english.getName("currency", source.toString()) + " (" + source + ")";
924         }
925     };
926 
927     /**
928      * Tests that the supplemental data is well-formed.
929      *
930      */
TestSupplementalData()931     public void TestSupplementalData() {
932         Map<String, Set<String>> language_scripts = new TreeMap<>();
933         Map<String, Set<String>> language_territories = new TreeMap<>();
934         Map<String, Set<String>> groups = new TreeMap<>();
935         Map<String, Set<String>> territory_currencies = new TreeMap<>();
936         Map<String, Map<String, String>> aliases = new TreeMap<>();
937         getSupplementalData(language_scripts, language_territories, groups, territory_currencies, aliases);
938         Set<String> sTerritories = new TreeSet<>();
939         for (Iterator<Set<String>> it = language_territories.values().iterator(); it.hasNext();) {
940             sTerritories.addAll(it.next());
941         }
942         StandardCodes sc = StandardCodes.make();
943         Set<String> fullTerritories = sc.getAvailableCodes("territory");
944         Set<String> fullLanguages = sc.getAvailableCodes("language");
945 
946         Set<String> allLanguages = new TreeSet<>(language_scripts.keySet());
947         allLanguages.addAll(language_territories.keySet());
948         for (Iterator<String> it = allLanguages.iterator(); it.hasNext();) {
949             Object language = it.next();
950             Set<String> scripts = language_scripts.get(language);
951             Set<String> territories = language_territories.get(language);
952             logln(EnglishName.transform(language)
953                 + " scripts: " + EnglishName.transform(scripts)
954                 + " territories: " + EnglishName.transform(territories));
955         }
956 
957         Map<String, String> changedLanguage = new TreeMap<>();
958         for (Iterator<String> it = fullLanguages.iterator(); it.hasNext();) {
959             String code = it.next();
960             List<String> data = sc.getFullData("language", code);
961             if (data.size() < 3) {
962                 System.out.println("data problem: " + data);
963                 continue;
964             }
965             String replacement = data.get(2);
966             if (!replacement.equals("")) {
967                 if (!replacement.equals("--")) changedLanguage.put(code, replacement);
968                 continue;
969             }
970         }
971 
972         // remove private use, deprecated, groups
973         Set<String> standardTerritories = new TreeSet<>();
974         Map<String, String> changedTerritory = new TreeMap<>();
975         for (Iterator<String> it = fullTerritories.iterator(); it.hasNext();) {
976             String code = it.next();
977             if (code.equals("200")) continue; // || code.equals("YU") || code.equals("PZ")
978             List<String> data = sc.getFullData("territory", code);
979             if (data.get(0).equals("PRIVATE USE")) continue;
980             if (!data.get(2).equals("")) {
981                 if (!data.get(2).equals("--")) changedTerritory.put(code, data.get(2));
982                 continue;
983             }
984             standardTerritories.add(code);
985         }
986         standardTerritories.removeAll(groups.keySet());
987 
988         if (!standardTerritories.containsAll(sTerritories)) {
989             TreeSet<String> extras = new TreeSet<>(sTerritories);
990             extras.removeAll(standardTerritories);
991             errln("Supplemental Language Territories contain illegal values: " + EnglishName.transform(extras));
992         }
993         if (!sTerritories.containsAll(standardTerritories)) {
994             TreeSet<String> extras = new TreeSet<>(standardTerritories);
995             extras.removeAll(sTerritories);
996             warnln("Missing Language Territories: " + EnglishName.transform(extras));
997         }
998 
999         // now test currencies
1000         logln("Check that no illegal territories are used");
1001         if (!standardTerritories.containsAll(territory_currencies.keySet())) {
1002             TreeSet<String> extras = new TreeSet<>(territory_currencies.keySet());
1003             extras.removeAll(fullTerritories);
1004             if (extras.size() != 0) errln("Currency info -- Illegal Territories: " + EnglishName.transform(extras));
1005             extras = new TreeSet<>(territory_currencies.keySet());
1006             extras.retainAll(fullTerritories);
1007             extras.removeAll(standardTerritories);
1008             if (extras.size() != 0) warnln("Currency info -- Archaic Territories: " + EnglishName.transform(extras));
1009         }
1010         logln("Check that no territories are missing");
1011         if (!territory_currencies.keySet().containsAll(standardTerritories)) {
1012             TreeSet<String> extras = new TreeSet<>(standardTerritories);
1013             extras.removeAll(territory_currencies.keySet());
1014             errln("Currency info -- Missing Territories: " + EnglishName.transform(extras));
1015         }
1016         Set<String> currencies = new TreeSet<>();
1017         for (Iterator<Set<String>> it = territory_currencies.values().iterator(); it.hasNext();) {
1018             currencies.addAll(it.next());
1019         }
1020         logln("Check that no illegal currencies are used");
1021         Set<String> legalCurrencies = new TreeSet<>(sc.getAvailableCodes("currency"));
1022         // first remove non-ISO
1023         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) {
1024             String code = it.next();
1025             List<String> data = sc.getFullData("currency", code);
1026             if ("X".equals(data.get(3))) it.remove();
1027         }
1028         if (!legalCurrencies.containsAll(currencies)) {
1029             TreeSet<String> extras = new TreeSet<>(currencies);
1030             extras.removeAll(legalCurrencies);
1031             errln("Currency info -- Illegal Currencies: " + EnglishCurrencyName.transform(extras));
1032         }
1033         logln("Check that there are no missing currencies");
1034         if (!currencies.containsAll(legalCurrencies)) {
1035             TreeSet<String> extras = new TreeSet<>(legalCurrencies);
1036             extras.removeAll(currencies);
1037             Map<String, Set<String>> failures = new TreeMap<>();
1038             for (Iterator<String> it = extras.iterator(); it.hasNext();) {
1039                 String code = it.next();
1040                 List<String> data = sc.getFullData("currency", code);
1041                 if (data.get(1).equals("ZZ")) continue;
1042                 String type = data.get(3) + "/" + data.get(1);
1043                 Set<String> s = failures.get(type);
1044                 if (s == null) failures.put(type, s = new TreeSet<>());
1045                 s.add(code);
1046             }
1047             for (Iterator<String> it = failures.keySet().iterator(); it.hasNext();) {
1048                 String type = it.next();
1049                 Set<String> s = failures.get(type);
1050                 warnln("Currency info -- Missing Currencies: " + type + "\t \u2192 " + EnglishCurrencyName.transform(s));
1051             }
1052         }
1053         logln("Missing English currency names");
1054         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) {
1055             String currency = it.next();
1056             String name = english.getName("currency", currency);
1057             if (name == null) {
1058                 String standardName = sc.getFullData("currency", currency).get(0);
1059                 logln("\t\t\t<currency type=\"" + currency + "\">");
1060                 logln("\t\t\t\t<displayName>" + standardName + "</displayName>");
1061                 logln("\t\t\t</currency>");
1062             }
1063         }
1064         logln("Check Aliases");
1065         for (Iterator<String> it = aliases.keySet().iterator(); it.hasNext();) {
1066             // the first part of the mapping had better not be in the standardTerritories
1067             String key = it.next();
1068             Map<String, String> submap = aliases.get(key);
1069             if (key.equals("territoryAlias")) {
1070                 checkEqual(key, submap, changedTerritory);
1071             } else if (key.equals("languageAlias")) {
1072                 for (Iterator<String> it2 = submap.keySet().iterator(); it2.hasNext();) {
1073                     String k = it2.next();
1074                     String value = submap.get(k);
1075                     if (value.indexOf("_") >= 0) it2.remove();
1076                 }
1077                 checkEqual(key, submap, changedLanguage);
1078             }
1079         }
1080     }
1081 
1082     /**
1083      *
1084      */
checkEqual(String title, Map map1, Map map2)1085     private void checkEqual(String title, Map map1, Map map2) {
1086         Set foo = new TreeSet(map1.keySet());
1087         foo.removeAll(map2.keySet());
1088         if (!foo.isEmpty()) errln("Extraneous Aliases: " + title + "\t" + foo);
1089         foo = new TreeSet(map2.keySet());
1090         foo.removeAll(map1.keySet());
1091         if (!foo.isEmpty()) errln("Missing Aliases: " + title + "\t" + foo);
1092         foo = map2.keySet();
1093         foo.retainAll(map1.keySet());
1094         for (Iterator it = foo.iterator(); it.hasNext();) {
1095             Object key = it.next();
1096             Object result1 = map1.get(key);
1097             Object result2 = map2.get(key);
1098             if (!result1.equals(result2))
1099                 errln("Missing Aliases: " + title + "\t" + key + "\t" + result1 + " != " + result2);
1100         }
1101     }
1102 
1103     /**
1104      * Test that the zone ids are well-formed.
1105      *
1106      */
TestZones()1107     public void TestZones() {
1108         StandardCodes sc = StandardCodes.make();
1109 
1110         Map<String, String> defaultNames = new TreeMap();
1111         Map<String, String> old_new = sc.getZoneLinkold_new();
1112         Set<String> core = sc.getZoneData().keySet();
1113         logln("Checking for collisions with last field");
1114         for (Iterator<String> it = core.iterator(); it.hasNext();) {
1115             String currentItem = it.next();
1116             String defaultName = TimezoneFormatter.getFallbackName(currentItem);
1117             String fullName = defaultNames.get(defaultName);
1118             if (fullName == null)
1119                 defaultNames.put(defaultName, currentItem);
1120             else {
1121                 errln("Collision between: " + currentItem + " AND " + fullName);
1122             }
1123         }
1124 
1125         logln("Checking that all links are TO canonical zones");
1126         Set<String> s = new TreeSet<>(old_new.values());
1127         s.removeAll(core);
1128         if (s.size() != 0) {
1129             errln("Links go TO zones that are not canonical! " + s);
1130         }
1131 
1132         logln("Checking that no links are FROM canonical zones");
1133         s = new TreeSet<>(core);
1134         s.retainAll(old_new.keySet());
1135         if (s.size() != 0) {
1136             errln("Links go FROM zones that are canonical! " + s);
1137         }
1138 
1139         logln("Checking that the zones with rule data are all canonical");
1140         Set<String> zonesWithRules = sc.getZone_rules().keySet();
1141         s.clear();
1142         s.addAll(zonesWithRules);
1143         s.removeAll(core);
1144         if (s.size() != 0) logln("Zones with rules that are not canonical: " + s);
1145 
1146         logln("Checking that the rule data are all canonical");
1147         s.clear();
1148         s.addAll(core);
1149         s.removeAll(zonesWithRules);
1150         s.removeAll(old_new.keySet());
1151         if (s.size() != 0) logln("Canonical zones that don't have rules or links: " + s);
1152 
1153         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
1154             String oldItem = it.next();
1155             logln("old: " + oldItem + "\tnew: " + old_new.get(oldItem));
1156         }
1157         Map<String, Set<String>> new_old = new TreeMap<>();
1158         for (Iterator<String> it = core.iterator(); it.hasNext();) {
1159             new_old.put(it.next(), new TreeSet<String>());
1160         }
1161         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
1162             String oldItem = it.next();
1163             String newItem = old_new.get(oldItem);
1164             Set<String> oldItems = new_old.get(newItem);
1165             if (oldItems == null) { // try recursing
1166                 logln("!!!!Skipping " + oldItem + " \u2192 " + newItem);
1167                 continue;
1168                 // new_old.put(oldOne, oldItems = new TreeSet());
1169             }
1170             oldItems.add(oldItem);
1171         }
1172         for (Iterator<String> it = new_old.keySet().iterator(); it.hasNext();) {
1173             String newOne = it.next();
1174             Set<String> oldItems = new_old.get(newOne);
1175             logln(newOne + "\t" + oldItems);
1176         }
1177     }
1178 
TestNarrowForms()1179     public void TestNarrowForms() {
1180         if (disableUntilLater("TestMinimalLocalization")) return;
1181 
1182         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
1183             String locale = it.next();
1184             logln("Testing: " + getLocaleAndName(locale));
1185             BreakIterator bi = BreakIterator.getCharacterInstance(new ULocale(locale));
1186             CLDRFile item = cldrFactory.make(locale, false);
1187             // Walk through all the xpaths, adding to currentValues
1188             // Whenever two values for the same xpath are different, we remove from currentValues, and add to okValues
1189             for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
1190                 String xpath = it2.next();
1191                 if (xpath.indexOf("[@type=\"narrow\"]") >= 0) {
1192                     String value = item.getStringValue(xpath);
1193                     // logln("\tTesting: " + value + "\t path: " + xpath);
1194                     int end = getXGraphemeClusterBoundary(bi, value, 0);
1195                     if (end == value.length()) continue;
1196                     errln(getLocaleAndName(locale) + "\tillegal narrow value " + value + "\t path: " + xpath);
1197                     surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is too wide for a \"narrow\" value.");
1198                 }
1199             }
1200         }
1201     }
1202 
1203     static final UnicodeSet XGRAPHEME = new UnicodeSet("[[:mark:][:grapheme_extend:]]");
1204     static final UnicodeSet DIGIT = new UnicodeSet("[:decimal_number:]");
1205 
getXGraphemeClusterBoundary(BreakIterator bi, String value, int start)1206     private int getXGraphemeClusterBoundary(BreakIterator bi, String value, int start) {
1207         if (value.length() <= 1) return 1;
1208 
1209         bi.setText(value);
1210         if (start != 0) bi.preceding(start + 1); // backup one
1211         int current = bi.next();
1212         // link any digits
1213         if (DIGIT.contains(UTF16.charAt(value, current - 1))) {
1214             current = DIGIT.findIn(value, current, true);
1215         }
1216         // continue collecting any additional characters that are M or grapheme extend
1217         return XGRAPHEME.findIn(value, current, true);
1218     }
1219 }
1220