• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004, International Business Machines Corporation and        *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.test;
8 
9 import java.io.File;
10 import java.io.IOException;
11 import java.io.PrintWriter;
12 import java.util.Arrays;
13 import java.util.Calendar;
14 import java.util.Collection;
15 import java.util.Date;
16 import java.util.HashMap;
17 import java.util.HashSet;
18 import java.util.Iterator;
19 import java.util.LinkedHashSet;
20 import java.util.List;
21 import java.util.Map;
22 import java.util.Set;
23 import java.util.TreeMap;
24 import java.util.TreeSet;
25 
26 import org.unicode.cldr.draft.FileUtilities;
27 import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType;
28 import org.unicode.cldr.util.CLDRFile;
29 import org.unicode.cldr.util.CLDRPaths;
30 import org.unicode.cldr.util.CldrUtility;
31 import org.unicode.cldr.util.Factory;
32 import org.unicode.cldr.util.LanguageTagParser;
33 import org.unicode.cldr.util.SimpleFactory;
34 import org.unicode.cldr.util.StandardCodes;
35 import org.unicode.cldr.util.TimezoneFormatter;
36 import org.unicode.cldr.util.XPathParts;
37 import org.xml.sax.SAXException;
38 
39 import com.ibm.icu.dev.test.TestFmwk;
40 import com.ibm.icu.text.BreakIterator;
41 import com.ibm.icu.text.DecimalFormat;
42 import com.ibm.icu.text.NumberFormat;
43 import com.ibm.icu.text.UTF16;
44 import com.ibm.icu.text.UnicodeSet;
45 import com.ibm.icu.util.ULocale;
46 
/**
 * Initial version of CLDR tests. Each test is named TestXXX. To run all the tests, use the options
 * <blockquote>-nothrow</blockquote>
 * To run a particular set of tests, include their names, like
 * <blockquote>-nothrow TestForIllegalAttributeValues TestMinimalLocalization</blockquote>
 * To show more information (logln), add -verbose
 * <p>
 * There are some system properties (set with -D) that can be used with the test. <br>
 * -DSHOW_FILES=<anything> shows all create/open of files. <br>
 * -DXML_MATCH=<regular expression> skips all locales that don't match the regular expression <br>
 * -DXML_MAIN_DIR=<filesystem directory> resets to a different main directory (eg not cldr/common/main). For example,
 * some of the tools generate into a locale directory like -DXML_MAIN_DIR=C:\Unicode-CVS2\cldr\common\gen\main\ so this
 * can be used to check that directory. <br>
 * -DXML_SKIP_DRAFT=<anything> skips draft locales; the property only has to be set, its value is not examined
 */
62 public class CLDRTest extends TestFmwk {
// Regular expression selecting the locales to test; main() takes it from the XML_MATCH system
// property and falls back to ".*".
private static String MATCH;
// Directory the locale files are read from; main() takes it from the XML_MAIN_DIR system
// property and falls back to CLDRPaths.MAIN_DIRECTORY.
private static String MAIN_DIR;
// True when the XML_SKIP_DRAFT system property is set (to any value); see main().
private static boolean SKIP_DRAFT;
// cldrFactory.getAvailable(), captured in the constructor.
private Set<String> locales;
// cldrFactory.getAvailableLanguages(), captured in the constructor.
private Set<String> languageLocales;
// Factory built in the constructor from MAIN_DIR and MATCH.
private Factory cldrFactory;
// cldrFactory.make("root", true), created in the constructor.
private CLDRFile resolvedRoot;
// cldrFactory.make("en", true), created in the constructor.
private CLDRFile resolvedEnglish;
// Added to every exemplar set by getFixedExemplarSet().
private final UnicodeSet commonAndInherited = new UnicodeSet(
    "[[:script=common:][:script=inherited:][:alphabetic=false:]]");
// NOTE(review): not referenced in the visible part of the file; presumably calendar name widths - confirm.
private static final String[] WIDTHS = { "narrow", "wide", "abbreviated", "short" };
// NOTE(review): not referenced in the visible part of the file - confirm usage.
private static final String[] MONTHORDAYS = { "day", "month" };
// Cache used by getLocaleName(): locale ID -> english.getName(locale).
private Map<String, String> localeNameCache = new HashMap<String, String>();
// Lazily created with cldrFactory.make("en", true) by the methods that need English names.
private CLDRFile english = null;

// Messages collected by the tests; TestZZZZHack() writes them to surveyInfo.txt.
private Set<String> surveyInfo = new TreeSet<String>();
82 
83     /**
84      * TestFmwk boilerplate
85      */
main(String[] args)86     public static void main(String[] args) throws Exception {
87         MATCH = System.getProperty("XML_MATCH");
88         if (MATCH == null)
89             MATCH = ".*";
90         else
91             System.out.println("Resetting MATCH:" + MATCH);
92         MAIN_DIR = System.getProperty("XML_MAIN_DIR");
93         if (MAIN_DIR == null)
94             MAIN_DIR = CLDRPaths.MAIN_DIRECTORY;
95         else
96             System.out.println("Resetting MAIN_DIR:" + MAIN_DIR);
97         SKIP_DRAFT = System.getProperty("XML_SKIP_DRAFT") != null;
98         if (SKIP_DRAFT) System.out.println("Skipping Draft locales");
99 
100         double deltaTime = System.currentTimeMillis();
101         new CLDRTest().run(args);
102         deltaTime = System.currentTimeMillis() - deltaTime;
103         System.out.println("Seconds: " + deltaTime / 1000);
104 
105     }
106 
TestZZZZHack()107     public void TestZZZZHack() throws IOException {
108         // hack to get file written at the end of run.
109         PrintWriter surveyFile = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "surveyInfo.txt");
110         for (String s : surveyInfo) {
111             surveyFile.println(s);
112         }
113         surveyFile.close();
114     }
115 
116     /**
117      * TestFmwk boilerplate
118      */
CLDRTest()119     public CLDRTest() throws SAXException, IOException {
120         // TODO parameterize the directory and filter
121         cldrFactory = Factory.make(MAIN_DIR, MATCH);
122         // CLDRKey.main(new String[]{"-mde.*"});
123         locales = cldrFactory.getAvailable();
124         languageLocales = cldrFactory.getAvailableLanguages();
125         resolvedRoot = cldrFactory.make("root", true);
126         /*
127          * PrintWriter out = FileUtilities.openUTF8Writer(Utility.GEN_DIRECTORY + "resolved/", "root.xml");
128          * CLDRFile temp = (CLDRFile) resolvedRoot.clone();
129          * temp.write(out);
130          * out.close();
131          */
132         resolvedEnglish = cldrFactory.make("en", true);
133     }
134 
135     /**
136      * Check to make sure that the currency formats are kosher.
137      */
TestCurrencyFormats()138     public void TestCurrencyFormats() {
139         // String decimal = "//ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat[@type=\"standard\"]/";
140         // String currency = "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/";
141         for (String locale : locales) {
142             boolean isPOSIX = locale.indexOf("POSIX") >= 0;
143             logln("Testing: " + locale);
144             CLDRFile item = cldrFactory.make(locale, false);
145             for (String xpath : item) {
146                 NumericType type = NumericType.getNumericType(xpath);
147                 if (type == NumericType.NOT_NUMERIC) continue;
148                 String value = item.getStringValue(xpath);
149                 // at this point, we only have currency formats
150                 String pattern = DisplayAndInputProcessor.getCanonicalPattern(value, type, isPOSIX);
151                 if (!pattern.equals(value)) {
152                     String draft = "";
153                     if (item.getFullXPath(xpath).indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
154                     assertEquals(getLocaleAndName(locale) + draft + " " + type + " pattern incorrect", pattern, value);
155                 }
156             }
157         }
158     }
159 
160     /**
161      * Internal class
162      */
163     private static class ValueCount {
164         int count = 1;
165         String value;
166         String fullxpath;
167     }
168 
169     /**
170      * Verify that if all the children of a language locale do not have the same value for the same key.
171      */
TestCommonChildren()172     public void TestCommonChildren() {
173         if (disableUntilLater("TestCommonChildren")) return;
174 
175         Map<String, ValueCount> currentValues = new TreeMap<String, ValueCount>();
176         Set<String> okValues = new TreeSet<String>();
177 
178         for (String parent : languageLocales) {
179             logln("Testing: " + parent);
180             currentValues.clear();
181             okValues.clear();
182             Set<String> availableWithParent = cldrFactory.getAvailableWithParent(parent, true);
183             for (String locale : availableWithParent) {
184                 logln("\tTesting: " + locale);
185                 CLDRFile item = cldrFactory.make(locale, false);
186                 // Walk through all the xpaths, adding to currentValues
187                 // Whenever two values for the same xpath are different, we remove from currentValues, and add to
188                 // okValues
189                 for (String xpath : item) {
190                     if (okValues.contains(xpath)) continue;
191                     if (xpath.startsWith("//ldml/identity/")) continue; // skip identity elements
192                     String v = item.getStringValue(xpath);
193                     ValueCount last = currentValues.get(xpath);
194                     if (last == null) {
195                         ValueCount vc = new ValueCount();
196                         vc.value = v;
197                         vc.fullxpath = item.getFullXPath(xpath);
198                         currentValues.put(xpath, vc);
199                     } else if (v.equals(last.value)) {
200                         last.count++;
201                     } else {
202                         okValues.add(xpath);
203                         currentValues.remove(xpath);
204                     }
205                 }
206                 // at the end, only the keys left in currentValues are (possibly) faulty
207                 // they are actually bad IFF either
208                 // (a) the count is equal to the total (thus all children are the same), or
209                 // (b) their value is the same as the parent's resolved value (thus all children are the same or the
210                 // same
211                 // as the inherited parent value).
212             }
213             if (currentValues.size() == 0) continue;
214             int size = availableWithParent.size();
215             CLDRFile parentCLDR = cldrFactory.make(parent, true);
216             for (String xpath : currentValues.keySet()) {
217                 ValueCount vc = currentValues.get(xpath);
218                 if (vc.count == size || (vc.value.equals(parentCLDR.getStringValue(xpath))
219                     && vc.fullxpath.equals(parentCLDR.getStringValue(xpath)))) {
220                     String draft = "";
221                     if (vc.fullxpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
222                     String count = (vc.count == size ? "" : vc.count + "/") + size;
223                     warnln(getLocaleAndName(parent) + draft +
224                         "\tall children (" + count + ") have same value for:\t"
225                         + xpath + ";\t" + vc.value);
226                 }
227             }
228         }
229     }
230 
231     static String[] EXEMPLAR_SKIPS = { "/hourFormat", "/exemplarCharacters", "/pattern", "/localizedPatternChars" };
232 
233     /**
234      * Check that the exemplars include all characters in the data.
235      */
TestThatExemplarsContainAll()236     public void TestThatExemplarsContainAll() {
237         UnicodeSet allExemplars = new UnicodeSet();
238         if (disableUntilLater("TestThatExemplarsContainAll")) return;
239         Set<String> counts = new TreeSet<String>();
240         int totalCount = 0;
241         UnicodeSet localeMissing = new UnicodeSet();
242         for (String locale : locales) {
243             if (locale.equals("root")) continue;
244             CLDRFile resolved = cldrFactory.make(locale, false); // FIX LATER
245             UnicodeSet exemplars = getFixedExemplarSet(locale, resolved);
246             CLDRFile plain = cldrFactory.make(locale, false);
247             int count = 0;
248             localeMissing.clear();
249             file: for (String xpath : plain) {
250                 for (int i = 0; i < EXEMPLAR_SKIPS.length; ++i) {
251                     if (xpath.indexOf(EXEMPLAR_SKIPS[i]) > 0) continue file; // skip some items.
252                 }
253                 if (SKIP_DRAFT) {
254                     String fullxpath = plain.getFullXPath(xpath);
255                     if (fullxpath.indexOf("[@draft=\"unconfirmed\"") > 0) continue;
256                 }
257                 if (xpath.startsWith("//ldml/posix/messages")) continue;
258                 String value = plain.getStringValue(xpath);
259                 allExemplars.addAll(value);
260                 if (!exemplars.containsAll(value)) {
261                     count++;
262                     UnicodeSet missing = new UnicodeSet().addAll(value).removeAll(exemplars);
263                     localeMissing.addAll(missing);
264                     logln(getLocaleAndName(locale) + "\t" + xpath + "\t<" + value + "> contains " + missing
265                         + ", not in exemplars");
266                     surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' contains characters "
267                         + missing.toPattern(false) + ", which are not in exemplars");
268                 }
269             }
270             NumberFormat nf = new DecimalFormat("000");
271             if (count != 0) {
272                 totalCount += count;
273                 counts.add(nf.format(count) + "\t" + getLocaleAndName(locale) + "\t" + localeMissing);
274             }
275             if (localeMissing.size() != 0) {
276                 errln(getLocaleAndName(locale) + "\t uses " + localeMissing + ", not in exemplars");
277             }
278         }
279         for (String c : counts) {
280             logln(c);
281         }
282         logln("Total Count: " + totalCount);
283         System.out.println("All exemplars: " + allExemplars.toPattern(true));
284     }
285 
286     // Get Date-Time in milliseconds
getDateTimeinMillis(int year, int month, int date)287     private static long getDateTimeinMillis(int year, int month, int date) {
288         Calendar cal = Calendar.getInstance();
289         cal.set(year, month, date);
290         return cal.getTimeInMillis();
291     }
292 
293     static final long disableDate = getDateTimeinMillis(2005, 6 - 1, 3);
294 
295     /**
296      *
297      */
disableUntilLater(String string)298     private boolean disableUntilLater(String string) {
299         if (new Date().getTime() >= disableDate) return false;
300         warnln("Disabling " + string + " until " + new Date(disableDate));
301         return true;
302     }
303 
304     /**
305      * Internal
306      */
getFixedExemplarSet(String locale, CLDRFile cldrfile)307     private UnicodeSet getFixedExemplarSet(String locale, CLDRFile cldrfile) {
308         UnicodeSet exemplars = getExemplarSet(cldrfile, "");
309         if (exemplars.size() == 0) {
310             errln(getLocaleAndName(locale) + " has empty exemplar set");
311         }
312         exemplars.addAll(getExemplarSet(cldrfile, "standard"));
313         UnicodeSet auxiliary = getExemplarSet(cldrfile, "auxiliary");
314         if (exemplars.containsSome(auxiliary)) {
315             errln(getLocaleAndName(locale) + "Auxiliary & main exemplars should be disjoint, but overlap with " +
316                 new UnicodeSet(exemplars).retainAll(auxiliary) +
317                 ": change auxiliary to " + auxiliary.removeAll(exemplars));
318         }
319         exemplars.addAll(auxiliary);
320         exemplars.addAll(commonAndInherited);
321         return exemplars;
322     }
323 
324     /**
325      * @return Gets an exemplar set. Also verifies that the set contains no properties.
326      */
getExemplarSet(CLDRFile cldrfile, String type)327     public UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) {
328         if (type.length() != 0) type = "[@type=\"" + type + "\"]";
329         String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type);
330         if (v == null) return new UnicodeSet();
331         String pattern = v;
332         if (pattern.indexOf("[:") >= 0 || pattern.indexOf("\\p{") > 0) {
333             errln(getLocaleName(cldrfile.getLocaleID()) + " exemplar pattern contains property: " + pattern);
334         }
335         try {
336             UnicodeSet result = new UnicodeSet(v, UnicodeSet.CASE);
337             result.remove(0x20);
338             return result;
339         } catch (RuntimeException e) {
340             e.printStackTrace();
341             errln(getLocaleAndName(cldrfile.getLocaleID()) + " has illegal exemplar set: <" + v + ">");
342             return new UnicodeSet();
343         }
344         // if (type.length() != 0) System.out.println("fetched set for " + type);
345     }
346 
getLocaleAndName(String locale)347     public String getLocaleAndName(String locale) {
348         return locale + " (" + getLocaleName(locale) + ")";
349     }
350 
351     /**
352      * @return the ID plus its localization (for language, script, and territory IDs only)
353      */
getIDAndLocalization(String id)354     public String getIDAndLocalization(String id) {
355         return id + " " + getLocalization(id);
356     }
357 
358     /**
359      * @return the localization (for language, script, and territory IDs only)
360      */
getLocalization(String id)361     public String getLocalization(String id) {
362         if (english == null) english = cldrFactory.make("en", true);
363         if (id.length() == 0) return "?";
364         // pick on basis of case
365         char ch = id.charAt(0);
366         if ('a' <= ch && ch <= 'z') return getName(english, "languages/language", id);
367         if (id.length() == 4 && 'A' <= ch && ch <= 'Z') return getName(english, "scripts/script", id);
368         return getName(english, "territories/territory", id);
369     }
370 
371     /**
372      * Internal
373      */
getIDAndLocalization(Set<String> missing)374     private String getIDAndLocalization(Set<String> missing) {
375         StringBuffer buffer = new StringBuffer();
376         for (String next : missing) {
377             if (buffer.length() != 0) buffer.append("; ");
378             buffer.append(getIDAndLocalization(next));
379         }
380         return buffer.toString();
381     }
382 
getLocaleName(String locale)383     public String getLocaleName(String locale) {
384         String name = localeNameCache.get(locale);
385         if (name != null) return name;
386         if (english == null) english = cldrFactory.make("en", true);
387         String result = english.getName(locale);
388         /*
389          * Collection c = Utility.splitList(locale, '_', false, null);
390          * String[] pieces = new String[c.size()];
391          * c.toArray(pieces);
392          * int i = 0;
393          * String result = getName(english, "languages/language", pieces[i++]);
394          * if (pieces[i].length() == 0) return result;
395          * if (pieces[i].length() == 4) {
396          * result += " " + getName(english, "scripts/script", pieces[i++]);
397          * }
398          * if (pieces[i].length() == 0) return result;
399          * result += " " + getName(english, "territories/territory", pieces[i++]);
400          * if (pieces[i].length() == 0) return result;
401          * result += " " + getName(english, "variant/variants", pieces[i++]);
402          */
403         localeNameCache.put(locale, result);
404         return result;
405     }
406 
407     /**
408      * Internal
409      */
getName(CLDRFile english, String kind, String type)410     private String getName(CLDRFile english, String kind, String type) {
411         String v = english.getStringValue("//ldml/localeDisplayNames/" + kind + "[@type=\"" + type + "\"]");
412         if (v == null) return "<" + type + ">";
413         return v;
414     }
415 
416     /**
417      * Make sure we are only using attribute values that are in RFC3066bis, the Olson database (with aliases removed)
418      * or ISO 4217
419      *
420      * @throws IOException
421      */
TestForIllegalAttributeValues()422     public void TestForIllegalAttributeValues() {
423         // check for illegal attribute values that are not in the DTD
424         Map<String, Set<String>> result = new TreeMap<String, Set<String>>();
425         Map<String, Set<String>> totalResult = new TreeMap<String, Set<String>>();
426         for (String locale : locales) {
427             logln("Testing: " + locale);
428             CLDRFile item = cldrFactory.make(locale, false);
429             result.clear();
430             Set<String> xpathFailures = null; // don't collect
431             // XPathParts parts;
432             // String xpath;
433             // CLDRFile.StringValue value;
434             // String element;
435             // Map attributes;
436             checkAttributeValidity(item, result, xpathFailures);
437 
438             // now show
439             //String localeName = getLocaleAndName(locale);
440             for (Iterator<String> it3 = result.keySet().iterator(); it3.hasNext();) {
441                 String code = it3.next();
442                 Set<String> avalues = result.get(code);
443                 errln(getLocaleAndName(locale) + "\tillegal attribute value for " + code + ", value:\t" + show(avalues));
444                 Set<String> totalvalues = totalResult.get(code);
445                 if (totalvalues == null) totalResult.put(code, totalvalues = new TreeSet<String>());
446                 totalvalues.addAll(avalues);
447             }
448         }
449         for (Iterator<String> it3 = totalResult.keySet().iterator(); it3.hasNext();) {
450             String code = it3.next();
451             Set<String> avalues = totalResult.get(code);
452             errln("All illegal attribute values for " + code + ", value:\t" + show(avalues));
453         }
454     }
455 
456     /**
457      * Tests whether the display names have any collisions, e.g. if in the fully resolved
458      * locale $ is used for both USD and UAD.
459      *
460      */
TestDisplayNameCollisions()461     public void TestDisplayNameCollisions() {
462         if (disableUntilLater("TestDisplayNameCollisions")) return;
463 
464         Map<String, String>[] maps = new HashMap[CLDRFile.LIMIT_TYPES];
465         for (int i = 0; i < maps.length; ++i)
466             maps[i] = new HashMap<String, String>();
467         Set<String> collisions = new TreeSet<String>();
468         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
469             String locale = it.next();
470             CLDRFile item = cldrFactory.make(locale, true);
471             for (int i = 0; i < maps.length; ++i)
472                 maps[i].clear();
473             collisions.clear();
474 
475             for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
476                 String xpath = it2.next();
477                 int nameType = CLDRFile.getNameType(xpath);
478                 if (nameType < 0) continue;
479                 String value = item.getStringValue(xpath);
480                 String xpath2 = maps[nameType].get(value);
481                 if (xpath2 == null) {
482                     maps[nameType].put(value, xpath);
483                     continue;
484                 }
485                 collisions.add(CLDRFile.getNameTypeName(nameType) + "\t" + value + "\t" + xpath + "\t" + xpath2);
486                 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is a duplicate of what is in " + xpath2);
487             }
488             String name = getLocaleAndName(locale) + "\t";
489             for (Iterator<String> it2 = collisions.iterator(); it2.hasNext();) {
490                 errln(name + it2.next());
491             }
492         }
493     }
494 
495     /**
496      * Checks the validity of attributes, based on StandardCodes.
497      * The invalid codes are added to badCodes, and the failing xpaths are added to xpathFailures.
498      *
499      * @param item
500      * @param badCodes
501      * @param xpathFailures
502      */
checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures)503     public static void checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures) {
504         XPathParts parts = new XPathParts(null, null);
505         for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
506             String xpath = it2.next();
507             parts.set(item.getFullXPath(xpath));
508             for (int i = 0; i < parts.size(); ++i) {
509                 if (parts.getAttributeCount(i) == 0) continue;
510                 String element = parts.getElement(i);
511                 Map<String, String> attributes = parts.getAttributes(i);
512                 for (Iterator<String> it3 = attributes.keySet().iterator(); it3.hasNext();) {
513                     String attribute = it3.next();
514                     String avalue = attributes.get(attribute);
515                     checkValidity(xpath, element, attribute, avalue, badCodes, xpathFailures);
516                 }
517             }
518         }
519     }
520 
521     /**
522      * Internal
523      */
show(Collection<String> avalues)524     private String show(Collection<String> avalues) {
525         StringBuffer result = new StringBuffer("{");
526         boolean first = true;
527         for (Iterator<String> it3 = avalues.iterator(); it3.hasNext();) {
528             if (first)
529                 first = false;
530             else
531                 result.append(", ");
532             result.append(it3.next().toString());
533         }
534         result.append("}");
535         return result.toString();
536     }
537 
538     /**
539      * Internal function
540      */
checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, Set<String> xpathsFailing)541     private static void checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results,
542         Set<String> xpathsFailing) {
543         StandardCodes codes = StandardCodes.make();
544         if (attribute.equals("type")) {
545             boolean checkReplacements = xpath.indexOf("/identity") < 0;
546             if (element.equals("currency"))
547                 checkCodes(xpath, "currency", avalue, codes, results, xpathsFailing, checkReplacements);
548             else if (element.equals("script"))
549                 checkCodes(xpath, "script", avalue, codes, results, xpathsFailing, checkReplacements);
550             else if (element.equals("territory"))
551                 checkCodes(xpath, "territory", avalue, codes, results, xpathsFailing, checkReplacements);
552             else if (element.equals("language"))
553                 checkCodes(xpath, "language", avalue, codes, results, xpathsFailing, checkReplacements);
554             else if (element.equals("zone"))
555                 checkCodes(xpath, "tzid", avalue, codes, results, xpathsFailing, checkReplacements);
556         }
557     }
558 
559     /**
560      * Internal function
561      *
562      * @param checkReplacements
563      *            TODO
564      */
565     private static void checkCodes(String xpath, String code, String avalue, StandardCodes codes, Map<String, Set<String>> results,
566         Set<String> xpathFailures, boolean checkReplacements) {
567         // ok if code is found AND it has no replacement
568         if (codes.getData(code, avalue) != null
569             && (!checkReplacements || codes.getReplacement(code, avalue) == null)) return;
570 
571         if (xpathFailures != null) xpathFailures.add(xpath);
572         if (results == null) return;
573         Set<String> s = results.get(code);
574         if (s == null) {
575             s = new TreeSet<String>();
576             results.put(code, s);
577         }
578         s.add(avalue);
579     }
580 
581     /**
582      * Verify that a small set of locales (currently just English) has everything translated.
583      *
584      * @throws IOException
585      */
586     public void TestCompleteLocales() {
587         // just test English for now
588         if (english == null) english = cldrFactory.make("en", true);
589         checkTranslatedCodes(english);
590     }
591 
592     /**
593      * Tests that the file contains codes for all main display name ids: language, script, territory, tzid, currency.
594      */
595     private void checkTranslatedCodes(CLDRFile cldrfile) {
596         StandardCodes codes = StandardCodes.make();
597         checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency", "/displayName");
598         // can't check timezones for English.
599         // checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone", "");
600         checkTranslatedCode(cldrfile, codes, "language", "//ldml/localeDisplayNames/languages/language", "");
601         checkTranslatedCode(cldrfile, codes, "script", "//ldml/localeDisplayNames/scripts/script", "");
602         checkTranslatedCode(cldrfile, codes, "territory", "//ldml/localeDisplayNames/territories/territory", "");
603         checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant", "");
604     }
605 
606     /**
607      * @param codes
608      * @param type
609      * @param prefix
610      * @param postfix
611      *            TODO
612      */
613     private void checkTranslatedCode(CLDRFile cldrfile, StandardCodes codes, String type, String prefix, String postfix) {
614 
615         // TODO, expand to other languages
616         Map<String, Set<String>> completionExceptions = new HashMap<String, Set<String>>();
617         Set<String> scriptExceptions = new HashSet<String>();
618         scriptExceptions.add("Cham");
619         scriptExceptions.add("Thai");
620         completionExceptions.put("script", scriptExceptions);
621 
622         Set<String> codeItems = codes.getGoodAvailableCodes(type);
623         int count = 0;
624         Set<String> exceptions = completionExceptions.get(type);
625         for (String code : codeItems) {
626             String rfcname = codes.getData(type, code);
627             // if (rfcname.equals("ZZ")) continue;
628             ++count;
629             if (rfcname.equals("PRIVATE USE")) continue;
630             String fullFragment = prefix + "[@type=\"" + code + "\"]" + postfix;
631             String v = cldrfile.getStringValue(fullFragment);
632             if (v == null) {
633                 errln("Missing translation for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">");
634                 continue;
635             }
636             String translation = v;
637             if (translation.equals(code)) {
638                 if (exceptions != null && exceptions.contains(code)) continue;
639                 errln("Translation = code for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">");
640                 continue;
641             }
642             if (false && !translation.equalsIgnoreCase(rfcname)) {
643                 warnln(type + " translation differs from RFC, check: " + code + "\trfc: " + rfcname + "\tcldr: "
644                     + translation);
645             }
646         }
647         logln("Total " + type + ":\t" + count);
648     }
649 
650     // <territoryContainment><group type="001" contains="002 009 019 142 150"/>
651     // <languageData><language type="af" scripts="Latn" territories="ZA"/>
652     void getSupplementalData(Map<String, Set<String>> language_scripts, Map<String, Set<String>> language_territories,
653         Map<String, Set<String>> group_territory,
654         Map<String, Set<String>> territory_currencies, Map<String, Map<String, String>> aliases) {
655         boolean SHOW = false;
656         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
657         CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
658         XPathParts parts = new XPathParts(new UTF16.StringComparator(), null);
659         for (Iterator<String> it = supp.iterator(); it.hasNext();) {
660             String path = it.next();
661             try {
662                 parts.set(supp.getFullXPath(path));
663                 Map<String, String> m;
664                 String type = "";
665                 if (aliases != null && parts.findElement("alias") >= 0) {
666                     m = parts.findAttributes(type = "languageAlias");
667                     if (m == null) m = parts.findAttributes(type = "territoryAlias");
668                     if (m != null) {
669                         Map top = aliases.get(type);
670                         if (top == null) aliases.put(type, top = new TreeMap());
671                         top.put(m.get("type"), m.get("replacement"));
672                     }
673                 }
674                 if (territory_currencies != null) {
675                     m = parts.findAttributes("region");
676                     if (m != null) {
677                         String region = m.get("iso3166");
678                         Set s = territory_currencies.get(region);
679                         if (s == null) territory_currencies.put(region, s = new LinkedHashSet());
680                         m = parts.findAttributes("currency");
681                         if (m == null) {
682                             warnln("missing currency for region: " + path);
683                             continue;
684                         }
685                         String currency = m.get("iso4217");
686                         s.add(currency);
687                         m = parts.findAttributes("alternate");
688                         String alternate = m == null ? null : (String) m.get("iso4217");
689                         if (alternate != null) s.add(alternate);
690                         continue;
691                     }
692                 }
693                 m = parts.findAttributes("group");
694                 if (m != null) {
695                     if (group_territory == null) continue;
696                     type = m.get("type");
697                     String contains = m.get("contains");
698                     group_territory.put(type, new TreeSet(CldrUtility.splitList(contains, ' ', true)));
699                     continue;
700                 }
701                 m = parts.findAttributes("language");
702                 if (m == null) continue;
703                 String language = m.get("type");
704                 String scripts = m.get("scripts");
705                 if (scripts == null)
706                     language_scripts.put(language, new TreeSet<String>());
707                 else {
708                     language_scripts.put(language, new TreeSet<String>(CldrUtility.splitList(scripts, ' ', true)));
709                     if (SHOW)
710                         System.out.println(getIDAndLocalization(language) + "\t\t"
711                             + getIDAndLocalization(language_scripts.get(language)));
712                 }
713                 String territories = m.get("territories");
714                 if (territories == null)
715                     language_territories.put(language, new TreeSet<String>());
716                 else {
717                     language_territories.put(language, new TreeSet<String>(CldrUtility.splitList(territories, ' ', true)));
718                     if (SHOW)
719                         System.out.println(getIDAndLocalization(language) + "\t\t"
720                             + getIDAndLocalization(language_territories.get(language)));
721                 }
722             } catch (RuntimeException e) {
723                 throw (IllegalArgumentException) new IllegalArgumentException("Failure with: " + path).initCause(e);
724             }
725         }
726     }
727 
728     /**
729      * Verify that the minimal localizations are present.
730      */
TestMinimalLocalization()731     public void TestMinimalLocalization() throws IOException {
732         if (disableUntilLater("TestMinimalLocalization")) return;
733 
734         boolean testDraft = false;
735         Map<String, Set<String>> language_scripts = new HashMap<String, Set<String>>();
736         Map<String, Set<String>> language_territories = new HashMap<String, Set<String>>();
737         getSupplementalData(language_scripts, language_territories, null, null, null);
738         LanguageTagParser localIDParser = new LanguageTagParser();
739         // see http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/design/minimal_requirements.htm
740         int[] failureCount = new int[1];
741         int[] warningCount = new int[1];
742         for (Iterator<String> it = languageLocales.iterator(); it.hasNext();) {
743             String locale = it.next();
744             if (locale.equals("root")) continue;
745             // if (!locale.equals("zh_Hant")) continue;
746 
747             CLDRFile item = cldrFactory.make(locale, true);
748             if (!testDraft && item.isDraft()) {
749                 logln(getLocaleAndName(locale) + "\tskipping draft");
750                 continue;
751             }
752             UnicodeSet exemplars = getFixedExemplarSet(locale, item);
753             CLDRFile missing = SimpleFactory.makeFile(locale);
754             failureCount[0] = 0;
755             warningCount[0] = 0;
756             localIDParser.set(locale);
757             String language = localIDParser.getLanguage();
758             logln("Testing: " + locale);
759             // languages
760             Set<String> languages = new TreeSet<String>(CldrUtility.MINIMUM_LANGUAGES);
761             languages.add(language);
762             // LANGUAGE_NAME = 0, SCRIPT_NAME = 1, TERRITORY_NAME = 2, VARIANT_NAME = 3,
763             // CURRENCY_NAME = 4, CURRENCY_SYMBOL = 5, TZID = 6
764 
765             checkForItems(item, languages, CLDRFile.LANGUAGE_NAME, missing, failureCount, null);
766 
767             /*
768              * checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency");
769              * checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone");
770              * checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant");
771              */
772 
773             Set<String> scripts = new TreeSet<String>();
774             scripts.add("Latn");
775             Set<String> others = language_scripts.get(language);
776             if (others != null) scripts.addAll(others);
777             checkForItems(item, scripts, CLDRFile.SCRIPT_NAME, missing, failureCount, null);
778 
779             Set<String> countries = new TreeSet<String>(CldrUtility.MINIMUM_TERRITORIES);
780             others = language_territories.get(language);
781             if (others != null) countries.addAll(others);
782             checkForItems(item, countries, CLDRFile.TERRITORY_NAME, missing, failureCount, null);
783 
784             Set<String> currencies = new TreeSet<String>();
785             StandardCodes sc = StandardCodes.make();
786             for (Iterator<String> it2 = countries.iterator(); it2.hasNext();) {
787                 String country = it2.next();
788                 Set<String> countryCurrencies = sc.getMainCurrencies(country);
789                 if (countryCurrencies == null) {
790                     errln("Internal Error: no currencies for " + country + ", locale: " + locale);
791                 } else {
792                     currencies.addAll(countryCurrencies);
793                 }
794             }
795             checkForItems(item, currencies, CLDRFile.CURRENCY_NAME, missing, failureCount, null);
796             checkForItems(item, currencies, CLDRFile.CURRENCY_SYMBOL, missing, failureCount, exemplars);
797 
798             // context=format and width=wide; context=stand-alone & width=abbreviated
799             Set<String> months = new TreeSet<String>();
800             for (int i = 1; i <= 12; ++i)
801                 months.add(i + "");
802             Set<String> days = new TreeSet<String>(Arrays.asList(new String[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" }));
803             for (int i = -7; i < 0; ++i) {
804                 checkForItems(item, (i < -4 ? months : days), i, missing, failureCount, null);
805             }
806 
807             String filename = "missing_" + locale + ".xml";
808             if (failureCount[0] > 0 || warningCount[0] > 0) {
809                 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "missing/", filename);
810                 missing.write(out);
811                 out.close();
812                 // String s = getIDAndLocalization(missing);
813                 String message = "missing localizations, creating file"
814                     + new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).getCanonicalPath();
815                 if (failureCount[0] > 0)
816                     warnln(getLocaleAndName(locale) + "\t" + message);
817                 else
818                     logln(getLocaleAndName(locale) + "\tpossibly " + message);
819             } else {
820                 new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).delete();
821             }
822         }
823     }
824 
825     /**
826      * Internal
827      */
getDateKey(String monthOrDay, String width, String code)828     private String getDateKey(String monthOrDay, String width, String code) {
829         // String context = width.equals("narrow") ? "format" : "stand-alone";
830         return "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/"
831             + monthOrDay + "s/" + monthOrDay + "Context[@type=\"format\"]/"
832             + monthOrDay + "Width[@type=\"" + width + "\"]/" + monthOrDay
833             + "[@type=\"" + code + "\"]";
834     }
835 
836     /**
837      * Internal
838      */
getDateKey(int type, String code)839     private String getDateKey(int type, String code) {
840         // type is 6..4 for months abbrev..narrow, 3..0 for days short..narrow
841         int monthOrDayType = 0, widthType = type;
842         if (type >= 4) {
843             monthOrDayType = 1;
844             widthType -= 4;
845         }
846         return getDateKey(MONTHORDAYS[monthOrDayType], WIDTHS[widthType], code);
847     }
848 
849     /**
850      * @param item
851      * @param codes
852      * @param missing
853      * @param exemplarTest
854      *            TODO
855      *            TODO
856      */
checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], UnicodeSet exemplarTest)857     private void checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[],
858         UnicodeSet exemplarTest) {
859         // check codes
860         for (Iterator<String> it2 = codes.iterator(); it2.hasNext();) {
861             String code = it2.next();
862             String key;
863             if (type >= 0) {
864                 key = CLDRFile.getKey(type, code);
865             } else {
866                 key = getDateKey(-type - 1, code);
867             }
868             String v = item.getStringValue(key);
869             String rootValue = resolvedRoot.getStringValue(key);
870             if (v == null || v.equals(rootValue) && (exemplarTest == null || !exemplarTest.containsAll(rootValue))) {
871                 String englishValue = resolvedEnglish.getStringValue(key);
872                 String transValue;
873                 if (englishValue != null) {
874                     transValue = englishValue;
875                 } else {
876                     transValue = code;
877                 }
878                 missing.add(key, "TODO " + transValue);
879                 failureCount[0]++;
880             } else {
881                 logln("\t" + code + "\t" + v);
882             }
883         }
884     }
885 
886     /*
887      * void showTestStr() {
888      * LocaleIDParser lparser = new LocaleIDParser();
889      * Collection s = split(teststr,',', true, new ArrayList());
890      * for (Iterator it = s.iterator(); it.hasNext();) {
891      * String item = (String)it.next();
892      * lparser.set(item.replace('?', '_'));
893      * String region = lparser.getRegion();
894      * System.out.print(item.replace('?', '-') + " (" + getLocalization(region) + "), ");
895      * //System.out.print(getLocalization(region) + ", ");
896      * }
897      * }
898      * static String teststr =
899      * "en?AG, en?AI, en?AS, en?AU, en?IN, en?BB, en?BE, en?BM, en?BN, en?BS, en?BW, en?BZ, en?CA, en?CK, en?CM, en?DM, en?ER, en?ET, en?FJ, en?FK, en?FM, en?GB, en?GD, en?GH, en?GI, en?GM, en?GU, en?GY, en?HK, en?IE, en?IL, en?IO, en?JM, en?KE, en?KI, en?KN, en?KY, en?LC, en?LR, en?LS, en?MH, en?MP, en?MS, en?MT, en?MU, en?MW, en?NA, en?NF, en?NG, en?NR, en?NU, en?NZ, en?PG, en?PH, en?PK, en?PN, en?PR, en?PW, en?RW, en?SB, en?SC, en?SG, en?SH, en?SL, en?SO, en?SZ, en?TC, en?TK, en?TO, en?TT, en?UG, en?UM, en?US, en?VC, en?VG, en?VI, en?VU, en?WS, en?ZA, en?ZM, en?ZW"
900      * ;
901      */
902 
903     CldrUtility.CollectionTransform EnglishName = new CldrUtility.CollectionTransform() {
904         public Object transform(Object source) {
905             // TODO Auto-generated method stub
906             return getLocalization(source.toString()) + " (" + source + ")";
907         }
908     };
909 
910     CldrUtility.CollectionTransform EnglishCurrencyName = new CldrUtility.CollectionTransform() {
911         public Object transform(Object source) {
912             if (english == null) english = cldrFactory.make("en", true);
913             return english.getName("currency", source.toString()) + " (" + source + ")";
914         }
915     };
916 
917     /**
918      * Tests that the supplemental data is well-formed.
919      *
920      */
TestSupplementalData()921     public void TestSupplementalData() {
922         Map<String, Set<String>> language_scripts = new TreeMap<String, Set<String>>();
923         Map<String, Set<String>> language_territories = new TreeMap<String, Set<String>>();
924         Map<String, Set<String>> groups = new TreeMap<String, Set<String>>();
925         Map<String, Set<String>> territory_currencies = new TreeMap<String, Set<String>>();
926         Map<String, Map<String, String>> aliases = new TreeMap<String, Map<String, String>>();
927         getSupplementalData(language_scripts, language_territories, groups, territory_currencies, aliases);
928         Set<String> sTerritories = new TreeSet<String>();
929         for (Iterator<Set<String>> it = language_territories.values().iterator(); it.hasNext();) {
930             sTerritories.addAll(it.next());
931         }
932         StandardCodes sc = StandardCodes.make();
933         Set<String> fullTerritories = sc.getAvailableCodes("territory");
934         Set<String> fullLanguages = sc.getAvailableCodes("language");
935 
936         Set<String> allLanguages = new TreeSet<String>(language_scripts.keySet());
937         allLanguages.addAll(language_territories.keySet());
938         for (Iterator<String> it = allLanguages.iterator(); it.hasNext();) {
939             Object language = it.next();
940             Set<String> scripts = language_scripts.get(language);
941             Set<String> territories = language_territories.get(language);
942             logln(EnglishName.transform(language)
943                 + " scripts: " + EnglishName.transform(scripts)
944                 + " territories: " + EnglishName.transform(territories));
945         }
946 
947         Map<String, String> changedLanguage = new TreeMap<String, String>();
948         for (Iterator<String> it = fullLanguages.iterator(); it.hasNext();) {
949             String code = it.next();
950             List<String> data = sc.getFullData("language", code);
951             if (data.size() < 3) {
952                 System.out.println("data problem: " + data);
953                 continue;
954             }
955             String replacement = data.get(2);
956             if (!replacement.equals("")) {
957                 if (!replacement.equals("--")) changedLanguage.put(code, replacement);
958                 continue;
959             }
960         }
961 
962         // remove private use, deprecated, groups
963         Set<String> standardTerritories = new TreeSet<String>();
964         Map<String, String> changedTerritory = new TreeMap<String, String>();
965         for (Iterator<String> it = fullTerritories.iterator(); it.hasNext();) {
966             String code = it.next();
967             if (code.equals("200")) continue; // || code.equals("YU") || code.equals("PZ")
968             List<String> data = sc.getFullData("territory", code);
969             if (data.get(0).equals("PRIVATE USE")) continue;
970             if (!data.get(2).equals("")) {
971                 if (!data.get(2).equals("--")) changedTerritory.put(code, data.get(2));
972                 continue;
973             }
974             standardTerritories.add(code);
975         }
976         standardTerritories.removeAll(groups.keySet());
977 
978         if (!standardTerritories.containsAll(sTerritories)) {
979             TreeSet<String> extras = new TreeSet<String>(sTerritories);
980             extras.removeAll(standardTerritories);
981             errln("Supplemental Language Territories contain illegal values: " + EnglishName.transform(extras));
982         }
983         if (!sTerritories.containsAll(standardTerritories)) {
984             TreeSet<String> extras = new TreeSet<String>(standardTerritories);
985             extras.removeAll(sTerritories);
986             warnln("Missing Language Territories: " + EnglishName.transform(extras));
987         }
988 
989         // now test currencies
990         logln("Check that no illegal territories are used");
991         if (!standardTerritories.containsAll(territory_currencies.keySet())) {
992             TreeSet<String> extras = new TreeSet<String>(territory_currencies.keySet());
993             extras.removeAll(fullTerritories);
994             if (extras.size() != 0) errln("Currency info -- Illegal Territories: " + EnglishName.transform(extras));
995             extras = new TreeSet<String>(territory_currencies.keySet());
996             extras.retainAll(fullTerritories);
997             extras.removeAll(standardTerritories);
998             if (extras.size() != 0) warnln("Currency info -- Archaic Territories: " + EnglishName.transform(extras));
999         }
1000         logln("Check that no territories are missing");
1001         if (!territory_currencies.keySet().containsAll(standardTerritories)) {
1002             TreeSet<String> extras = new TreeSet<String>(standardTerritories);
1003             extras.removeAll(territory_currencies.keySet());
1004             errln("Currency info -- Missing Territories: " + EnglishName.transform(extras));
1005         }
1006         Set<String> currencies = new TreeSet<String>();
1007         for (Iterator<Set<String>> it = territory_currencies.values().iterator(); it.hasNext();) {
1008             currencies.addAll(it.next());
1009         }
1010         logln("Check that no illegal currencies are used");
1011         Set<String> legalCurrencies = new TreeSet<String>(sc.getAvailableCodes("currency"));
1012         // first remove non-ISO
1013         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) {
1014             String code = it.next();
1015             List<String> data = sc.getFullData("currency", code);
1016             if ("X".equals(data.get(3))) it.remove();
1017         }
1018         if (!legalCurrencies.containsAll(currencies)) {
1019             TreeSet<String> extras = new TreeSet<String>(currencies);
1020             extras.removeAll(legalCurrencies);
1021             errln("Currency info -- Illegal Currencies: " + EnglishCurrencyName.transform(extras));
1022         }
1023         logln("Check that there are no missing currencies");
1024         if (!currencies.containsAll(legalCurrencies)) {
1025             TreeSet<String> extras = new TreeSet<String>(legalCurrencies);
1026             extras.removeAll(currencies);
1027             Map<String, Set<String>> failures = new TreeMap<String, Set<String>>();
1028             for (Iterator<String> it = extras.iterator(); it.hasNext();) {
1029                 String code = it.next();
1030                 List<String> data = sc.getFullData("currency", code);
1031                 if (data.get(1).equals("ZZ")) continue;
1032                 String type = data.get(3) + "/" + data.get(1);
1033                 Set<String> s = failures.get(type);
1034                 if (s == null) failures.put(type, s = new TreeSet<String>());
1035                 s.add(code);
1036             }
1037             for (Iterator<String> it = failures.keySet().iterator(); it.hasNext();) {
1038                 String type = it.next();
1039                 Set<String> s = failures.get(type);
1040                 warnln("Currency info -- Missing Currencies: " + type + "\t \u2192 " + EnglishCurrencyName.transform(s));
1041             }
1042         }
1043         logln("Missing English currency names");
1044         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) {
1045             String currency = it.next();
1046             String name = english.getName("currency", currency);
1047             if (name == null) {
1048                 String standardName = sc.getFullData("currency", currency).get(0);
1049                 logln("\t\t\t<currency type=\"" + currency + "\">");
1050                 logln("\t\t\t\t<displayName>" + standardName + "</displayName>");
1051                 logln("\t\t\t</currency>");
1052             }
1053         }
1054         logln("Check Aliases");
1055         for (Iterator<String> it = aliases.keySet().iterator(); it.hasNext();) {
1056             // the first part of the mapping had better not be in the standardTerritories
1057             String key = it.next();
1058             Map<String, String> submap = aliases.get(key);
1059             if (key.equals("territoryAlias")) {
1060                 checkEqual(key, submap, changedTerritory);
1061             } else if (key.equals("languageAlias")) {
1062                 for (Iterator<String> it2 = submap.keySet().iterator(); it2.hasNext();) {
1063                     String k = it2.next();
1064                     String value = submap.get(k);
1065                     if (value.indexOf("_") >= 0) it2.remove();
1066                 }
1067                 checkEqual(key, submap, changedLanguage);
1068             }
1069         }
1070     }
1071 
1072     /**
1073      *
1074      */
checkEqual(String title, Map map1, Map map2)1075     private void checkEqual(String title, Map map1, Map map2) {
1076         Set foo = new TreeSet(map1.keySet());
1077         foo.removeAll(map2.keySet());
1078         if (!foo.isEmpty()) errln("Extraneous Aliases: " + title + "\t" + foo);
1079         foo = new TreeSet(map2.keySet());
1080         foo.removeAll(map1.keySet());
1081         if (!foo.isEmpty()) errln("Missing Aliases: " + title + "\t" + foo);
1082         foo = map2.keySet();
1083         foo.retainAll(map1.keySet());
1084         for (Iterator it = foo.iterator(); it.hasNext();) {
1085             Object key = it.next();
1086             Object result1 = map1.get(key);
1087             Object result2 = map2.get(key);
1088             if (!result1.equals(result2))
1089                 errln("Missing Aliases: " + title + "\t" + key + "\t" + result1 + " != " + result2);
1090         }
1091     }
1092 
1093     /**
1094      * Test that the zone ids are well-formed.
1095      *
1096      */
TestZones()1097     public void TestZones() {
1098         StandardCodes sc = StandardCodes.make();
1099 
1100         Map<String, String> defaultNames = new TreeMap();
1101         Map<String, String> old_new = sc.getZoneLinkold_new();
1102         Set<String> core = sc.getZoneData().keySet();
1103         logln("Checking for collisions with last field");
1104         for (Iterator<String> it = core.iterator(); it.hasNext();) {
1105             String currentItem = it.next();
1106             String defaultName = TimezoneFormatter.getFallbackName(currentItem);
1107             String fullName = defaultNames.get(defaultName);
1108             if (fullName == null)
1109                 defaultNames.put(defaultName, currentItem);
1110             else {
1111                 errln("Collision between: " + currentItem + " AND " + fullName);
1112             }
1113         }
1114 
1115         logln("Checking that all links are TO canonical zones");
1116         Set<String> s = new TreeSet<String>(old_new.values());
1117         s.removeAll(core);
1118         if (s.size() != 0) {
1119             errln("Links go TO zones that are not canonical! " + s);
1120         }
1121 
1122         logln("Checking that no links are FROM canonical zones");
1123         s = new TreeSet<String>(core);
1124         s.retainAll(old_new.keySet());
1125         if (s.size() != 0) {
1126             errln("Links go FROM zones that are canonical! " + s);
1127         }
1128 
1129         logln("Checking that the zones with rule data are all canonical");
1130         Set<String> zonesWithRules = sc.getZone_rules().keySet();
1131         s.clear();
1132         s.addAll(zonesWithRules);
1133         s.removeAll(core);
1134         if (s.size() != 0) logln("Zones with rules that are not canonical: " + s);
1135 
1136         logln("Checking that the rule data are all canonical");
1137         s.clear();
1138         s.addAll(core);
1139         s.removeAll(zonesWithRules);
1140         s.removeAll(old_new.keySet());
1141         if (s.size() != 0) logln("Canonical zones that don't have rules or links: " + s);
1142 
1143         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
1144             String oldItem = it.next();
1145             logln("old: " + oldItem + "\tnew: " + old_new.get(oldItem));
1146         }
1147         Map<String, Set<String>> new_old = new TreeMap<String, Set<String>>();
1148         for (Iterator<String> it = core.iterator(); it.hasNext();) {
1149             new_old.put(it.next(), new TreeSet<String>());
1150         }
1151         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
1152             String oldItem = it.next();
1153             String newItem = old_new.get(oldItem);
1154             Set<String> oldItems = new_old.get(newItem);
1155             if (oldItems == null) { // try recursing
1156                 logln("!!!!Skipping " + oldItem + " \u2192 " + newItem);
1157                 continue;
1158                 // new_old.put(oldOne, oldItems = new TreeSet());
1159             }
1160             oldItems.add(oldItem);
1161         }
1162         for (Iterator<String> it = new_old.keySet().iterator(); it.hasNext();) {
1163             String newOne = it.next();
1164             Set<String> oldItems = new_old.get(newOne);
1165             logln(newOne + "\t" + oldItems);
1166         }
1167     }
1168 
TestNarrowForms()1169     public void TestNarrowForms() {
1170         if (disableUntilLater("TestMinimalLocalization")) return;
1171 
1172         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
1173             String locale = it.next();
1174             logln("Testing: " + getLocaleAndName(locale));
1175             BreakIterator bi = BreakIterator.getCharacterInstance(new ULocale(locale));
1176             CLDRFile item = cldrFactory.make(locale, false);
1177             // Walk through all the xpaths, adding to currentValues
1178             // Whenever two values for the same xpath are different, we remove from currentValues, and add to okValues
1179             for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
1180                 String xpath = it2.next();
1181                 if (xpath.indexOf("[@type=\"narrow\"]") >= 0) {
1182                     String value = item.getStringValue(xpath);
1183                     // logln("\tTesting: " + value + "\t path: " + xpath);
1184                     int end = getXGraphemeClusterBoundary(bi, value, 0);
1185                     if (end == value.length()) continue;
1186                     errln(getLocaleAndName(locale) + "\tillegal narrow value " + value + "\t path: " + xpath);
1187                     surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is too wide for a \"narrow\" value.");
1188                 }
1189             }
1190         }
1191     }
1192 
1193     static final UnicodeSet XGRAPHEME = new UnicodeSet("[[:mark:][:grapheme_extend:]]");
1194     static final UnicodeSet DIGIT = new UnicodeSet("[:decimal_number:]");
1195 
getXGraphemeClusterBoundary(BreakIterator bi, String value, int start)1196     private int getXGraphemeClusterBoundary(BreakIterator bi, String value, int start) {
1197         if (value.length() <= 1) return 1;
1198 
1199         bi.setText(value);
1200         if (start != 0) bi.preceding(start + 1); // backup one
1201         int current = bi.next();
1202         int cp = 0;
1203         // link any digits
1204         if (DIGIT.contains(UTF16.charAt(value, current - 1))) {
1205             current = DIGIT.findIn(value, current, true);
1206         }
1207         // continue collecting any additional characters that are M or grapheme extend
1208         return XGRAPHEME.findIn(value, current, true);
1209     }
1210 }
1211 
1212 /*
1213  * private static final int
1214  * HELP1 = 0,
1215  * HELP2 = 1,
1216  * SOURCEDIR = 2,
1217  * DESTDIR = 3,
1218  * MATCH = 4,
1219  * SKIP = 5,
1220  * TZADIR = 6,
1221  * NONVALIDATING = 7,
1222  * SHOW_DTD = 8,
1223  * TRANSLIT = 9;
1224  * options[SOURCEDIR].value
1225  *
1226  * private static final UOption[] options = {
1227  * UOption.HELP_H(),
1228  * UOption.HELP_QUESTION_MARK(),
1229  * UOption.SOURCEDIR().setDefault("C:\\ICU4C\\locale\\common\\main\\"),
1230  * UOption.DESTDIR().setDefault("C:\\DATA\\GEN\\cldr\\mainCheck\\"),
1231  * UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
1232  * UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
1233  * UOption.create("tzadir", 't',
1234  * UOption.REQUIRES_ARG).setDefault("C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
1235  * UOption.create("nonvalidating", 'n', UOption.NO_ARG),
1236  * UOption.create("dtd", 'w', UOption.NO_ARG),
1237  * UOption.create("transliterate", 'y', UOption.NO_ARG), };
1238  *
1239  * private static String timeZoneAliasDir = null;
1240  * /
1241  *
1242  * public static void main(String[] args) throws SAXException, IOException {
1243  * UOption.parseArgs(args, options);
1244  * localeList = getMatchingXMLFiles(options[SOURCEDIR].value, options[MATCH].value);
1245  * /*
1246  * log = FileUtilities.openUTF8Writer(options[DESTDIR].value, "log.txt");
1247  * try {
1248  * for (Iterator it = getMatchingXMLFiles(options[SOURCEDIR].value, options[MATCH].value).iterator(); it.hasNext();) {
1249  * String name = (String) it.next();
1250  * for (int i = 0; i <= 1; ++i) {
1251  * boolean resolved = i == 1;
1252  * CLDRKey key = make(name, resolved);
1253  *
1254  * PrintWriter pw = FileUtilities.openUTF8Writer(options[DESTDIR].value, name + (resolved ? "_r" : "") + ".txt");
1255  * write(pw, key);
1256  * pw.close();
1257  *
1258  * }
1259  * }
1260  * } finally {
1261  * log.close();
1262  * System.out.println("Done");
1263  * }
1264  *
1265  *
1266  * <language type="in">Indonesian</language>
1267  * <language type="iw">Hebrew</language>
1268  * <script type="Bali">Balinese</script>
1269  * <script type="Batk">Batak</script>
1270  * <script type="Blis">Blissymbols</script>
1271  * <script type="Brah">Brahmi</script>
1272  * <script type="Bugi">Buginese</script>
1273  * <script type="Cham">Cham</script>
1274  * <script type="Cirt">Cirth</script>
1275  * <script type="Cyrs">Cyrillic (Old Church Slavonic variant)</script>
1276  * <script type="Egyd">Egyptian demotic</script>
1277  * <script type="Egyh">Egyptian hieratic</script>
1278  * <script type="Egyp">Egyptian hieroglyphs</script>
1279  * <script type="Glag">Glagolitic</script>
1280  * <script type="Hmng">Pahawh Hmong</script>
1281  * <script type="Hung">Old Hungarian</script>
1282  * <script type="Inds">Indus (Harappan)</script>
1283  * <script type="Java">Javanese</script>
1284  * <script type="Kali">Kayah Li</script>
1285  * <script type="Khar">Kharoshthi</script>
1286  * <script type="Latf">Latin (Fraktur variant)</script>
1287  * <script type="Latg">Latin (Gaelic variant)</script>
1288  * <script type="Lepc">Lepcha (Rong)</script>
1289  * <script type="Lina">Linear A</script>
1290  * <script type="Mand">Mandaean</script>
1291  * <script type="Maya">Mayan hieroglyphs</script>
1292  * <script type="Mero">Meroitic</script>
1293  * <script type="Orkh">Orkhon</script>
1294  * <script type="Perm">Old Permic</script>
1295  * <script type="Phag">Phags-pa</script>
1296  * <script type="Phnx">Phoenician</script>
1297  * <script type="Plrd">Pollard Phonetic</script>
1298  * <script type="Roro">Rongorongo</script>
1299  * <script type="Sara">Sarati</script>
1300  * <script type="Sylo">Syloti Nagri</script>
1301  * <script type="Syre">Syriac (Estrangelo variant)</script>
1302  * <script type="Syrj">Syriac (Western variant)</script>
1303  * <script type="Syrn">Syriac (Eastern variant)</script>
1304  * <script type="Talu">Tai Lue</script>
1305  * <script type="Teng">Tengwar</script>
1306  * <script type="Tfng">Tifinagh (Berber)</script>
1307  * <script type="Thai">Thai</script>
1308  * <script type="Vaii">Vai</script>
1309  * <script type="Visp">Visible Speech</script>
1310  * <script type="Xpeo">Old Persian</script>
1311  * <script type="Xsux">Cuneiform, Sumero-Akkadian</script>
1312  * <script type="Zxxx">Code for unwritten languages</script>
1313  * <script type="Zzzz">Code for uncoded script</script>
1314  * <territory type="001">World</territory>
1315  * <territory type="002">Africa</territory>
1316  * <territory type="003">North America</territory>
1317  * <territory type="005">South America</territory>
1318  * <territory type="009">Oceania</territory>
1319  * <territory type="011">Western Africa</territory>
1320  * <territory type="013">Central America</territory>
1321  * <territory type="014">Eastern Africa</territory>
1322  * <territory type="015">Northern Africa</territory>
1323  * <territory type="017">Middle Africa</territory>
1324  * <territory type="018">Southern Africa</territory>
1325  * <territory type="019">Americas</territory>
1326  * <territory type="021">Northern America</territory>
1327  * <territory type="029">Caribbean</territory>
1328  * <territory type="030">Eastern Asia</territory>
1329  * <territory type="035">South-eastern Asia</territory>
1330  * <territory type="039">Southern Europe</territory>
1331  * <territory type="053">Australia and New Zealand</territory>
1332  * <territory type="054">Melanesia</territory>
1333  * <territory type="057">Micronesia</territory>
1334  * <territory type="061">Polynesia</territory>
1335  * <territory type="062">South-central Asia</territory>
1336  * <territory type="AX">Aland Islands</territory>
1337  * <territory type="BQ">British Antarctic Territory</territory>
1338  * <territory type="BU">Myanmar</territory>
1339  * <territory type="CS">Czechoslovakia</territory>
1340  * <territory type="CT">Canton and Enderbury Islands</territory>
1341  * <territory type="DD">East Germany</territory>
1342  * <territory type="DY">Benin</territory>
1343  * <territory type="FQ">French Southern and Antarctic Territories</territory>
1344  * <territory type="FX">Metropolitan France</territory>
1345  * <territory type="HV">Burkina Faso</territory>
1346  * <territory type="JT">Johnston Island</territory>
1347  * <territory type="MI">Midway Islands</territory>
1348  * <territory type="NH">Vanuatu</territory>
1349  * <territory type="NQ">Dronning Maud Land</territory>
1350  * <territory type="NT">Neutral Zone</territory>
1351  * <territory type="PC">Pacific Islands Trust Territory</territory>
1352  * <territory type="PU">U.S. Miscellaneous Pacific Islands</territory>
1353  * <territory type="PZ">Panama Canal Zone</territory>
1354  * <territory type="RH">Zimbabwe</territory>
1355  * <territory type="SU">Union of Soviet Socialist Republics</territory>
1356  * <territory type="TP">Timor-Leste</territory>
1357  * <territory type="VD">North Vietnam</territory>
1358  * <territory type="WK">Wake Island</territory>
1359  * <territory type="YD">People's Democratic Republic of Yemen</territory>
1360  * <territory type="ZR">Congo, The Democratic Republic of the</territory>
1361  * <variant type="1901">Traditional German orthography</variant>
1362  * <variant type="1996">German orthography of 1996</variant>
1363  * <variant type="boont">Boontling</variant>
1364  * <variant type="gaulish">Gaulish</variant>
1365  * <variant type="guoyu">Mandarin or Standard Chinese</variant>
1366  * <variant type="hakka">Hakka</variant>
1367  * <variant type="lojban">Lojban</variant>
1368  * <variant type="nedis">Natisone dialect</variant>
1369  * <variant type="rozaj">Resian</variant>
1370  * <variant type="scouse">Scouse</variant>
1371  * <variant type="xiang">Xiang or Hunanese</variant>
1372  *
1373  *
1374  * <currency type="CFP"><displayName>???</displayName></currency>
1375  * <currency type="DDR"><displayName>???</displayName></currency>
1376  * <currency type="EQE"><displayName>???</displayName></currency>
1377  * <currency type="ESA"><displayName>???</displayName></currency>
1378  * <currency type="ESB"><displayName>???</displayName></currency>
1379  * <currency type="JAN"><displayName>???</displayName></currency>
1380  * <currency type="LSM"><displayName>???</displayName></currency>
1381  * <currency type="LUC"><displayName>???</displayName></currency>
1382  * <currency type="LUL"><displayName>???</displayName></currency>
1383  * <currency type="NAM"><displayName>???</displayName></currency>
1384  * <currency type="NEW"><displayName>???</displayName></currency>
1385  * <currency type="RHD"><displayName>???</displayName></currency>
1386  * <currency type="SAN"><displayName>???</displayName></currency>
1387  * <currency type="SDR"><displayName>???</displayName></currency>
1388  * <currency type="SEE"><displayName>???</displayName></currency>
1389  * <currency type="SRI"><displayName>???</displayName></currency>
1390  * <currency type="UAE"><displayName>???</displayName></currency>
1391  * <currency type="UDI"><displayName>???</displayName></currency>
1392  * <currency type="UIC"><displayName>???</displayName></currency>
1393  * <currency type="XAG"><displayName>???</displayName></currency>
1394  * <currency type="XPD"><displayName>???</displayName></currency>
1395  * <currency type="XPT"><displayName>???</displayName></currency>
1396  * <currency type="XRE"><displayName>???</displayName></currency>
1397  * <currency type="XTS"><displayName>???</displayName></currency>
1398  * <currency type="XXX"><displayName>???</displayName></currency>
1399  */
1400