• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004, International Business Machines Corporation and        *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.test;
8 
9 import static org.unicode.cldr.util.PathUtilities.getNormalizedPath;
10 
11 import java.io.File;
12 import java.io.IOException;
13 import java.io.PrintWriter;
14 import java.util.Arrays;
15 import java.util.Calendar;
16 import java.util.Collection;
17 import java.util.Date;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.Iterator;
21 import java.util.LinkedHashSet;
22 import java.util.List;
23 import java.util.Map;
24 import java.util.Set;
25 import java.util.TreeMap;
26 import java.util.TreeSet;
27 
28 import org.unicode.cldr.draft.FileUtilities;
29 import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType;
30 import org.unicode.cldr.util.CLDRFile;
31 import org.unicode.cldr.util.CLDRPaths;
32 import org.unicode.cldr.util.CldrUtility;
33 import org.unicode.cldr.util.Factory;
34 import org.unicode.cldr.util.LanguageTagParser;
35 import org.unicode.cldr.util.PathUtilities;
36 import org.unicode.cldr.util.SimpleFactory;
37 import org.unicode.cldr.util.StandardCodes;
38 import org.unicode.cldr.util.TimezoneFormatter;
39 import org.unicode.cldr.util.XPathParts;
40 import org.xml.sax.SAXException;
41 
42 import com.ibm.icu.dev.test.TestFmwk;
43 import com.ibm.icu.text.BreakIterator;
44 import com.ibm.icu.text.DecimalFormat;
45 import com.ibm.icu.text.NumberFormat;
46 import com.ibm.icu.text.UTF16;
47 import com.ibm.icu.text.UnicodeSet;
48 import com.ibm.icu.util.ULocale;
49 
50 /**
51  * Initial version of CLDR tests. Each test is named TestXXX. To run all the tests, use the options
52  * <blockquote>-nothrow</blockquote>
53  * To run a particular set of tests, include their names, like
54  * <blockquote>-nothrow TestForIllegalAttributeValues TestMinimalLocalization</blockquote>
55  * To show more information (logln), add -verbose
56  * <p>
57  * There are some environment variables that can be used with the test. <br>
58  * -DSHOW_FILES=<anything> shows all create/open of files. <br>
59  * -DXML_MATCH=<regular expression> skips all locales that don't match the regular expression <br>
60  * -DXML_MAIN_DIR=<filesystem directory> resets to a different main directory (eg not cldr/common/main. For example,
61  * some of the tools generate into a locale directory like -DXML_MAIN_DIR=C:\Unicode-CVS2\cldr\common\gen\main\ so this
62  * can be used to check that directory. <br>
63  * -DXML_SKIP_DRAFT=<anything> skips draft locales; only the presence of the property is checked, not its value
64  */
65 public class CLDRTest extends TestFmwk {
66     /**
67      * privates
68      */
69     private static String MATCH;
70     private static String MAIN_DIR;
71     private static boolean SKIP_DRAFT;
72     private Set<String> locales;
73     private Set<String> languageLocales;
74     private Factory cldrFactory;
75     private CLDRFile resolvedRoot;
76     private CLDRFile resolvedEnglish;
77     private final UnicodeSet commonAndInherited = new UnicodeSet(
78         "[[:script=common:][:script=inherited:][:alphabetic=false:]]");
79     private static final String[] WIDTHS = { "narrow", "wide", "abbreviated", "short" };
80     private static final String[] MONTHORDAYS = { "day", "month" };
81     private Map<String, String> localeNameCache = new HashMap<>();
82     private CLDRFile english = null;
83 
84     private Set<String> surveyInfo = new TreeSet<>();
85 
86     /**
87      * TestFmwk boilerplate
88      */
main(String[] args)89     public static void main(String[] args) throws Exception {
90         MATCH = System.getProperty("XML_MATCH");
91         if (MATCH == null)
92             MATCH = ".*";
93         else
94             System.out.println("Resetting MATCH:" + MATCH);
95         MAIN_DIR = System.getProperty("XML_MAIN_DIR");
96         if (MAIN_DIR == null)
97             MAIN_DIR = CLDRPaths.MAIN_DIRECTORY;
98         else
99             System.out.println("Resetting MAIN_DIR:" + MAIN_DIR);
100         SKIP_DRAFT = System.getProperty("XML_SKIP_DRAFT") != null;
101         if (SKIP_DRAFT) System.out.println("Skipping Draft locales");
102 
103         double deltaTime = System.currentTimeMillis();
104         new CLDRTest().run(args);
105         deltaTime = System.currentTimeMillis() - deltaTime;
106         System.out.println("Seconds: " + deltaTime / 1000);
107 
108     }
109 
TestZZZZHack()110     public void TestZZZZHack() throws IOException {
111         // hack to get file written at the end of run.
112         PrintWriter surveyFile = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "surveyInfo.txt");
113         for (String s : surveyInfo) {
114             surveyFile.println(s);
115         }
116         surveyFile.close();
117     }
118 
/**
 * Builds the factory from the static MAIN_DIR / MATCH settings and pre-loads the data shared
 * by the tests: the available locale IDs, the available language locales, and the "root" and
 * "en" files (both requested with the second argument {@code true}).
 *
 * @throws SAXException declared by the original signature
 * @throws IOException declared by the original signature
 */
public CLDRTest() throws SAXException, IOException {
    // TODO parameterize the directory and filter
    this.cldrFactory = Factory.make(MAIN_DIR, MATCH);

    this.locales = this.cldrFactory.getAvailable();
    this.languageLocales = this.cldrFactory.getAvailableLanguages();

    this.resolvedRoot = this.cldrFactory.make("root", true);
    this.resolvedEnglish = this.cldrFactory.make("en", true);
}
137 
138     /**
139      * Check to make sure that the currency formats are kosher.
140      */
TestCurrencyFormats()141     public void TestCurrencyFormats() {
142         // String decimal = "//ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat[@type=\"standard\"]/";
143         // String currency = "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/";
144         for (String locale : locales) {
145             boolean isPOSIX = locale.indexOf("POSIX") >= 0;
146             logln("Testing: " + locale);
147             CLDRFile item = cldrFactory.make(locale, false);
148             for (String xpath : item) {
149                 NumericType type = NumericType.getNumericType(xpath);
150                 if (type == NumericType.NOT_NUMERIC) continue;
151                 String value = item.getStringValue(xpath);
152                 // at this point, we only have currency formats
153                 String pattern = DisplayAndInputProcessor.getCanonicalPattern(value, type, isPOSIX);
154                 if (!pattern.equals(value)) {
155                     String draft = "";
156                     if (item.getFullXPath(xpath).indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
157                     assertEquals(getLocaleAndName(locale) + draft + " " + type + " pattern incorrect", pattern, value);
158                 }
159             }
160         }
161     }
162 
163     /**
164      * Internal class
165      */
166     private static class ValueCount {
167         int count = 1;
168         String value;
169         String fullxpath;
170     }
171 
172     /**
173      * Verify that if all the children of a language locale do not have the same value for the same key.
174      */
TestCommonChildren()175     public void TestCommonChildren() {
176         if (disableUntilLater("TestCommonChildren")) return;
177 
178         Map<String, ValueCount> currentValues = new TreeMap<>();
179         Set<String> okValues = new TreeSet<>();
180 
181         for (String parent : languageLocales) {
182             logln("Testing: " + parent);
183             currentValues.clear();
184             okValues.clear();
185             Set<String> availableWithParent = cldrFactory.getAvailableWithParent(parent, true);
186             for (String locale : availableWithParent) {
187                 logln("\tTesting: " + locale);
188                 CLDRFile item = cldrFactory.make(locale, false);
189                 // Walk through all the xpaths, adding to currentValues
190                 // Whenever two values for the same xpath are different, we remove from currentValues, and add to
191                 // okValues
192                 for (String xpath : item) {
193                     if (okValues.contains(xpath)) continue;
194                     if (xpath.startsWith("//ldml/identity/")) continue; // skip identity elements
195                     String v = item.getStringValue(xpath);
196                     ValueCount last = currentValues.get(xpath);
197                     if (last == null) {
198                         ValueCount vc = new ValueCount();
199                         vc.value = v;
200                         vc.fullxpath = item.getFullXPath(xpath);
201                         currentValues.put(xpath, vc);
202                     } else if (v.equals(last.value)) {
203                         last.count++;
204                     } else {
205                         okValues.add(xpath);
206                         currentValues.remove(xpath);
207                     }
208                 }
209                 // at the end, only the keys left in currentValues are (possibly) faulty
210                 // they are actually bad IFF either
211                 // (a) the count is equal to the total (thus all children are the same), or
212                 // (b) their value is the same as the parent's resolved value (thus all children are the same or the
213                 // same
214                 // as the inherited parent value).
215             }
216             if (currentValues.size() == 0) continue;
217             int size = availableWithParent.size();
218             CLDRFile parentCLDR = cldrFactory.make(parent, true);
219             for (String xpath : currentValues.keySet()) {
220                 ValueCount vc = currentValues.get(xpath);
221                 if (vc.count == size || (vc.value.equals(parentCLDR.getStringValue(xpath))
222                     && vc.fullxpath.equals(parentCLDR.getStringValue(xpath)))) {
223                     String draft = "";
224                     if (vc.fullxpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
225                     String count = (vc.count == size ? "" : vc.count + "/") + size;
226                     warnln(getLocaleAndName(parent) + draft +
227                         "\tall children (" + count + ") have same value for:\t"
228                         + xpath + ";\t" + vc.value);
229                 }
230             }
231         }
232     }
233 
234     static String[] EXEMPLAR_SKIPS = { "/hourFormat", "/exemplarCharacters", "/pattern", "/localizedPatternChars" };
235 
236     /**
237      * Check that the exemplars include all characters in the data.
238      */
TestThatExemplarsContainAll()239     public void TestThatExemplarsContainAll() {
240         UnicodeSet allExemplars = new UnicodeSet();
241         if (disableUntilLater("TestThatExemplarsContainAll")) return;
242         Set<String> counts = new TreeSet<>();
243         int totalCount = 0;
244         UnicodeSet localeMissing = new UnicodeSet();
245         for (String locale : locales) {
246             if (locale.equals("root")) continue;
247             CLDRFile resolved = cldrFactory.make(locale, false); // FIX LATER
248             UnicodeSet exemplars = getFixedExemplarSet(locale, resolved);
249             CLDRFile plain = cldrFactory.make(locale, false);
250             int count = 0;
251             localeMissing.clear();
252             file: for (String xpath : plain) {
253                 for (int i = 0; i < EXEMPLAR_SKIPS.length; ++i) {
254                     if (xpath.indexOf(EXEMPLAR_SKIPS[i]) > 0) continue file; // skip some items.
255                 }
256                 if (SKIP_DRAFT) {
257                     String fullxpath = plain.getFullXPath(xpath);
258                     if (fullxpath.indexOf("[@draft=\"unconfirmed\"") > 0) continue;
259                 }
260                 if (xpath.startsWith("//ldml/posix/messages")) continue;
261                 String value = plain.getStringValue(xpath);
262                 allExemplars.addAll(value);
263                 if (!exemplars.containsAll(value)) {
264                     count++;
265                     UnicodeSet missing = new UnicodeSet().addAll(value).removeAll(exemplars);
266                     localeMissing.addAll(missing);
267                     logln(getLocaleAndName(locale) + "\t" + xpath + "\t<" + value + "> contains " + missing
268                         + ", not in exemplars");
269                     surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' contains characters "
270                         + missing.toPattern(false) + ", which are not in exemplars");
271                 }
272             }
273             NumberFormat nf = new DecimalFormat("000");
274             if (count != 0) {
275                 totalCount += count;
276                 counts.add(nf.format(count) + "\t" + getLocaleAndName(locale) + "\t" + localeMissing);
277             }
278             if (localeMissing.size() != 0) {
279                 errln(getLocaleAndName(locale) + "\t uses " + localeMissing + ", not in exemplars");
280             }
281         }
282         for (String c : counts) {
283             logln(c);
284         }
285         logln("Total Count: " + totalCount);
286         System.out.println("All exemplars: " + allExemplars.toPattern(true));
287     }
288 
289     // Get Date-Time in milliseconds
getDateTimeinMillis(int year, int month, int date)290     private static long getDateTimeinMillis(int year, int month, int date) {
291         Calendar cal = Calendar.getInstance();
292         cal.set(year, month, date);
293         return cal.getTimeInMillis();
294     }
295 
296     static final long disableDate = getDateTimeinMillis(2005, 6 - 1, 3);
297 
298     /**
299      *
300      */
disableUntilLater(String string)301     private boolean disableUntilLater(String string) {
302         if (new Date().getTime() >= disableDate) return false;
303         warnln("Disabling " + string + " until " + new Date(disableDate));
304         return true;
305     }
306 
307     /**
308      * Internal
309      */
getFixedExemplarSet(String locale, CLDRFile cldrfile)310     private UnicodeSet getFixedExemplarSet(String locale, CLDRFile cldrfile) {
311         UnicodeSet exemplars = getExemplarSet(cldrfile, "");
312         if (exemplars.size() == 0) {
313             errln(getLocaleAndName(locale) + " has empty exemplar set");
314         }
315         exemplars.addAll(getExemplarSet(cldrfile, "standard"));
316         UnicodeSet auxiliary = getExemplarSet(cldrfile, "auxiliary");
317         if (exemplars.containsSome(auxiliary)) {
318             errln(getLocaleAndName(locale) + "Auxiliary & main exemplars should be disjoint, but overlap with " +
319                 new UnicodeSet(exemplars).retainAll(auxiliary) +
320                 ": change auxiliary to " + auxiliary.removeAll(exemplars));
321         }
322         exemplars.addAll(auxiliary);
323         exemplars.addAll(commonAndInherited);
324         return exemplars;
325     }
326 
327     /**
328      * @return Gets an exemplar set. Also verifies that the set contains no properties.
329      */
getExemplarSet(CLDRFile cldrfile, String type)330     public UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) {
331         if (type.length() != 0) type = "[@type=\"" + type + "\"]";
332         String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type);
333         if (v == null) return new UnicodeSet();
334         String pattern = v;
335         if (pattern.indexOf("[:") >= 0 || pattern.indexOf("\\p{") > 0) {
336             errln(getLocaleName(cldrfile.getLocaleID()) + " exemplar pattern contains property: " + pattern);
337         }
338         try {
339             UnicodeSet result = new UnicodeSet(v, UnicodeSet.CASE);
340             result.remove(0x20);
341             return result;
342         } catch (RuntimeException e) {
343             e.printStackTrace();
344             errln(getLocaleAndName(cldrfile.getLocaleID()) + " has illegal exemplar set: <" + v + ">");
345             return new UnicodeSet();
346         }
347         // if (type.length() != 0) System.out.println("fetched set for " + type);
348     }
349 
getLocaleAndName(String locale)350     public String getLocaleAndName(String locale) {
351         return locale + " (" + getLocaleName(locale) + ")";
352     }
353 
354     /**
355      * @return the ID plus its localization (for language, script, and territory IDs only)
356      */
getIDAndLocalization(String id)357     public String getIDAndLocalization(String id) {
358         return id + " " + getLocalization(id);
359     }
360 
361     /**
362      * @return the localization (for language, script, and territory IDs only)
363      */
getLocalization(String id)364     public String getLocalization(String id) {
365         if (english == null) english = cldrFactory.make("en", true);
366         if (id.length() == 0) return "?";
367         // pick on basis of case
368         char ch = id.charAt(0);
369         if ('a' <= ch && ch <= 'z') return getName(english, "languages/language", id);
370         if (id.length() == 4 && 'A' <= ch && ch <= 'Z') return getName(english, "scripts/script", id);
371         return getName(english, "territories/territory", id);
372     }
373 
374     /**
375      * Internal
376      */
getIDAndLocalization(Set<String> missing)377     private String getIDAndLocalization(Set<String> missing) {
378         StringBuffer buffer = new StringBuffer();
379         for (String next : missing) {
380             if (buffer.length() != 0) buffer.append("; ");
381             buffer.append(getIDAndLocalization(next));
382         }
383         return buffer.toString();
384     }
385 
getLocaleName(String locale)386     public String getLocaleName(String locale) {
387         String name = localeNameCache.get(locale);
388         if (name != null) return name;
389         if (english == null) english = cldrFactory.make("en", true);
390         String result = english.getName(locale);
391         /*
392          * Collection c = Utility.splitList(locale, '_', false, null);
393          * String[] pieces = new String[c.size()];
394          * c.toArray(pieces);
395          * int i = 0;
396          * String result = getName(english, "languages/language", pieces[i++]);
397          * if (pieces[i].length() == 0) return result;
398          * if (pieces[i].length() == 4) {
399          * result += " " + getName(english, "scripts/script", pieces[i++]);
400          * }
401          * if (pieces[i].length() == 0) return result;
402          * result += " " + getName(english, "territories/territory", pieces[i++]);
403          * if (pieces[i].length() == 0) return result;
404          * result += " " + getName(english, "variant/variants", pieces[i++]);
405          */
406         localeNameCache.put(locale, result);
407         return result;
408     }
409 
410     /**
411      * Internal
412      */
getName(CLDRFile english, String kind, String type)413     private String getName(CLDRFile english, String kind, String type) {
414         String v = english.getStringValue("//ldml/localeDisplayNames/" + kind + "[@type=\"" + type + "\"]");
415         if (v == null) return "<" + type + ">";
416         return v;
417     }
418 
419     /**
420      * Make sure we are only using attribute values that are in RFC3066bis, the Olson database (with aliases removed)
421      * or ISO 4217
422      *
423      * @throws IOException
424      */
TestForIllegalAttributeValues()425     public void TestForIllegalAttributeValues() {
426         // check for illegal attribute values that are not in the DTD
427         Map<String, Set<String>> result = new TreeMap<>();
428         Map<String, Set<String>> totalResult = new TreeMap<>();
429         for (String locale : locales) {
430             logln("Testing: " + locale);
431             CLDRFile item = cldrFactory.make(locale, false);
432             result.clear();
433             Set<String> xpathFailures = null; // don't collect
434             // XPathParts parts;
435             // String xpath;
436             // CLDRFile.StringValue value;
437             // String element;
438             // Map attributes;
439             checkAttributeValidity(item, result, xpathFailures);
440 
441             // now show
442             //String localeName = getLocaleAndName(locale);
443             for (Iterator<String> it3 = result.keySet().iterator(); it3.hasNext();) {
444                 String code = it3.next();
445                 Set<String> avalues = result.get(code);
446                 errln(getLocaleAndName(locale) + "\tillegal attribute value for " + code + ", value:\t" + show(avalues));
447                 Set<String> totalvalues = totalResult.get(code);
448                 if (totalvalues == null) totalResult.put(code, totalvalues = new TreeSet<>());
449                 totalvalues.addAll(avalues);
450             }
451         }
452         for (Iterator<String> it3 = totalResult.keySet().iterator(); it3.hasNext();) {
453             String code = it3.next();
454             Set<String> avalues = totalResult.get(code);
455             errln("All illegal attribute values for " + code + ", value:\t" + show(avalues));
456         }
457     }
458 
459     /**
460      * Tests whether the display names have any collisions, e.g. if in the fully resolved
461      * locale $ is used for both USD and UAD.
462      *
463      */
TestDisplayNameCollisions()464     public void TestDisplayNameCollisions() {
465         if (disableUntilLater("TestDisplayNameCollisions")) return;
466 
467         Map<String, String>[] maps = new HashMap[CLDRFile.LIMIT_TYPES];
468         for (int i = 0; i < maps.length; ++i) {
469             maps[i] = new HashMap<>();
470         }
471         Set<String> collisions = new TreeSet<>();
472         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
473             String locale = it.next();
474             CLDRFile item = cldrFactory.make(locale, true);
475             for (int i = 0; i < maps.length; ++i) {
476                 maps[i].clear();
477             }
478             collisions.clear();
479 
480             for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
481                 String xpath = it2.next();
482                 int nameType = CLDRFile.getNameType(xpath);
483                 if (nameType < 0) continue;
484                 String value = item.getStringValue(xpath);
485                 String xpath2 = maps[nameType].get(value);
486                 if (xpath2 == null) {
487                     maps[nameType].put(value, xpath);
488                     continue;
489                 }
490                 collisions.add(CLDRFile.getNameTypeName(nameType) + "\t" + value + "\t" + xpath + "\t" + xpath2);
491                 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is a duplicate of what is in " + xpath2);
492             }
493             String name = getLocaleAndName(locale) + "\t";
494             for (Iterator<String> it2 = collisions.iterator(); it2.hasNext();) {
495                 errln(name + it2.next());
496             }
497         }
498     }
499 
500     /**
501      * Checks the validity of attributes, based on StandardCodes.
502      * The invalid codes are added to badCodes, and the failing xpaths are added to xpathFailures.
503      *
504      * @param item
505      * @param badCodes
506      * @param xpathFailures
507      */
checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures)508     public static void checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures) {
509         for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
510             String xpath = it2.next();
511             XPathParts parts = XPathParts.getFrozenInstance(item.getFullXPath(xpath));
512             for (int i = 0; i < parts.size(); ++i) {
513                 if (parts.getAttributeCount(i) == 0) {
514                     continue;
515                 }
516                 String element = parts.getElement(i);
517                 Map<String, String> attributes = parts.getAttributes(i);
518                 for (Iterator<String> it3 = attributes.keySet().iterator(); it3.hasNext();) {
519                     String attribute = it3.next();
520                     String avalue = attributes.get(attribute);
521                     checkValidity(xpath, element, attribute, avalue, badCodes, xpathFailures);
522                 }
523             }
524         }
525     }
526 
527     /**
528      * Internal
529      */
show(Collection<String> avalues)530     private String show(Collection<String> avalues) {
531         StringBuffer result = new StringBuffer("{");
532         boolean first = true;
533         for (Iterator<String> it3 = avalues.iterator(); it3.hasNext();) {
534             if (first)
535                 first = false;
536             else
537                 result.append(", ");
538             result.append(it3.next().toString());
539         }
540         result.append("}");
541         return result.toString();
542     }
543 
544     /**
545      * Internal function
546      */
checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, Set<String> xpathsFailing)547     private static void checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results,
548         Set<String> xpathsFailing) {
549         StandardCodes codes = StandardCodes.make();
550         if (attribute.equals("type")) {
551             boolean checkReplacements = xpath.indexOf("/identity") < 0;
552             if (element.equals("currency"))
553                 checkCodes(xpath, "currency", avalue, codes, results, xpathsFailing, checkReplacements);
554             else if (element.equals("script"))
555                 checkCodes(xpath, "script", avalue, codes, results, xpathsFailing, checkReplacements);
556             else if (element.equals("territory"))
557                 checkCodes(xpath, "territory", avalue, codes, results, xpathsFailing, checkReplacements);
558             else if (element.equals("language"))
559                 checkCodes(xpath, "language", avalue, codes, results, xpathsFailing, checkReplacements);
560             else if (element.equals("zone"))
561                 checkCodes(xpath, "tzid", avalue, codes, results, xpathsFailing, checkReplacements);
562         }
563     }
564 
565     /**
566      * Internal function
567      *
568      * @param checkReplacements
569      *            TODO
570      */
571     private static void checkCodes(String xpath, String code, String avalue, StandardCodes codes, Map<String, Set<String>> results,
572         Set<String> xpathFailures, boolean checkReplacements) {
573         // ok if code is found AND it has no replacement
574         if (codes.getData(code, avalue) != null
575             && (!checkReplacements || codes.getReplacement(code, avalue) == null)) return;
576 
577         if (xpathFailures != null) xpathFailures.add(xpath);
578         if (results == null) return;
579         Set<String> s = results.get(code);
580         if (s == null) {
581             s = new TreeSet<>();
582             results.put(code, s);
583         }
584         s.add(avalue);
585     }
586 
587     /**
588      * Verify that a small set of locales (currently just English) has everything translated.
589      *
590      * @throws IOException
591      */
592     public void TestCompleteLocales() {
593         // just test English for now
594         if (english == null) english = cldrFactory.make("en", true);
595         checkTranslatedCodes(english);
596     }
597 
598     /**
599      * Tests that the file contains codes for all main display name ids: language, script, territory, tzid, currency.
600      */
601     private void checkTranslatedCodes(CLDRFile cldrfile) {
602         StandardCodes codes = StandardCodes.make();
603         checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency", "/displayName");
604         // can't check timezones for English.
605         // checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone", "");
606         checkTranslatedCode(cldrfile, codes, "language", "//ldml/localeDisplayNames/languages/language", "");
607         checkTranslatedCode(cldrfile, codes, "script", "//ldml/localeDisplayNames/scripts/script", "");
608         checkTranslatedCode(cldrfile, codes, "territory", "//ldml/localeDisplayNames/territories/territory", "");
609         checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant", "");
610     }
611 
612     /**
613      * @param codes
614      * @param type
615      * @param prefix
616      * @param postfix
617      *            TODO
618      */
619     private void checkTranslatedCode(CLDRFile cldrfile, StandardCodes codes, String type, String prefix, String postfix) {
620 
621         // TODO, expand to other languages
622         Map<String, Set<String>> completionExceptions = new HashMap<>();
623         Set<String> scriptExceptions = new HashSet<>();
624         scriptExceptions.add("Cham");
625         scriptExceptions.add("Thai");
626         completionExceptions.put("script", scriptExceptions);
627 
628         Set<String> codeItems = codes.getGoodAvailableCodes(type);
629         int count = 0;
630         Set<String> exceptions = completionExceptions.get(type);
631         for (String code : codeItems) {
632             String rfcname = codes.getData(type, code);
633             // if (rfcname.equals("ZZ")) continue;
634             ++count;
635             if (rfcname.equals("PRIVATE USE")) continue;
636             String fullFragment = prefix + "[@type=\"" + code + "\"]" + postfix;
637             String v = cldrfile.getStringValue(fullFragment);
638             if (v == null) {
639                 errln("Missing translation for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">");
640                 continue;
641             }
642             String translation = v;
643             if (translation.equals(code)) {
644                 if (exceptions != null && exceptions.contains(code)) continue;
645                 errln("Translation = code for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">");
646                 continue;
647             }
648         }
649         logln("Total " + type + ":\t" + count);
650     }
651 
652     // <territoryContainment><group type="001" contains="002 009 019 142 150"/>
653     // <languageData><language type="af" scripts="Latn" territories="ZA"/>
654     void getSupplementalData(Map<String, Set<String>> language_scripts, Map<String, Set<String>> language_territories,
655         Map<String, Set<String>> group_territory,
656         Map<String, Set<String>> territory_currencies, Map<String, Map<String, String>> aliases) {
657 
658         boolean SHOW = false;
659         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
660         CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
661         for (Iterator<String> it = supp.iterator(); it.hasNext();) {
662             String path = it.next();
663             try {
664                 XPathParts parts = XPathParts.getFrozenInstance(supp.getFullXPath(path));
665                 Map<String, String> m;
666                 String type = "";
667                 if (aliases != null && parts.findElement("alias") >= 0) {
668                     m = parts.findAttributes(type = "languageAlias");
669                     if (m == null) m = parts.findAttributes(type = "territoryAlias");
670                     if (m != null) {
671                         Map top = aliases.get(type);
672                         if (top == null) {
673                             aliases.put(type, top = new TreeMap());
674                         }
675                         top.put(m.get("type"), m.get("replacement"));
676                     }
677                 }
678                 if (territory_currencies != null) {
679                     m = parts.findAttributes("region");
680                     if (m != null) {
681                         String region = m.get("iso3166");
682                         Set s = territory_currencies.get(region);
683                         if (s == null) {
684                             territory_currencies.put(region, s = new LinkedHashSet());
685                         }
686                         m = parts.findAttributes("currency");
687                         if (m == null) {
688                             warnln("missing currency for region: " + path);
689                             continue;
690                         }
691                         String currency = m.get("iso4217");
692                         s.add(currency);
693                         m = parts.findAttributes("alternate");
694                         String alternate = m == null ? null : (String) m.get("iso4217");
695                         if (alternate != null) {
696                             s.add(alternate);
697                         }
698                         continue;
699                     }
700                 }
701                 m = parts.findAttributes("group");
702                 if (m != null) {
703                     if (group_territory == null) continue;
704                     type = m.get("type");
705                     String contains = m.get("contains");
706                     group_territory.put(type, new TreeSet(CldrUtility.splitList(contains, ' ', true)));
707                     continue;
708                 }
709                 m = parts.findAttributes("language");
710                 if (m == null) continue;
711                 String language = m.get("type");
712                 String scripts = m.get("scripts");
713                 if (scripts == null)
714                     language_scripts.put(language, new TreeSet<String>());
715                 else {
716                     language_scripts.put(language, new TreeSet<>(CldrUtility.splitList(scripts, ' ', true)));
717                     if (SHOW)
718                         System.out.println(getIDAndLocalization(language) + "\t\t"
719                             + getIDAndLocalization(language_scripts.get(language)));
720                 }
721                 String territories = m.get("territories");
722                 if (territories == null)
723                     language_territories.put(language, new TreeSet<String>());
724                 else {
725                     language_territories.put(language, new TreeSet<>(CldrUtility.splitList(territories, ' ', true)));
726                     if (SHOW)
727                         System.out.println(getIDAndLocalization(language) + "\t\t"
728                             + getIDAndLocalization(language_territories.get(language)));
729                 }
730             } catch (RuntimeException e) {
731                 throw (IllegalArgumentException) new IllegalArgumentException("Failure with: " + path).initCause(e);
732             }
733         }
734     }
735 
736     /**
737      * Verify that the minimal localizations are present.
738      */
TestMinimalLocalization()739     public void TestMinimalLocalization() throws IOException {
740         if (disableUntilLater("TestMinimalLocalization")) return;
741 
742         boolean testDraft = false;
743         Map<String, Set<String>> language_scripts = new HashMap<>();
744         Map<String, Set<String>> language_territories = new HashMap<>();
745         getSupplementalData(language_scripts, language_territories, null, null, null);
746         LanguageTagParser localIDParser = new LanguageTagParser();
747         // see http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/design/minimal_requirements.htm
748         int[] failureCount = new int[1];
749         int[] warningCount = new int[1];
750         for (Iterator<String> it = languageLocales.iterator(); it.hasNext();) {
751             String locale = it.next();
752             if (locale.equals("root")) continue;
753             // if (!locale.equals("zh_Hant")) continue;
754 
755             CLDRFile item = cldrFactory.make(locale, true);
756             if (!testDraft && item.isDraft()) {
757                 logln(getLocaleAndName(locale) + "\tskipping draft");
758                 continue;
759             }
760             UnicodeSet exemplars = getFixedExemplarSet(locale, item);
761             CLDRFile missing = SimpleFactory.makeFile(locale);
762             failureCount[0] = 0;
763             warningCount[0] = 0;
764             localIDParser.set(locale);
765             String language = localIDParser.getLanguage();
766             logln("Testing: " + locale);
767             // languages
768             Set<String> languages = new TreeSet<>(CldrUtility.MINIMUM_LANGUAGES);
769             languages.add(language);
770             // LANGUAGE_NAME = 0, SCRIPT_NAME = 1, TERRITORY_NAME = 2, VARIANT_NAME = 3,
771             // CURRENCY_NAME = 4, CURRENCY_SYMBOL = 5, TZID = 6
772 
773             checkForItems(item, languages, CLDRFile.LANGUAGE_NAME, missing, failureCount, null);
774 
775             /*
776              * checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency");
777              * checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone");
778              * checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant");
779              */
780 
781             Set<String> scripts = new TreeSet<>();
782             scripts.add("Latn");
783             Set<String> others = language_scripts.get(language);
784             if (others != null) scripts.addAll(others);
785             checkForItems(item, scripts, CLDRFile.SCRIPT_NAME, missing, failureCount, null);
786 
787             Set<String> countries = new TreeSet<>(CldrUtility.MINIMUM_TERRITORIES);
788             others = language_territories.get(language);
789             if (others != null) countries.addAll(others);
790             checkForItems(item, countries, CLDRFile.TERRITORY_NAME, missing, failureCount, null);
791 
792             Set<String> currencies = new TreeSet<>();
793             StandardCodes sc = StandardCodes.make();
794             for (Iterator<String> it2 = countries.iterator(); it2.hasNext();) {
795                 String country = it2.next();
796                 Set<String> countryCurrencies = sc.getMainCurrencies(country);
797                 if (countryCurrencies == null) {
798                     errln("Internal Error: no currencies for " + country + ", locale: " + locale);
799                 } else {
800                     currencies.addAll(countryCurrencies);
801                 }
802             }
803             checkForItems(item, currencies, CLDRFile.CURRENCY_NAME, missing, failureCount, null);
804             checkForItems(item, currencies, CLDRFile.CURRENCY_SYMBOL, missing, failureCount, exemplars);
805 
806             // context=format and width=wide; context=stand-alone & width=abbreviated
807             Set<String> months = new TreeSet<>();
808             for (int i = 1; i <= 12; ++i)
809                 months.add(i + "");
810             Set<String> days = new TreeSet<>(Arrays.asList(new String[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" }));
811             for (int i = -7; i < 0; ++i) {
812                 checkForItems(item, (i < -4 ? months : days), i, missing, failureCount, null);
813             }
814 
815             String filename = "missing_" + locale + ".xml";
816             if (failureCount[0] > 0 || warningCount[0] > 0) {
817                 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "missing/", filename);
818                 missing.write(out);
819                 out.close();
820                 // String s = getIDAndLocalization(missing);
821                 String message = "missing localizations, creating file"
822                     + getNormalizedPath(CLDRPaths.GEN_DIRECTORY, "missing", filename);
823                 if (failureCount[0] > 0)
824                     warnln(getLocaleAndName(locale) + "\t" + message);
825                 else
826                     logln(getLocaleAndName(locale) + "\tpossibly " + message);
827             } else {
828                 new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).delete();
829             }
830         }
831     }
832 
833     /**
834      * Internal
835      */
getDateKey(String monthOrDay, String width, String code)836     private String getDateKey(String monthOrDay, String width, String code) {
837         // String context = width.equals("narrow") ? "format" : "stand-alone";
838         return "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/"
839             + monthOrDay + "s/" + monthOrDay + "Context[@type=\"format\"]/"
840             + monthOrDay + "Width[@type=\"" + width + "\"]/" + monthOrDay
841             + "[@type=\"" + code + "\"]";
842     }
843 
844     /**
845      * Internal
846      */
getDateKey(int type, String code)847     private String getDateKey(int type, String code) {
848         // type is 6..4 for months abbrev..narrow, 3..0 for days short..narrow
849         int monthOrDayType = 0, widthType = type;
850         if (type >= 4) {
851             monthOrDayType = 1;
852             widthType -= 4;
853         }
854         return getDateKey(MONTHORDAYS[monthOrDayType], WIDTHS[widthType], code);
855     }
856 
857     /**
858      * @param item
859      * @param codes
860      * @param missing
861      * @param exemplarTest
862      *            TODO
863      *            TODO
864      */
checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], UnicodeSet exemplarTest)865     private void checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[],
866         UnicodeSet exemplarTest) {
867         // check codes
868         for (Iterator<String> it2 = codes.iterator(); it2.hasNext();) {
869             String code = it2.next();
870             String key;
871             if (type >= 0) {
872                 key = CLDRFile.getKey(type, code);
873             } else {
874                 key = getDateKey(-type - 1, code);
875             }
876             String v = item.getStringValue(key);
877             String rootValue = resolvedRoot.getStringValue(key);
878             if (v == null || v.equals(rootValue) && (exemplarTest == null || !exemplarTest.containsAll(rootValue))) {
879                 String englishValue = resolvedEnglish.getStringValue(key);
880                 String transValue;
881                 if (englishValue != null) {
882                     transValue = englishValue;
883                 } else {
884                     transValue = code;
885                 }
886                 missing.add(key, "TODO " + transValue);
887                 failureCount[0]++;
888             } else {
889                 logln("\t" + code + "\t" + v);
890             }
891         }
892     }
893 
894     /*
895      * void showTestStr() {
896      * LocaleIDParser lparser = new LocaleIDParser();
897      * Collection s = split(teststr,',', true, new ArrayList());
898      * for (Iterator it = s.iterator(); it.hasNext();) {
899      * String item = (String)it.next();
900      * lparser.set(item.replace('?', '_'));
901      * String region = lparser.getRegion();
902      * System.out.print(item.replace('?', '-') + " (" + getLocalization(region) + "), ");
903      * //System.out.print(getLocalization(region) + ", ");
904      * }
905      * }
906      * static String teststr =
907      * "en?AG, en?AI, en?AS, en?AU, en?IN, en?BB, en?BE, en?BM, en?BN, en?BS, en?BW, en?BZ, en?CA, en?CK, en?CM, en?DM, en?ER, en?ET, en?FJ, en?FK, en?FM, en?GB, en?GD, en?GH, en?GI, en?GM, en?GU, en?GY, en?HK, en?IE, en?IL, en?IO, en?JM, en?KE, en?KI, en?KN, en?KY, en?LC, en?LR, en?LS, en?MH, en?MP, en?MS, en?MT, en?MU, en?MW, en?NA, en?NF, en?NG, en?NR, en?NU, en?NZ, en?PG, en?PH, en?PK, en?PN, en?PR, en?PW, en?RW, en?SB, en?SC, en?SG, en?SH, en?SL, en?SO, en?SZ, en?TC, en?TK, en?TO, en?TT, en?UG, en?UM, en?US, en?VC, en?VG, en?VI, en?VU, en?WS, en?ZA, en?ZM, en?ZW"
908      * ;
909      */
910 
911     CldrUtility.CollectionTransform EnglishName = new CldrUtility.CollectionTransform() {
912         @Override
913         public Object transform(Object source) {
914             // TODO Auto-generated method stub
915             return getLocalization(source.toString()) + " (" + source + ")";
916         }
917     };
918 
919     CldrUtility.CollectionTransform EnglishCurrencyName = new CldrUtility.CollectionTransform() {
920         @Override
921         public Object transform(Object source) {
922             if (english == null) english = cldrFactory.make("en", true);
923             return english.getName("currency", source.toString()) + " (" + source + ")";
924         }
925     };
926 
927     /**
928      * Tests that the supplemental data is well-formed.
929      *
930      */
TestSupplementalData()931     public void TestSupplementalData() {
932         Map<String, Set<String>> language_scripts = new TreeMap<>();
933         Map<String, Set<String>> language_territories = new TreeMap<>();
934         Map<String, Set<String>> groups = new TreeMap<>();
935         Map<String, Set<String>> territory_currencies = new TreeMap<>();
936         Map<String, Map<String, String>> aliases = new TreeMap<>();
937         getSupplementalData(language_scripts, language_territories, groups, territory_currencies, aliases);
938         Set<String> sTerritories = new TreeSet<>();
939         for (Iterator<Set<String>> it = language_territories.values().iterator(); it.hasNext();) {
940             sTerritories.addAll(it.next());
941         }
942         StandardCodes sc = StandardCodes.make();
943         Set<String> fullTerritories = sc.getAvailableCodes("territory");
944         Set<String> fullLanguages = sc.getAvailableCodes("language");
945 
946         Set<String> allLanguages = new TreeSet<>(language_scripts.keySet());
947         allLanguages.addAll(language_territories.keySet());
948         for (Iterator<String> it = allLanguages.iterator(); it.hasNext();) {
949             Object language = it.next();
950             Set<String> scripts = language_scripts.get(language);
951             Set<String> territories = language_territories.get(language);
952             logln(EnglishName.transform(language)
953                 + " scripts: " + EnglishName.transform(scripts)
954                 + " territories: " + EnglishName.transform(territories));
955         }
956 
957         Map<String, String> changedLanguage = new TreeMap<>();
958         for (Iterator<String> it = fullLanguages.iterator(); it.hasNext();) {
959             String code = it.next();
960             List<String> data = sc.getFullData("language", code);
961             if (data.size() < 3) {
962                 System.out.println("data problem: " + data);
963                 continue;
964             }
965             String replacement = data.get(2);
966             if (!replacement.equals("")) {
967                 if (!replacement.equals("--")) changedLanguage.put(code, replacement);
968                 continue;
969             }
970         }
971 
972         // remove private use, deprecated, groups
973         Set<String> standardTerritories = new TreeSet<>();
974         Map<String, String> changedTerritory = new TreeMap<>();
975         for (Iterator<String> it = fullTerritories.iterator(); it.hasNext();) {
976             String code = it.next();
977             if (code.equals("200")) continue; // || code.equals("YU") || code.equals("PZ")
978             List<String> data = sc.getFullData("territory", code);
979             if (data.get(0).equals("PRIVATE USE")) continue;
980             if (!data.get(2).equals("")) {
981                 if (!data.get(2).equals("--")) changedTerritory.put(code, data.get(2));
982                 continue;
983             }
984             standardTerritories.add(code);
985         }
986         standardTerritories.removeAll(groups.keySet());
987 
988         if (!standardTerritories.containsAll(sTerritories)) {
989             TreeSet<String> extras = new TreeSet<>(sTerritories);
990             extras.removeAll(standardTerritories);
991             errln("Supplemental Language Territories contain illegal values: " + EnglishName.transform(extras));
992         }
993         if (!sTerritories.containsAll(standardTerritories)) {
994             TreeSet<String> extras = new TreeSet<>(standardTerritories);
995             extras.removeAll(sTerritories);
996             warnln("Missing Language Territories: " + EnglishName.transform(extras));
997         }
998 
999         // now test currencies
1000         logln("Check that no illegal territories are used");
1001         if (!standardTerritories.containsAll(territory_currencies.keySet())) {
1002             TreeSet<String> extras = new TreeSet<>(territory_currencies.keySet());
1003             extras.removeAll(fullTerritories);
1004             if (extras.size() != 0) errln("Currency info -- Illegal Territories: " + EnglishName.transform(extras));
1005             extras = new TreeSet<>(territory_currencies.keySet());
1006             extras.retainAll(fullTerritories);
1007             extras.removeAll(standardTerritories);
1008             if (extras.size() != 0) warnln("Currency info -- Archaic Territories: " + EnglishName.transform(extras));
1009         }
1010         logln("Check that no territories are missing");
1011         if (!territory_currencies.keySet().containsAll(standardTerritories)) {
1012             TreeSet<String> extras = new TreeSet<>(standardTerritories);
1013             extras.removeAll(territory_currencies.keySet());
1014             errln("Currency info -- Missing Territories: " + EnglishName.transform(extras));
1015         }
1016         Set<String> currencies = new TreeSet<>();
1017         for (Iterator<Set<String>> it = territory_currencies.values().iterator(); it.hasNext();) {
1018             currencies.addAll(it.next());
1019         }
1020         logln("Check that no illegal currencies are used");
1021         Set<String> legalCurrencies = new TreeSet<>(sc.getAvailableCodes("currency"));
1022         // first remove non-ISO
1023         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) {
1024             String code = it.next();
1025             List<String> data = sc.getFullData("currency", code);
1026             if ("X".equals(data.get(3))) it.remove();
1027         }
1028         if (!legalCurrencies.containsAll(currencies)) {
1029             TreeSet<String> extras = new TreeSet<>(currencies);
1030             extras.removeAll(legalCurrencies);
1031             errln("Currency info -- Illegal Currencies: " + EnglishCurrencyName.transform(extras));
1032         }
1033         logln("Check that there are no missing currencies");
1034         if (!currencies.containsAll(legalCurrencies)) {
1035             TreeSet<String> extras = new TreeSet<>(legalCurrencies);
1036             extras.removeAll(currencies);
1037             Map<String, Set<String>> failures = new TreeMap<>();
1038             for (Iterator<String> it = extras.iterator(); it.hasNext();) {
1039                 String code = it.next();
1040                 List<String> data = sc.getFullData("currency", code);
1041                 if (data.get(1).equals("ZZ")) continue;
1042                 String type = data.get(3) + "/" + data.get(1);
1043                 Set<String> s = failures.get(type);
1044                 if (s == null) failures.put(type, s = new TreeSet<>());
1045                 s.add(code);
1046             }
1047             for (Iterator<String> it = failures.keySet().iterator(); it.hasNext();) {
1048                 String type = it.next();
1049                 Set<String> s = failures.get(type);
1050                 warnln("Currency info -- Missing Currencies: " + type + "\t \u2192 " + EnglishCurrencyName.transform(s));
1051             }
1052         }
1053         logln("Missing English currency names");
1054         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) {
1055             String currency = it.next();
1056             String name = english.getName("currency", currency);
1057             if (name == null) {
1058                 String standardName = sc.getFullData("currency", currency).get(0);
1059                 logln("\t\t\t<currency type=\"" + currency + "\">");
1060                 logln("\t\t\t\t<displayName>" + standardName + "</displayName>");
1061                 logln("\t\t\t</currency>");
1062             }
1063         }
1064         logln("Check Aliases");
1065         for (Iterator<String> it = aliases.keySet().iterator(); it.hasNext();) {
1066             // the first part of the mapping had better not be in the standardTerritories
1067             String key = it.next();
1068             Map<String, String> submap = aliases.get(key);
1069             if (key.equals("territoryAlias")) {
1070                 checkEqual(key, submap, changedTerritory);
1071             } else if (key.equals("languageAlias")) {
1072                 for (Iterator<String> it2 = submap.keySet().iterator(); it2.hasNext();) {
1073                     String k = it2.next();
1074                     String value = submap.get(k);
1075                     if (value.indexOf("_") >= 0) it2.remove();
1076                 }
1077                 checkEqual(key, submap, changedLanguage);
1078             }
1079         }
1080     }
1081 
1082     /**
1083      *
1084      */
checkEqual(String title, Map map1, Map map2)1085     private void checkEqual(String title, Map map1, Map map2) {
1086         Set foo = new TreeSet(map1.keySet());
1087         foo.removeAll(map2.keySet());
1088         if (!foo.isEmpty()) errln("Extraneous Aliases: " + title + "\t" + foo);
1089         foo = new TreeSet(map2.keySet());
1090         foo.removeAll(map1.keySet());
1091         if (!foo.isEmpty()) errln("Missing Aliases: " + title + "\t" + foo);
1092         foo = map2.keySet();
1093         foo.retainAll(map1.keySet());
1094         for (Iterator it = foo.iterator(); it.hasNext();) {
1095             Object key = it.next();
1096             Object result1 = map1.get(key);
1097             Object result2 = map2.get(key);
1098             if (!result1.equals(result2))
1099                 errln("Missing Aliases: " + title + "\t" + key + "\t" + result1 + " != " + result2);
1100         }
1101     }
1102 
1103     /**
1104      * Test that the zone ids are well-formed.
1105      *
1106      */
TestZones()1107     public void TestZones() {
1108         StandardCodes sc = StandardCodes.make();
1109 
1110         Map<String, String> defaultNames = new TreeMap();
1111         Map<String, String> old_new = sc.getZoneLinkold_new();
1112         Set<String> core = sc.getZoneData().keySet();
1113         logln("Checking for collisions with last field");
1114         for (Iterator<String> it = core.iterator(); it.hasNext();) {
1115             String currentItem = it.next();
1116             String defaultName = TimezoneFormatter.getFallbackName(currentItem);
1117             String fullName = defaultNames.get(defaultName);
1118             if (fullName == null)
1119                 defaultNames.put(defaultName, currentItem);
1120             else {
1121                 errln("Collision between: " + currentItem + " AND " + fullName);
1122             }
1123         }
1124 
1125         logln("Checking that all links are TO canonical zones");
1126         Set<String> s = new TreeSet<>(old_new.values());
1127         s.removeAll(core);
1128         if (s.size() != 0) {
1129             errln("Links go TO zones that are not canonical! " + s);
1130         }
1131 
1132         logln("Checking that no links are FROM canonical zones");
1133         s = new TreeSet<>(core);
1134         s.retainAll(old_new.keySet());
1135         if (s.size() != 0) {
1136             errln("Links go FROM zones that are canonical! " + s);
1137         }
1138 
1139         logln("Checking that the zones with rule data are all canonical");
1140         Set<String> zonesWithRules = sc.getZone_rules().keySet();
1141         s.clear();
1142         s.addAll(zonesWithRules);
1143         s.removeAll(core);
1144         if (s.size() != 0) logln("Zones with rules that are not canonical: " + s);
1145 
1146         logln("Checking that the rule data are all canonical");
1147         s.clear();
1148         s.addAll(core);
1149         s.removeAll(zonesWithRules);
1150         s.removeAll(old_new.keySet());
1151         if (s.size() != 0) logln("Canonical zones that don't have rules or links: " + s);
1152 
1153         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
1154             String oldItem = it.next();
1155             logln("old: " + oldItem + "\tnew: " + old_new.get(oldItem));
1156         }
1157         Map<String, Set<String>> new_old = new TreeMap<>();
1158         for (Iterator<String> it = core.iterator(); it.hasNext();) {
1159             new_old.put(it.next(), new TreeSet<String>());
1160         }
1161         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
1162             String oldItem = it.next();
1163             String newItem = old_new.get(oldItem);
1164             Set<String> oldItems = new_old.get(newItem);
1165             if (oldItems == null) { // try recursing
1166                 logln("!!!!Skipping " + oldItem + " \u2192 " + newItem);
1167                 continue;
1168                 // new_old.put(oldOne, oldItems = new TreeSet());
1169             }
1170             oldItems.add(oldItem);
1171         }
1172         for (Iterator<String> it = new_old.keySet().iterator(); it.hasNext();) {
1173             String newOne = it.next();
1174             Set<String> oldItems = new_old.get(newOne);
1175             logln(newOne + "\t" + oldItems);
1176         }
1177     }
1178 
TestNarrowForms()1179     public void TestNarrowForms() {
1180         if (disableUntilLater("TestMinimalLocalization")) return;
1181 
1182         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
1183             String locale = it.next();
1184             logln("Testing: " + getLocaleAndName(locale));
1185             BreakIterator bi = BreakIterator.getCharacterInstance(new ULocale(locale));
1186             CLDRFile item = cldrFactory.make(locale, false);
1187             // Walk through all the xpaths, adding to currentValues
1188             // Whenever two values for the same xpath are different, we remove from currentValues, and add to okValues
1189             for (Iterator<String> it2 = item.iterator(); it2.hasNext();) {
1190                 String xpath = it2.next();
1191                 if (xpath.indexOf("[@type=\"narrow\"]") >= 0) {
1192                     String value = item.getStringValue(xpath);
1193                     // logln("\tTesting: " + value + "\t path: " + xpath);
1194                     int end = getXGraphemeClusterBoundary(bi, value, 0);
1195                     if (end == value.length()) continue;
1196                     errln(getLocaleAndName(locale) + "\tillegal narrow value " + value + "\t path: " + xpath);
1197                     surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is too wide for a \"narrow\" value.");
1198                 }
1199             }
1200         }
1201     }
1202 
    /** Set built from the pattern [[:mark:][:grapheme_extend:]]; used below to extend a cluster over trailing characters. */
    static final UnicodeSet XGRAPHEME = new UnicodeSet("[[:mark:][:grapheme_extend:]]");
    /** Set built from the pattern [:decimal_number:]; used below to keep a run of digits together. */
    static final UnicodeSet DIGIT = new UnicodeSet("[:decimal_number:]");
1205 
getXGraphemeClusterBoundary(BreakIterator bi, String value, int start)1206     private int getXGraphemeClusterBoundary(BreakIterator bi, String value, int start) {
1207         if (value.length() <= 1) return 1;
1208 
1209         bi.setText(value);
1210         if (start != 0) bi.preceding(start + 1); // backup one
1211         int current = bi.next();
1212         // link any digits
1213         if (DIGIT.contains(UTF16.charAt(value, current - 1))) {
1214             current = DIGIT.findIn(value, current, true);
1215         }
1216         // continue collecting any additional characters that are M or grapheme extend
1217         return XGRAPHEME.findIn(value, current, true);
1218     }
1219 }
1220