1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.test; 8 9 import java.io.File; 10 import java.io.IOException; 11 import java.io.PrintWriter; 12 import java.util.Arrays; 13 import java.util.Calendar; 14 import java.util.Collection; 15 import java.util.Date; 16 import java.util.HashMap; 17 import java.util.HashSet; 18 import java.util.Iterator; 19 import java.util.LinkedHashSet; 20 import java.util.List; 21 import java.util.Map; 22 import java.util.Set; 23 import java.util.TreeMap; 24 import java.util.TreeSet; 25 26 import org.unicode.cldr.draft.FileUtilities; 27 import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType; 28 import org.unicode.cldr.util.CLDRFile; 29 import org.unicode.cldr.util.CLDRPaths; 30 import org.unicode.cldr.util.CldrUtility; 31 import org.unicode.cldr.util.Factory; 32 import org.unicode.cldr.util.LanguageTagParser; 33 import org.unicode.cldr.util.SimpleFactory; 34 import org.unicode.cldr.util.StandardCodes; 35 import org.unicode.cldr.util.TimezoneFormatter; 36 import org.unicode.cldr.util.XPathParts; 37 import org.xml.sax.SAXException; 38 39 import com.ibm.icu.dev.test.TestFmwk; 40 import com.ibm.icu.text.BreakIterator; 41 import com.ibm.icu.text.DecimalFormat; 42 import com.ibm.icu.text.NumberFormat; 43 import com.ibm.icu.text.UTF16; 44 import com.ibm.icu.text.UnicodeSet; 45 import com.ibm.icu.util.ULocale; 46 47 /** 48 * Initial version of CLDR tests. Each test is named TextXXX. To run all the tests, use the options 49 * <blockquote>-nothrow</blockquote> 50 * To run a particular set of tests, include their names, like 51 * <blockquote>-nothrow TestForIllegalAttributeValues TestMinimalLocalization</blockquote> 52 * To show more information (logln), add -verbose 53 * <p> 54 * There are some environment variables that can be used with the test. <br> 55 * -DSHOW_FILES=<anything> shows all create/open of files. <br> 56 * -DXML_MATCH=<regular expression> skips all locales that don't match the regular expression <br> 57 * -DXML_MAIN_DIR=<filesystem directory> resets to a different main directory (eg not cldr/common/main. For example, 58 * some of the tools generate into a locale directory like -DXML_MAIN_DIR=C:\Unicode-CVS2\cldr\common\gen\main\ so this 59 * can be used to check that directory. <br> 60 * -DSKIP_DRAFT=<boolean> skips draft locales if <boolean> is a string starting with T or t 61 */ 62 public class CLDRTest extends TestFmwk { 63 /** 64 * privates 65 */ 66 private static String MATCH; 67 private static String MAIN_DIR; 68 private static boolean SKIP_DRAFT; 69 private Set<String> locales; 70 private Set<String> languageLocales; 71 private Factory cldrFactory; 72 private CLDRFile resolvedRoot; 73 private CLDRFile resolvedEnglish; 74 private final UnicodeSet commonAndInherited = new UnicodeSet( 75 "[[:script=common:][:script=inherited:][:alphabetic=false:]]"); 76 private static final String[] WIDTHS = { "narrow", "wide", "abbreviated", "short" }; 77 private static final String[] MONTHORDAYS = { "day", "month" }; 78 private Map<String, String> localeNameCache = new HashMap<String, String>(); 79 private CLDRFile english = null; 80 81 private Set<String> surveyInfo = new TreeSet<String>(); 82 83 /** 84 * TestFmwk boilerplate 85 */ main(String[] args)86 public static void main(String[] args) throws Exception { 87 MATCH = System.getProperty("XML_MATCH"); 88 if (MATCH == null) 89 MATCH = ".*"; 90 else 91 System.out.println("Resetting MATCH:" + MATCH); 92 MAIN_DIR = System.getProperty("XML_MAIN_DIR"); 93 if (MAIN_DIR == null) 94 MAIN_DIR = CLDRPaths.MAIN_DIRECTORY; 95 else 96 System.out.println("Resetting MAIN_DIR:" + MAIN_DIR); 97 SKIP_DRAFT = System.getProperty("XML_SKIP_DRAFT") != null; 98 if (SKIP_DRAFT) System.out.println("Skipping Draft locales"); 99 100 double deltaTime = System.currentTimeMillis(); 101 new CLDRTest().run(args); 102 deltaTime = System.currentTimeMillis() - deltaTime; 103 System.out.println("Seconds: " + deltaTime / 1000); 104 105 } 106 TestZZZZHack()107 public void TestZZZZHack() throws IOException { 108 // hack to get file written at the end of run. 109 PrintWriter surveyFile = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "surveyInfo.txt"); 110 for (String s : surveyInfo) { 111 surveyFile.println(s); 112 } 113 surveyFile.close(); 114 } 115 116 /** 117 * TestFmwk boilerplate 118 */ CLDRTest()119 public CLDRTest() throws SAXException, IOException { 120 // TODO parameterize the directory and filter 121 cldrFactory = Factory.make(MAIN_DIR, MATCH); 122 // CLDRKey.main(new String[]{"-mde.*"}); 123 locales = cldrFactory.getAvailable(); 124 languageLocales = cldrFactory.getAvailableLanguages(); 125 resolvedRoot = cldrFactory.make("root", true); 126 /* 127 * PrintWriter out = FileUtilities.openUTF8Writer(Utility.GEN_DIRECTORY + "resolved/", "root.xml"); 128 * CLDRFile temp = (CLDRFile) resolvedRoot.clone(); 129 * temp.write(out); 130 * out.close(); 131 */ 132 resolvedEnglish = cldrFactory.make("en", true); 133 } 134 135 /** 136 * Check to make sure that the currency formats are kosher. 137 */ TestCurrencyFormats()138 public void TestCurrencyFormats() { 139 // String decimal = "//ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat[@type=\"standard\"]/"; 140 // String currency = "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/"; 141 for (String locale : locales) { 142 boolean isPOSIX = locale.indexOf("POSIX") >= 0; 143 logln("Testing: " + locale); 144 CLDRFile item = cldrFactory.make(locale, false); 145 for (String xpath : item) { 146 NumericType type = NumericType.getNumericType(xpath); 147 if (type == NumericType.NOT_NUMERIC) continue; 148 String value = item.getStringValue(xpath); 149 // at this point, we only have currency formats 150 String pattern = DisplayAndInputProcessor.getCanonicalPattern(value, type, isPOSIX); 151 if (!pattern.equals(value)) { 152 String draft = ""; 153 if (item.getFullXPath(xpath).indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]"; 154 assertEquals(getLocaleAndName(locale) + draft + " " + type + " pattern incorrect", pattern, value); 155 } 156 } 157 } 158 } 159 160 /** 161 * Internal class 162 */ 163 private static class ValueCount { 164 int count = 1; 165 String value; 166 String fullxpath; 167 } 168 169 /** 170 * Verify that if all the children of a language locale do not have the same value for the same key. 171 */ TestCommonChildren()172 public void TestCommonChildren() { 173 if (disableUntilLater("TestCommonChildren")) return; 174 175 Map<String, ValueCount> currentValues = new TreeMap<String, ValueCount>(); 176 Set<String> okValues = new TreeSet<String>(); 177 178 for (String parent : languageLocales) { 179 logln("Testing: " + parent); 180 currentValues.clear(); 181 okValues.clear(); 182 Set<String> availableWithParent = cldrFactory.getAvailableWithParent(parent, true); 183 for (String locale : availableWithParent) { 184 logln("\tTesting: " + locale); 185 CLDRFile item = cldrFactory.make(locale, false); 186 // Walk through all the xpaths, adding to currentValues 187 // Whenever two values for the same xpath are different, we remove from currentValues, and add to 188 // okValues 189 for (String xpath : item) { 190 if (okValues.contains(xpath)) continue; 191 if (xpath.startsWith("//ldml/identity/")) continue; // skip identity elements 192 String v = item.getStringValue(xpath); 193 ValueCount last = currentValues.get(xpath); 194 if (last == null) { 195 ValueCount vc = new ValueCount(); 196 vc.value = v; 197 vc.fullxpath = item.getFullXPath(xpath); 198 currentValues.put(xpath, vc); 199 } else if (v.equals(last.value)) { 200 last.count++; 201 } else { 202 okValues.add(xpath); 203 currentValues.remove(xpath); 204 } 205 } 206 // at the end, only the keys left in currentValues are (possibly) faulty 207 // they are actually bad IFF either 208 // (a) the count is equal to the total (thus all children are the same), or 209 // (b) their value is the same as the parent's resolved value (thus all children are the same or the 210 // same 211 // as the inherited parent value). 212 } 213 if (currentValues.size() == 0) continue; 214 int size = availableWithParent.size(); 215 CLDRFile parentCLDR = cldrFactory.make(parent, true); 216 for (String xpath : currentValues.keySet()) { 217 ValueCount vc = currentValues.get(xpath); 218 if (vc.count == size || (vc.value.equals(parentCLDR.getStringValue(xpath)) 219 && vc.fullxpath.equals(parentCLDR.getStringValue(xpath)))) { 220 String draft = ""; 221 if (vc.fullxpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]"; 222 String count = (vc.count == size ? "" : vc.count + "/") + size; 223 warnln(getLocaleAndName(parent) + draft + 224 "\tall children (" + count + ") have same value for:\t" 225 + xpath + ";\t" + vc.value); 226 } 227 } 228 } 229 } 230 231 static String[] EXEMPLAR_SKIPS = { "/hourFormat", "/exemplarCharacters", "/pattern", "/localizedPatternChars" }; 232 233 /** 234 * Check that the exemplars include all characters in the data. 235 */ TestThatExemplarsContainAll()236 public void TestThatExemplarsContainAll() { 237 UnicodeSet allExemplars = new UnicodeSet(); 238 if (disableUntilLater("TestThatExemplarsContainAll")) return; 239 Set<String> counts = new TreeSet<String>(); 240 int totalCount = 0; 241 UnicodeSet localeMissing = new UnicodeSet(); 242 for (String locale : locales) { 243 if (locale.equals("root")) continue; 244 CLDRFile resolved = cldrFactory.make(locale, false); // FIX LATER 245 UnicodeSet exemplars = getFixedExemplarSet(locale, resolved); 246 CLDRFile plain = cldrFactory.make(locale, false); 247 int count = 0; 248 localeMissing.clear(); 249 file: for (String xpath : plain) { 250 for (int i = 0; i < EXEMPLAR_SKIPS.length; ++i) { 251 if (xpath.indexOf(EXEMPLAR_SKIPS[i]) > 0) continue file; // skip some items. 252 } 253 if (SKIP_DRAFT) { 254 String fullxpath = plain.getFullXPath(xpath); 255 if (fullxpath.indexOf("[@draft=\"unconfirmed\"") > 0) continue; 256 } 257 if (xpath.startsWith("//ldml/posix/messages")) continue; 258 String value = plain.getStringValue(xpath); 259 allExemplars.addAll(value); 260 if (!exemplars.containsAll(value)) { 261 count++; 262 UnicodeSet missing = new UnicodeSet().addAll(value).removeAll(exemplars); 263 localeMissing.addAll(missing); 264 logln(getLocaleAndName(locale) + "\t" + xpath + "\t<" + value + "> contains " + missing 265 + ", not in exemplars"); 266 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' contains characters " 267 + missing.toPattern(false) + ", which are not in exemplars"); 268 } 269 } 270 NumberFormat nf = new DecimalFormat("000"); 271 if (count != 0) { 272 totalCount += count; 273 counts.add(nf.format(count) + "\t" + getLocaleAndName(locale) + "\t" + localeMissing); 274 } 275 if (localeMissing.size() != 0) { 276 errln(getLocaleAndName(locale) + "\t uses " + localeMissing + ", not in exemplars"); 277 } 278 } 279 for (String c : counts) { 280 logln(c); 281 } 282 logln("Total Count: " + totalCount); 283 System.out.println("All exemplars: " + allExemplars.toPattern(true)); 284 } 285 286 // Get Date-Time in milliseconds getDateTimeinMillis(int year, int month, int date)287 private static long getDateTimeinMillis(int year, int month, int date) { 288 Calendar cal = Calendar.getInstance(); 289 cal.set(year, month, date); 290 return cal.getTimeInMillis(); 291 } 292 293 static final long disableDate = getDateTimeinMillis(2005, 6 - 1, 3); 294 295 /** 296 * 297 */ disableUntilLater(String string)298 private boolean disableUntilLater(String string) { 299 if (new Date().getTime() >= disableDate) return false; 300 warnln("Disabling " + string + " until " + new Date(disableDate)); 301 return true; 302 } 303 304 /** 305 * Internal 306 */ getFixedExemplarSet(String locale, CLDRFile cldrfile)307 private UnicodeSet getFixedExemplarSet(String locale, CLDRFile cldrfile) { 308 UnicodeSet exemplars = getExemplarSet(cldrfile, ""); 309 if (exemplars.size() == 0) { 310 errln(getLocaleAndName(locale) + " has empty exemplar set"); 311 } 312 exemplars.addAll(getExemplarSet(cldrfile, "standard")); 313 UnicodeSet auxiliary = getExemplarSet(cldrfile, "auxiliary"); 314 if (exemplars.containsSome(auxiliary)) { 315 errln(getLocaleAndName(locale) + "Auxiliary & main exemplars should be disjoint, but overlap with " + 316 new UnicodeSet(exemplars).retainAll(auxiliary) + 317 ": change auxiliary to " + auxiliary.removeAll(exemplars)); 318 } 319 exemplars.addAll(auxiliary); 320 exemplars.addAll(commonAndInherited); 321 return exemplars; 322 } 323 324 /** 325 * @return Gets an exemplar set. Also verifies that the set contains no properties. 326 */ getExemplarSet(CLDRFile cldrfile, String type)327 public UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) { 328 if (type.length() != 0) type = "[@type=\"" + type + "\"]"; 329 String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type); 330 if (v == null) return new UnicodeSet(); 331 String pattern = v; 332 if (pattern.indexOf("[:") >= 0 || pattern.indexOf("\\p{") > 0) { 333 errln(getLocaleName(cldrfile.getLocaleID()) + " exemplar pattern contains property: " + pattern); 334 } 335 try { 336 UnicodeSet result = new UnicodeSet(v, UnicodeSet.CASE); 337 result.remove(0x20); 338 return result; 339 } catch (RuntimeException e) { 340 e.printStackTrace(); 341 errln(getLocaleAndName(cldrfile.getLocaleID()) + " has illegal exemplar set: <" + v + ">"); 342 return new UnicodeSet(); 343 } 344 // if (type.length() != 0) System.out.println("fetched set for " + type); 345 } 346 getLocaleAndName(String locale)347 public String getLocaleAndName(String locale) { 348 return locale + " (" + getLocaleName(locale) + ")"; 349 } 350 351 /** 352 * @return the ID plus its localization (for language, script, and territory IDs only) 353 */ getIDAndLocalization(String id)354 public String getIDAndLocalization(String id) { 355 return id + " " + getLocalization(id); 356 } 357 358 /** 359 * @return the localization (for language, script, and territory IDs only) 360 */ getLocalization(String id)361 public String getLocalization(String id) { 362 if (english == null) english = cldrFactory.make("en", true); 363 if (id.length() == 0) return "?"; 364 // pick on basis of case 365 char ch = id.charAt(0); 366 if ('a' <= ch && ch <= 'z') return getName(english, "languages/language", id); 367 if (id.length() == 4 && 'A' <= ch && ch <= 'Z') return getName(english, "scripts/script", id); 368 return getName(english, "territories/territory", id); 369 } 370 371 /** 372 * Internal 373 */ getIDAndLocalization(Set<String> missing)374 private String getIDAndLocalization(Set<String> missing) { 375 StringBuffer buffer = new StringBuffer(); 376 for (String next : missing) { 377 if (buffer.length() != 0) buffer.append("; "); 378 buffer.append(getIDAndLocalization(next)); 379 } 380 return buffer.toString(); 381 } 382 getLocaleName(String locale)383 public String getLocaleName(String locale) { 384 String name = localeNameCache.get(locale); 385 if (name != null) return name; 386 if (english == null) english = cldrFactory.make("en", true); 387 String result = english.getName(locale); 388 /* 389 * Collection c = Utility.splitList(locale, '_', false, null); 390 * String[] pieces = new String[c.size()]; 391 * c.toArray(pieces); 392 * int i = 0; 393 * String result = getName(english, "languages/language", pieces[i++]); 394 * if (pieces[i].length() == 0) return result; 395 * if (pieces[i].length() == 4) { 396 * result += " " + getName(english, "scripts/script", pieces[i++]); 397 * } 398 * if (pieces[i].length() == 0) return result; 399 * result += " " + getName(english, "territories/territory", pieces[i++]); 400 * if (pieces[i].length() == 0) return result; 401 * result += " " + getName(english, "variant/variants", pieces[i++]); 402 */ 403 localeNameCache.put(locale, result); 404 return result; 405 } 406 407 /** 408 * Internal 409 */ getName(CLDRFile english, String kind, String type)410 private String getName(CLDRFile english, String kind, String type) { 411 String v = english.getStringValue("//ldml/localeDisplayNames/" + kind + "[@type=\"" + type + "\"]"); 412 if (v == null) return "<" + type + ">"; 413 return v; 414 } 415 416 /** 417 * Make sure we are only using attribute values that are in RFC3066bis, the Olson database (with aliases removed) 418 * or ISO 4217 419 * 420 * @throws IOException 421 */ TestForIllegalAttributeValues()422 public void TestForIllegalAttributeValues() { 423 // check for illegal attribute values that are not in the DTD 424 Map<String, Set<String>> result = new TreeMap<String, Set<String>>(); 425 Map<String, Set<String>> totalResult = new TreeMap<String, Set<String>>(); 426 for (String locale : locales) { 427 logln("Testing: " + locale); 428 CLDRFile item = cldrFactory.make(locale, false); 429 result.clear(); 430 Set<String> xpathFailures = null; // don't collect 431 // XPathParts parts; 432 // String xpath; 433 // CLDRFile.StringValue value; 434 // String element; 435 // Map attributes; 436 checkAttributeValidity(item, result, xpathFailures); 437 438 // now show 439 //String localeName = getLocaleAndName(locale); 440 for (Iterator<String> it3 = result.keySet().iterator(); it3.hasNext();) { 441 String code = it3.next(); 442 Set<String> avalues = result.get(code); 443 errln(getLocaleAndName(locale) + "\tillegal attribute value for " + code + ", value:\t" + show(avalues)); 444 Set<String> totalvalues = totalResult.get(code); 445 if (totalvalues == null) totalResult.put(code, totalvalues = new TreeSet<String>()); 446 totalvalues.addAll(avalues); 447 } 448 } 449 for (Iterator<String> it3 = totalResult.keySet().iterator(); it3.hasNext();) { 450 String code = it3.next(); 451 Set<String> avalues = totalResult.get(code); 452 errln("All illegal attribute values for " + code + ", value:\t" + show(avalues)); 453 } 454 } 455 456 /** 457 * Tests whether the display names have any collisions, e.g. if in the fully resolved 458 * locale $ is used for both USD and UAD. 459 * 460 */ TestDisplayNameCollisions()461 public void TestDisplayNameCollisions() { 462 if (disableUntilLater("TestDisplayNameCollisions")) return; 463 464 Map<String, String>[] maps = new HashMap[CLDRFile.LIMIT_TYPES]; 465 for (int i = 0; i < maps.length; ++i) 466 maps[i] = new HashMap<String, String>(); 467 Set<String> collisions = new TreeSet<String>(); 468 for (Iterator<String> it = locales.iterator(); it.hasNext();) { 469 String locale = it.next(); 470 CLDRFile item = cldrFactory.make(locale, true); 471 for (int i = 0; i < maps.length; ++i) 472 maps[i].clear(); 473 collisions.clear(); 474 475 for (Iterator<String> it2 = item.iterator(); it2.hasNext();) { 476 String xpath = it2.next(); 477 int nameType = CLDRFile.getNameType(xpath); 478 if (nameType < 0) continue; 479 String value = item.getStringValue(xpath); 480 String xpath2 = maps[nameType].get(value); 481 if (xpath2 == null) { 482 maps[nameType].put(value, xpath); 483 continue; 484 } 485 collisions.add(CLDRFile.getNameTypeName(nameType) + "\t" + value + "\t" + xpath + "\t" + xpath2); 486 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is a duplicate of what is in " + xpath2); 487 } 488 String name = getLocaleAndName(locale) + "\t"; 489 for (Iterator<String> it2 = collisions.iterator(); it2.hasNext();) { 490 errln(name + it2.next()); 491 } 492 } 493 } 494 495 /** 496 * Checks the validity of attributes, based on StandardCodes. 497 * The invalid codes are added to badCodes, and the failing xpaths are added to xpathFailures. 498 * 499 * @param item 500 * @param badCodes 501 * @param xpathFailures 502 */ checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures)503 public static void checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures) { 504 XPathParts parts = new XPathParts(null, null); 505 for (Iterator<String> it2 = item.iterator(); it2.hasNext();) { 506 String xpath = it2.next(); 507 parts.set(item.getFullXPath(xpath)); 508 for (int i = 0; i < parts.size(); ++i) { 509 if (parts.getAttributeCount(i) == 0) continue; 510 String element = parts.getElement(i); 511 Map<String, String> attributes = parts.getAttributes(i); 512 for (Iterator<String> it3 = attributes.keySet().iterator(); it3.hasNext();) { 513 String attribute = it3.next(); 514 String avalue = attributes.get(attribute); 515 checkValidity(xpath, element, attribute, avalue, badCodes, xpathFailures); 516 } 517 } 518 } 519 } 520 521 /** 522 * Internal 523 */ show(Collection<String> avalues)524 private String show(Collection<String> avalues) { 525 StringBuffer result = new StringBuffer("{"); 526 boolean first = true; 527 for (Iterator<String> it3 = avalues.iterator(); it3.hasNext();) { 528 if (first) 529 first = false; 530 else 531 result.append(", "); 532 result.append(it3.next().toString()); 533 } 534 result.append("}"); 535 return result.toString(); 536 } 537 538 /** 539 * Internal function 540 */ checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, Set<String> xpathsFailing)541 private static void checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, 542 Set<String> xpathsFailing) { 543 StandardCodes codes = StandardCodes.make(); 544 if (attribute.equals("type")) { 545 boolean checkReplacements = xpath.indexOf("/identity") < 0; 546 if (element.equals("currency")) 547 checkCodes(xpath, "currency", avalue, codes, results, xpathsFailing, checkReplacements); 548 else if (element.equals("script")) 549 checkCodes(xpath, "script", avalue, codes, results, xpathsFailing, checkReplacements); 550 else if (element.equals("territory")) 551 checkCodes(xpath, "territory", avalue, codes, results, xpathsFailing, checkReplacements); 552 else if (element.equals("language")) 553 checkCodes(xpath, "language", avalue, codes, results, xpathsFailing, checkReplacements); 554 else if (element.equals("zone")) 555 checkCodes(xpath, "tzid", avalue, codes, results, xpathsFailing, checkReplacements); 556 } 557 } 558 559 /** 560 * Internal function 561 * 562 * @param checkReplacements 563 * TODO 564 */ 565 private static void checkCodes(String xpath, String code, String avalue, StandardCodes codes, Map<String, Set<String>> results, 566 Set<String> xpathFailures, boolean checkReplacements) { 567 // ok if code is found AND it has no replacement 568 if (codes.getData(code, avalue) != null 569 && (!checkReplacements || codes.getReplacement(code, avalue) == null)) return; 570 571 if (xpathFailures != null) xpathFailures.add(xpath); 572 if (results == null) return; 573 Set<String> s = results.get(code); 574 if (s == null) { 575 s = new TreeSet<String>(); 576 results.put(code, s); 577 } 578 s.add(avalue); 579 } 580 581 /** 582 * Verify that a small set of locales (currently just English) has everything translated. 583 * 584 * @throws IOException 585 */ 586 public void TestCompleteLocales() { 587 // just test English for now 588 if (english == null) english = cldrFactory.make("en", true); 589 checkTranslatedCodes(english); 590 } 591 592 /** 593 * Tests that the file contains codes for all main display name ids: language, script, territory, tzid, currency. 594 */ 595 private void checkTranslatedCodes(CLDRFile cldrfile) { 596 StandardCodes codes = StandardCodes.make(); 597 checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency", "/displayName"); 598 // can't check timezones for English. 599 // checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone", ""); 600 checkTranslatedCode(cldrfile, codes, "language", "//ldml/localeDisplayNames/languages/language", ""); 601 checkTranslatedCode(cldrfile, codes, "script", "//ldml/localeDisplayNames/scripts/script", ""); 602 checkTranslatedCode(cldrfile, codes, "territory", "//ldml/localeDisplayNames/territories/territory", ""); 603 checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant", ""); 604 } 605 606 /** 607 * @param codes 608 * @param type 609 * @param prefix 610 * @param postfix 611 * TODO 612 */ 613 private void checkTranslatedCode(CLDRFile cldrfile, StandardCodes codes, String type, String prefix, String postfix) { 614 615 // TODO, expand to other languages 616 Map<String, Set<String>> completionExceptions = new HashMap<String, Set<String>>(); 617 Set<String> scriptExceptions = new HashSet<String>(); 618 scriptExceptions.add("Cham"); 619 scriptExceptions.add("Thai"); 620 completionExceptions.put("script", scriptExceptions); 621 622 Set<String> codeItems = codes.getGoodAvailableCodes(type); 623 int count = 0; 624 Set<String> exceptions = completionExceptions.get(type); 625 for (String code : codeItems) { 626 String rfcname = codes.getData(type, code); 627 // if (rfcname.equals("ZZ")) continue; 628 ++count; 629 if (rfcname.equals("PRIVATE USE")) continue; 630 String fullFragment = prefix + "[@type=\"" + code + "\"]" + postfix; 631 String v = cldrfile.getStringValue(fullFragment); 632 if (v == null) { 633 errln("Missing translation for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">"); 634 continue; 635 } 636 String translation = v; 637 if (translation.equals(code)) { 638 if (exceptions != null && exceptions.contains(code)) continue; 639 errln("Translation = code for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">"); 640 continue; 641 } 642 if (false && !translation.equalsIgnoreCase(rfcname)) { 643 warnln(type + " translation differs from RFC, check: " + code + "\trfc: " + rfcname + "\tcldr: " 644 + translation); 645 } 646 } 647 logln("Total " + type + ":\t" + count); 648 } 649 650 // <territoryContainment><group type="001" contains="002 009 019 142 150"/> 651 // <languageData><language type="af" scripts="Latn" territories="ZA"/> 652 void getSupplementalData(Map<String, Set<String>> language_scripts, Map<String, Set<String>> language_territories, 653 Map<String, Set<String>> group_territory, 654 Map<String, Set<String>> territory_currencies, Map<String, Map<String, String>> aliases) { 655 boolean SHOW = false; 656 Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 657 CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false); 658 XPathParts parts = new XPathParts(new UTF16.StringComparator(), null); 659 for (Iterator<String> it = supp.iterator(); it.hasNext();) { 660 String path = it.next(); 661 try { 662 parts.set(supp.getFullXPath(path)); 663 Map<String, String> m; 664 String type = ""; 665 if (aliases != null && parts.findElement("alias") >= 0) { 666 m = parts.findAttributes(type = "languageAlias"); 667 if (m == null) m = parts.findAttributes(type = "territoryAlias"); 668 if (m != null) { 669 Map top = aliases.get(type); 670 if (top == null) aliases.put(type, top = new TreeMap()); 671 top.put(m.get("type"), m.get("replacement")); 672 } 673 } 674 if (territory_currencies != null) { 675 m = parts.findAttributes("region"); 676 if (m != null) { 677 String region = m.get("iso3166"); 678 Set s = territory_currencies.get(region); 679 if (s == null) territory_currencies.put(region, s = new LinkedHashSet()); 680 m = parts.findAttributes("currency"); 681 if (m == null) { 682 warnln("missing currency for region: " + path); 683 continue; 684 } 685 String currency = m.get("iso4217"); 686 s.add(currency); 687 m = parts.findAttributes("alternate"); 688 String alternate = m == null ? null : (String) m.get("iso4217"); 689 if (alternate != null) s.add(alternate); 690 continue; 691 } 692 } 693 m = parts.findAttributes("group"); 694 if (m != null) { 695 if (group_territory == null) continue; 696 type = m.get("type"); 697 String contains = m.get("contains"); 698 group_territory.put(type, new TreeSet(CldrUtility.splitList(contains, ' ', true))); 699 continue; 700 } 701 m = parts.findAttributes("language"); 702 if (m == null) continue; 703 String language = m.get("type"); 704 String scripts = m.get("scripts"); 705 if (scripts == null) 706 language_scripts.put(language, new TreeSet<String>()); 707 else { 708 language_scripts.put(language, new TreeSet<String>(CldrUtility.splitList(scripts, ' ', true))); 709 if (SHOW) 710 System.out.println(getIDAndLocalization(language) + "\t\t" 711 + getIDAndLocalization(language_scripts.get(language))); 712 } 713 String territories = m.get("territories"); 714 if (territories == null) 715 language_territories.put(language, new TreeSet<String>()); 716 else { 717 language_territories.put(language, new TreeSet<String>(CldrUtility.splitList(territories, ' ', true))); 718 if (SHOW) 719 System.out.println(getIDAndLocalization(language) + "\t\t" 720 + getIDAndLocalization(language_territories.get(language))); 721 } 722 } catch (RuntimeException e) { 723 throw (IllegalArgumentException) new IllegalArgumentException("Failure with: " + path).initCause(e); 724 } 725 } 726 } 727 728 /** 729 * Verify that the minimal localizations are present. 730 */ TestMinimalLocalization()731 public void TestMinimalLocalization() throws IOException { 732 if (disableUntilLater("TestMinimalLocalization")) return; 733 734 boolean testDraft = false; 735 Map<String, Set<String>> language_scripts = new HashMap<String, Set<String>>(); 736 Map<String, Set<String>> language_territories = new HashMap<String, Set<String>>(); 737 getSupplementalData(language_scripts, language_territories, null, null, null); 738 LanguageTagParser localIDParser = new LanguageTagParser(); 739 // see http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/design/minimal_requirements.htm 740 int[] failureCount = new int[1]; 741 int[] warningCount = new int[1]; 742 for (Iterator<String> it = languageLocales.iterator(); it.hasNext();) { 743 String locale = it.next(); 744 if (locale.equals("root")) continue; 745 // if (!locale.equals("zh_Hant")) continue; 746 747 CLDRFile item = cldrFactory.make(locale, true); 748 if (!testDraft && item.isDraft()) { 749 logln(getLocaleAndName(locale) + "\tskipping draft"); 750 continue; 751 } 752 UnicodeSet exemplars = getFixedExemplarSet(locale, item); 753 CLDRFile missing = SimpleFactory.makeFile(locale); 754 failureCount[0] = 0; 755 warningCount[0] = 0; 756 localIDParser.set(locale); 757 String language = localIDParser.getLanguage(); 758 logln("Testing: " + locale); 759 // languages 760 Set<String> languages = new TreeSet<String>(CldrUtility.MINIMUM_LANGUAGES); 761 languages.add(language); 762 // LANGUAGE_NAME = 0, SCRIPT_NAME = 1, TERRITORY_NAME = 2, VARIANT_NAME = 3, 763 // CURRENCY_NAME = 4, CURRENCY_SYMBOL = 5, TZID = 6 764 765 checkForItems(item, languages, CLDRFile.LANGUAGE_NAME, missing, failureCount, null); 766 767 /* 768 * checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency"); 769 * checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone"); 770 * checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant"); 771 */ 772 773 Set<String> scripts = new TreeSet<String>(); 774 scripts.add("Latn"); 775 Set<String> others = language_scripts.get(language); 776 if (others != null) scripts.addAll(others); 777 checkForItems(item, scripts, CLDRFile.SCRIPT_NAME, missing, failureCount, null); 778 779 Set<String> countries = new TreeSet<String>(CldrUtility.MINIMUM_TERRITORIES); 780 others = language_territories.get(language); 781 if (others != null) countries.addAll(others); 782 checkForItems(item, countries, CLDRFile.TERRITORY_NAME, missing, failureCount, null); 783 784 Set<String> currencies = new TreeSet<String>(); 785 StandardCodes sc = StandardCodes.make(); 786 for (Iterator<String> it2 = countries.iterator(); it2.hasNext();) { 787 String country = it2.next(); 788 Set<String> countryCurrencies = sc.getMainCurrencies(country); 789 if (countryCurrencies == null) { 790 errln("Internal Error: no currencies for " + country + ", locale: " + locale); 791 } else { 792 currencies.addAll(countryCurrencies); 793 } 794 } 795 checkForItems(item, currencies, CLDRFile.CURRENCY_NAME, missing, failureCount, null); 796 checkForItems(item, currencies, CLDRFile.CURRENCY_SYMBOL, missing, failureCount, exemplars); 797 798 // context=format and width=wide; context=stand-alone & width=abbreviated 799 Set<String> months = new TreeSet<String>(); 800 for (int i = 1; i <= 12; ++i) 801 months.add(i + ""); 802 Set<String> days = new TreeSet<String>(Arrays.asList(new String[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" })); 803 for (int i = -7; i < 0; ++i) { 804 checkForItems(item, (i < -4 ? months : days), i, missing, failureCount, null); 805 } 806 807 String filename = "missing_" + locale + ".xml"; 808 if (failureCount[0] > 0 || warningCount[0] > 0) { 809 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "missing/", filename); 810 missing.write(out); 811 out.close(); 812 // String s = getIDAndLocalization(missing); 813 String message = "missing localizations, creating file" 814 + new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).getCanonicalPath(); 815 if (failureCount[0] > 0) 816 warnln(getLocaleAndName(locale) + "\t" + message); 817 else 818 logln(getLocaleAndName(locale) + "\tpossibly " + message); 819 } else { 820 new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).delete(); 821 } 822 } 823 } 824 825 /** 826 * Internal 827 */ getDateKey(String monthOrDay, String width, String code)828 private String getDateKey(String monthOrDay, String width, String code) { 829 // String context = width.equals("narrow") ? "format" : "stand-alone"; 830 return "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/" 831 + monthOrDay + "s/" + monthOrDay + "Context[@type=\"format\"]/" 832 + monthOrDay + "Width[@type=\"" + width + "\"]/" + monthOrDay 833 + "[@type=\"" + code + "\"]"; 834 } 835 836 /** 837 * Internal 838 */ getDateKey(int type, String code)839 private String getDateKey(int type, String code) { 840 // type is 6..4 for months abbrev..narrow, 3..0 for days short..narrow 841 int monthOrDayType = 0, widthType = type; 842 if (type >= 4) { 843 monthOrDayType = 1; 844 widthType -= 4; 845 } 846 return getDateKey(MONTHORDAYS[monthOrDayType], WIDTHS[widthType], code); 847 } 848 849 /** 850 * @param item 851 * @param codes 852 * @param missing 853 * @param exemplarTest 854 * TODO 855 * TODO 856 */ checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], UnicodeSet exemplarTest)857 private void checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], 858 UnicodeSet exemplarTest) { 859 // check codes 860 for (Iterator<String> it2 = codes.iterator(); it2.hasNext();) { 861 String code = it2.next(); 862 String key; 863 if (type >= 0) { 864 key = CLDRFile.getKey(type, code); 865 } else { 866 key = getDateKey(-type - 1, code); 867 } 868 String v = item.getStringValue(key); 869 String rootValue = resolvedRoot.getStringValue(key); 870 if (v == null || v.equals(rootValue) && (exemplarTest == null || !exemplarTest.containsAll(rootValue))) { 871 String englishValue = resolvedEnglish.getStringValue(key); 872 String transValue; 873 if (englishValue != null) { 874 transValue = englishValue; 875 } else { 876 transValue = code; 877 } 878 missing.add(key, "TODO " + transValue); 879 failureCount[0]++; 880 } else { 881 logln("\t" + code + "\t" + v); 882 } 883 } 884 } 885 886 /* 887 * void showTestStr() { 888 * LocaleIDParser lparser = new LocaleIDParser(); 889 * Collection s = split(teststr,',', true, new ArrayList()); 890 * for (Iterator it = s.iterator(); it.hasNext();) { 891 * String item = (String)it.next(); 892 * lparser.set(item.replace('?', '_')); 893 * String region = lparser.getRegion(); 894 * System.out.print(item.replace('?', '-') + " (" + getLocalization(region) + "), "); 895 * //System.out.print(getLocalization(region) + ", "); 896 * } 897 * } 898 * static String teststr = 899 * "en?AG, en?AI, en?AS, en?AU, en?IN, en?BB, en?BE, en?BM, en?BN, en?BS, en?BW, en?BZ, en?CA, en?CK, en?CM, en?DM, en?ER, en?ET, en?FJ, en?FK, en?FM, en?GB, en?GD, en?GH, en?GI, en?GM, en?GU, en?GY, en?HK, en?IE, en?IL, en?IO, en?JM, en?KE, en?KI, en?KN, en?KY, en?LC, en?LR, en?LS, en?MH, en?MP, en?MS, en?MT, en?MU, en?MW, en?NA, en?NF, en?NG, en?NR, en?NU, en?NZ, en?PG, en?PH, en?PK, en?PN, en?PR, en?PW, en?RW, en?SB, en?SC, en?SG, en?SH, en?SL, en?SO, en?SZ, en?TC, en?TK, en?TO, en?TT, en?UG, en?UM, en?US, en?VC, en?VG, en?VI, en?VU, en?WS, en?ZA, en?ZM, en?ZW" 900 * ; 901 */ 902 903 CldrUtility.CollectionTransform EnglishName = new CldrUtility.CollectionTransform() { 904 public Object transform(Object source) { 905 // TODO Auto-generated method stub 906 return getLocalization(source.toString()) + " (" + source + ")"; 907 } 908 }; 909 910 CldrUtility.CollectionTransform EnglishCurrencyName = new CldrUtility.CollectionTransform() { 911 public Object transform(Object source) { 912 if (english == null) english = cldrFactory.make("en", true); 913 return english.getName("currency", source.toString()) + " (" + source + ")"; 914 } 915 }; 916 917 /** 918 * Tests that the supplemental data is well-formed. 919 * 920 */ TestSupplementalData()921 public void TestSupplementalData() { 922 Map<String, Set<String>> language_scripts = new TreeMap<String, Set<String>>(); 923 Map<String, Set<String>> language_territories = new TreeMap<String, Set<String>>(); 924 Map<String, Set<String>> groups = new TreeMap<String, Set<String>>(); 925 Map<String, Set<String>> territory_currencies = new TreeMap<String, Set<String>>(); 926 Map<String, Map<String, String>> aliases = new TreeMap<String, Map<String, String>>(); 927 getSupplementalData(language_scripts, language_territories, groups, territory_currencies, aliases); 928 Set<String> sTerritories = new TreeSet<String>(); 929 for (Iterator<Set<String>> it = language_territories.values().iterator(); it.hasNext();) { 930 sTerritories.addAll(it.next()); 931 } 932 StandardCodes sc = StandardCodes.make(); 933 Set<String> fullTerritories = sc.getAvailableCodes("territory"); 934 Set<String> fullLanguages = sc.getAvailableCodes("language"); 935 936 Set<String> allLanguages = new TreeSet<String>(language_scripts.keySet()); 937 allLanguages.addAll(language_territories.keySet()); 938 for (Iterator<String> it = allLanguages.iterator(); it.hasNext();) { 939 Object language = it.next(); 940 Set<String> scripts = language_scripts.get(language); 941 Set<String> territories = language_territories.get(language); 942 logln(EnglishName.transform(language) 943 + " scripts: " + EnglishName.transform(scripts) 944 + " territories: " + EnglishName.transform(territories)); 945 } 946 947 Map<String, String> changedLanguage = new TreeMap<String, String>(); 948 for (Iterator<String> it = fullLanguages.iterator(); it.hasNext();) { 949 String code = it.next(); 950 List<String> data = sc.getFullData("language", code); 951 if (data.size() < 3) { 952 System.out.println("data problem: " + data); 953 continue; 954 } 955 String replacement = data.get(2); 956 if (!replacement.equals("")) { 957 if (!replacement.equals("--")) changedLanguage.put(code, replacement); 958 continue; 959 } 960 } 961 962 // remove private use, deprecated, groups 963 Set<String> standardTerritories = new TreeSet<String>(); 964 Map<String, String> changedTerritory = new TreeMap<String, String>(); 965 for (Iterator<String> it = fullTerritories.iterator(); it.hasNext();) { 966 String code = it.next(); 967 if (code.equals("200")) continue; // || code.equals("YU") || code.equals("PZ") 968 List<String> data = sc.getFullData("territory", code); 969 if (data.get(0).equals("PRIVATE USE")) continue; 970 if (!data.get(2).equals("")) { 971 if (!data.get(2).equals("--")) changedTerritory.put(code, data.get(2)); 972 continue; 973 } 974 standardTerritories.add(code); 975 } 976 standardTerritories.removeAll(groups.keySet()); 977 978 if (!standardTerritories.containsAll(sTerritories)) { 979 TreeSet<String> extras = new TreeSet<String>(sTerritories); 980 extras.removeAll(standardTerritories); 981 errln("Supplemental Language Territories contain illegal values: " + EnglishName.transform(extras)); 982 } 983 if (!sTerritories.containsAll(standardTerritories)) { 984 TreeSet<String> extras = new TreeSet<String>(standardTerritories); 985 extras.removeAll(sTerritories); 986 warnln("Missing Language Territories: " + EnglishName.transform(extras)); 987 } 988 989 // now test currencies 990 logln("Check that no illegal territories are used"); 991 if (!standardTerritories.containsAll(territory_currencies.keySet())) { 992 TreeSet<String> extras = new TreeSet<String>(territory_currencies.keySet()); 993 extras.removeAll(fullTerritories); 994 if (extras.size() != 0) errln("Currency info -- Illegal Territories: " + EnglishName.transform(extras)); 995 extras = new TreeSet<String>(territory_currencies.keySet()); 996 extras.retainAll(fullTerritories); 997 extras.removeAll(standardTerritories); 998 if (extras.size() != 0) warnln("Currency info -- Archaic Territories: " + EnglishName.transform(extras)); 999 } 1000 logln("Check that no territories are missing"); 1001 if (!territory_currencies.keySet().containsAll(standardTerritories)) { 1002 TreeSet<String> extras = new TreeSet<String>(standardTerritories); 1003 extras.removeAll(territory_currencies.keySet()); 1004 errln("Currency info -- Missing Territories: " + EnglishName.transform(extras)); 1005 } 1006 Set<String> currencies = new TreeSet<String>(); 1007 for (Iterator<Set<String>> it = territory_currencies.values().iterator(); it.hasNext();) { 1008 currencies.addAll(it.next()); 1009 } 1010 logln("Check that no illegal currencies are used"); 1011 Set<String> legalCurrencies = new TreeSet<String>(sc.getAvailableCodes("currency")); 1012 // first remove non-ISO 1013 for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) { 1014 String code = it.next(); 1015 List<String> data = sc.getFullData("currency", code); 1016 if ("X".equals(data.get(3))) it.remove(); 1017 } 1018 if (!legalCurrencies.containsAll(currencies)) { 1019 TreeSet<String> extras = new TreeSet<String>(currencies); 1020 extras.removeAll(legalCurrencies); 1021 errln("Currency info -- Illegal Currencies: " + EnglishCurrencyName.transform(extras)); 1022 } 1023 logln("Check that there are no missing currencies"); 1024 if (!currencies.containsAll(legalCurrencies)) { 1025 TreeSet<String> extras = new TreeSet<String>(legalCurrencies); 1026 extras.removeAll(currencies); 1027 Map<String, Set<String>> failures = new TreeMap<String, Set<String>>(); 1028 for (Iterator<String> it = extras.iterator(); it.hasNext();) { 1029 String code = it.next(); 1030 List<String> data = sc.getFullData("currency", code); 1031 if (data.get(1).equals("ZZ")) continue; 1032 String type = data.get(3) + "/" + data.get(1); 1033 Set<String> s = failures.get(type); 1034 if (s == null) failures.put(type, s = new TreeSet<String>()); 1035 s.add(code); 1036 } 1037 for (Iterator<String> it = failures.keySet().iterator(); it.hasNext();) { 1038 String type = it.next(); 1039 Set<String> s = failures.get(type); 1040 warnln("Currency info -- Missing Currencies: " + type + "\t \u2192 " + EnglishCurrencyName.transform(s)); 1041 } 1042 } 1043 logln("Missing English currency names"); 1044 for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) { 1045 String currency = it.next(); 1046 String name = english.getName("currency", currency); 1047 if (name == null) { 1048 String standardName = sc.getFullData("currency", currency).get(0); 1049 logln("\t\t\t<currency type=\"" + currency + "\">"); 1050 logln("\t\t\t\t<displayName>" + standardName + "</displayName>"); 1051 logln("\t\t\t</currency>"); 1052 } 1053 } 1054 logln("Check Aliases"); 1055 for (Iterator<String> it = aliases.keySet().iterator(); it.hasNext();) { 1056 // the first part of the mapping had better not be in the standardTerritories 1057 String key = it.next(); 1058 Map<String, String> submap = aliases.get(key); 1059 if (key.equals("territoryAlias")) { 1060 checkEqual(key, submap, changedTerritory); 1061 } else if (key.equals("languageAlias")) { 1062 for (Iterator<String> it2 = submap.keySet().iterator(); it2.hasNext();) { 1063 String k = it2.next(); 1064 String value = submap.get(k); 1065 if (value.indexOf("_") >= 0) it2.remove(); 1066 } 1067 checkEqual(key, submap, changedLanguage); 1068 } 1069 } 1070 } 1071 1072 /** 1073 * 1074 */ checkEqual(String title, Map map1, Map map2)1075 private void checkEqual(String title, Map map1, Map map2) { 1076 Set foo = new TreeSet(map1.keySet()); 1077 foo.removeAll(map2.keySet()); 1078 if (!foo.isEmpty()) errln("Extraneous Aliases: " + title + "\t" + foo); 1079 foo = new TreeSet(map2.keySet()); 1080 foo.removeAll(map1.keySet()); 1081 if (!foo.isEmpty()) errln("Missing Aliases: " + title + "\t" + foo); 1082 foo = map2.keySet(); 1083 foo.retainAll(map1.keySet()); 1084 for (Iterator it = foo.iterator(); it.hasNext();) { 1085 Object key = it.next(); 1086 Object result1 = map1.get(key); 1087 Object result2 = map2.get(key); 1088 if (!result1.equals(result2)) 1089 errln("Missing Aliases: " + title + "\t" + key + "\t" + result1 + " != " + result2); 1090 } 1091 } 1092 1093 /** 1094 * Test that the zone ids are well-formed. 1095 * 1096 */ TestZones()1097 public void TestZones() { 1098 StandardCodes sc = StandardCodes.make(); 1099 1100 Map<String, String> defaultNames = new TreeMap(); 1101 Map<String, String> old_new = sc.getZoneLinkold_new(); 1102 Set<String> core = sc.getZoneData().keySet(); 1103 logln("Checking for collisions with last field"); 1104 for (Iterator<String> it = core.iterator(); it.hasNext();) { 1105 String currentItem = it.next(); 1106 String defaultName = TimezoneFormatter.getFallbackName(currentItem); 1107 String fullName = defaultNames.get(defaultName); 1108 if (fullName == null) 1109 defaultNames.put(defaultName, currentItem); 1110 else { 1111 errln("Collision between: " + currentItem + " AND " + fullName); 1112 } 1113 } 1114 1115 logln("Checking that all links are TO canonical zones"); 1116 Set<String> s = new TreeSet<String>(old_new.values()); 1117 s.removeAll(core); 1118 if (s.size() != 0) { 1119 errln("Links go TO zones that are not canonical! " + s); 1120 } 1121 1122 logln("Checking that no links are FROM canonical zones"); 1123 s = new TreeSet<String>(core); 1124 s.retainAll(old_new.keySet()); 1125 if (s.size() != 0) { 1126 errln("Links go FROM zones that are canonical! " + s); 1127 } 1128 1129 logln("Checking that the zones with rule data are all canonical"); 1130 Set<String> zonesWithRules = sc.getZone_rules().keySet(); 1131 s.clear(); 1132 s.addAll(zonesWithRules); 1133 s.removeAll(core); 1134 if (s.size() != 0) logln("Zones with rules that are not canonical: " + s); 1135 1136 logln("Checking that the rule data are all canonical"); 1137 s.clear(); 1138 s.addAll(core); 1139 s.removeAll(zonesWithRules); 1140 s.removeAll(old_new.keySet()); 1141 if (s.size() != 0) logln("Canonical zones that don't have rules or links: " + s); 1142 1143 for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) { 1144 String oldItem = it.next(); 1145 logln("old: " + oldItem + "\tnew: " + old_new.get(oldItem)); 1146 } 1147 Map<String, Set<String>> new_old = new TreeMap<String, Set<String>>(); 1148 for (Iterator<String> it = core.iterator(); it.hasNext();) { 1149 new_old.put(it.next(), new TreeSet<String>()); 1150 } 1151 for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) { 1152 String oldItem = it.next(); 1153 String newItem = old_new.get(oldItem); 1154 Set<String> oldItems = new_old.get(newItem); 1155 if (oldItems == null) { // try recursing 1156 logln("!!!!Skipping " + oldItem + " \u2192 " + newItem); 1157 continue; 1158 // new_old.put(oldOne, oldItems = new TreeSet()); 1159 } 1160 oldItems.add(oldItem); 1161 } 1162 for (Iterator<String> it = new_old.keySet().iterator(); it.hasNext();) { 1163 String newOne = it.next(); 1164 Set<String> oldItems = new_old.get(newOne); 1165 logln(newOne + "\t" + oldItems); 1166 } 1167 } 1168 TestNarrowForms()1169 public void TestNarrowForms() { 1170 if (disableUntilLater("TestMinimalLocalization")) return; 1171 1172 for (Iterator<String> it = locales.iterator(); it.hasNext();) { 1173 String locale = it.next(); 1174 logln("Testing: " + getLocaleAndName(locale)); 1175 BreakIterator bi = BreakIterator.getCharacterInstance(new ULocale(locale)); 1176 CLDRFile item = cldrFactory.make(locale, false); 1177 // Walk through all the xpaths, adding to currentValues 1178 // Whenever two values for the same xpath are different, we remove from currentValues, and add to okValues 1179 for (Iterator<String> it2 = item.iterator(); it2.hasNext();) { 1180 String xpath = it2.next(); 1181 if (xpath.indexOf("[@type=\"narrow\"]") >= 0) { 1182 String value = item.getStringValue(xpath); 1183 // logln("\tTesting: " + value + "\t path: " + xpath); 1184 int end = getXGraphemeClusterBoundary(bi, value, 0); 1185 if (end == value.length()) continue; 1186 errln(getLocaleAndName(locale) + "\tillegal narrow value " + value + "\t path: " + xpath); 1187 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is too wide for a \"narrow\" value."); 1188 } 1189 } 1190 } 1191 } 1192 1193 static final UnicodeSet XGRAPHEME = new UnicodeSet("[[:mark:][:grapheme_extend:]]"); 1194 static final UnicodeSet DIGIT = new UnicodeSet("[:decimal_number:]"); 1195 getXGraphemeClusterBoundary(BreakIterator bi, String value, int start)1196 private int getXGraphemeClusterBoundary(BreakIterator bi, String value, int start) { 1197 if (value.length() <= 1) return 1; 1198 1199 bi.setText(value); 1200 if (start != 0) bi.preceding(start + 1); // backup one 1201 int current = bi.next(); 1202 int cp = 0; 1203 // link any digits 1204 if (DIGIT.contains(UTF16.charAt(value, current - 1))) { 1205 current = DIGIT.findIn(value, current, true); 1206 } 1207 // continue collecting any additional characters that are M or grapheme extend 1208 return XGRAPHEME.findIn(value, current, true); 1209 } 1210 } 1211 1212 /* 1213 * private static final int 1214 * HELP1 = 0, 1215 * HELP2 = 1, 1216 * SOURCEDIR = 2, 1217 * DESTDIR = 3, 1218 * MATCH = 4, 1219 * SKIP = 5, 1220 * TZADIR = 6, 1221 * NONVALIDATING = 7, 1222 * SHOW_DTD = 8, 1223 * TRANSLIT = 9; 1224 * options[SOURCEDIR].value 1225 * 1226 * private static final UOption[] options = { 1227 * UOption.HELP_H(), 1228 * UOption.HELP_QUESTION_MARK(), 1229 * UOption.SOURCEDIR().setDefault("C:\\ICU4C\\locale\\common\\main\\"), 1230 * UOption.DESTDIR().setDefault("C:\\DATA\\GEN\\cldr\\mainCheck\\"), 1231 * UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"), 1232 * UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"), 1233 * UOption.create("tzadir", 't', 1234 * UOption.REQUIRES_ARG).setDefault("C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"), 1235 * UOption.create("nonvalidating", 'n', UOption.NO_ARG), 1236 * UOption.create("dtd", 'w', UOption.NO_ARG), 1237 * UOption.create("transliterate", 'y', UOption.NO_ARG), }; 1238 * 1239 * private static String timeZoneAliasDir = null; 1240 * / 1241 * 1242 * public static void main(String[] args) throws SAXException, IOException { 1243 * UOption.parseArgs(args, options); 1244 * localeList = getMatchingXMLFiles(options[SOURCEDIR].value, options[MATCH].value); 1245 * /* 1246 * log = FileUtilities.openUTF8Writer(options[DESTDIR].value, "log.txt"); 1247 * try { 1248 * for (Iterator it = getMatchingXMLFiles(options[SOURCEDIR].value, options[MATCH].value).iterator(); it.hasNext();) { 1249 * String name = (String) it.next(); 1250 * for (int i = 0; i <= 1; ++i) { 1251 * boolean resolved = i == 1; 1252 * CLDRKey key = make(name, resolved); 1253 * 1254 * PrintWriter pw = FileUtilities.openUTF8Writer(options[DESTDIR].value, name + (resolved ? "_r" : "") + ".txt"); 1255 * write(pw, key); 1256 * pw.close(); 1257 * 1258 * } 1259 * } 1260 * } finally { 1261 * log.close(); 1262 * System.out.println("Done"); 1263 * } 1264 * 1265 * 1266 * <language type="in">Indonesian</language> 1267 * <language type="iw">Hebrew</language> 1268 * <script type="Bali">Balinese</script> 1269 * <script type="Batk">Batak</script> 1270 * <script type="Blis">Blissymbols</script> 1271 * <script type="Brah">Brahmi</script> 1272 * <script type="Bugi">Buginese</script> 1273 * <script type="Cham">Cham</script> 1274 * <script type="Cirt">Cirth</script> 1275 * <script type="Cyrs">Cyrillic (Old Church Slavonic variant)</script> 1276 * <script type="Egyd">Egyptian demotic</script> 1277 * <script type="Egyh">Egyptian hieratic</script> 1278 * <script type="Egyp">Egyptian hieroglyphs</script> 1279 * <script type="Glag">Glagolitic</script> 1280 * <script type="Hmng">Pahawh Hmong</script> 1281 * <script type="Hung">Old Hungarian</script> 1282 * <script type="Inds">Indus (Harappan)</script> 1283 * <script type="Java">Javanese</script> 1284 * <script type="Kali">Kayah Li</script> 1285 * <script type="Khar">Kharoshthi</script> 1286 * <script type="Latf">Latin (Fraktur variant)</script> 1287 * <script type="Latg">Latin (Gaelic variant)</script> 1288 * <script type="Lepc">Lepcha (Rong)</script> 1289 * <script type="Lina">Linear A</script> 1290 * <script type="Mand">Mandaean</script> 1291 * <script type="Maya">Mayan hieroglyphs</script> 1292 * <script type="Mero">Meroitic</script> 1293 * <script type="Orkh">Orkhon</script> 1294 * <script type="Perm">Old Permic</script> 1295 * <script type="Phag">Phags-pa</script> 1296 * <script type="Phnx">Phoenician</script> 1297 * <script type="Plrd">Pollard Phonetic</script> 1298 * <script type="Roro">Rongorongo</script> 1299 * <script type="Sara">Sarati</script> 1300 * <script type="Sylo">Syloti Nagri</script> 1301 * <script type="Syre">Syriac (Estrangelo variant)</script> 1302 * <script type="Syrj">Syriac (Western variant)</script> 1303 * <script type="Syrn">Syriac (Eastern variant)</script> 1304 * <script type="Talu">Tai Lue</script> 1305 * <script type="Teng">Tengwar</script> 1306 * <script type="Tfng">Tifinagh (Berber)</script> 1307 * <script type="Thai">Thai</script> 1308 * <script type="Vaii">Vai</script> 1309 * <script type="Visp">Visible Speech</script> 1310 * <script type="Xpeo">Old Persian</script> 1311 * <script type="Xsux">Cuneiform, Sumero-Akkadian</script> 1312 * <script type="Zxxx">Code for unwritten languages</script> 1313 * <script type="Zzzz">Code for uncoded script</script> 1314 * <territory type="001">World</territory> 1315 * <territory type="002">Africa</territory> 1316 * <territory type="003">North America</territory> 1317 * <territory type="005">South America</territory> 1318 * <territory type="009">Oceania</territory> 1319 * <territory type="011">Western Africa</territory> 1320 * <territory type="013">Central America</territory> 1321 * <territory type="014">Eastern Africa</territory> 1322 * <territory type="015">Northern Africa</territory> 1323 * <territory type="017">Middle Africa</territory> 1324 * <territory type="018">Southern Africa</territory> 1325 * <territory type="019">Americas</territory> 1326 * <territory type="021">Northern America</territory> 1327 * <territory type="029">Caribbean</territory> 1328 * <territory type="030">Eastern Asia</territory> 1329 * <territory type="035">South-eastern Asia</territory> 1330 * <territory type="039">Southern Europe</territory> 1331 * <territory type="053">Australia and New Zealand</territory> 1332 * <territory type="054">Melanesia</territory> 1333 * <territory type="057">Micronesia</territory> 1334 * <territory type="061">Polynesia</territory> 1335 * <territory type="062">South-central Asia</territory> 1336 * <territory type="AX">Aland Islands</territory> 1337 * <territory type="BQ">British Antarctic Territory</territory> 1338 * <territory type="BU">Myanmar</territory> 1339 * <territory type="CS">Czechoslovakia</territory> 1340 * <territory type="CT">Canton and Enderbury Islands</territory> 1341 * <territory type="DD">East Germany</territory> 1342 * <territory type="DY">Benin</territory> 1343 * <territory type="FQ">French Southern and Antarctic Territories</territory> 1344 * <territory type="FX">Metropolitan France</territory> 1345 * <territory type="HV">Burkina Faso</territory> 1346 * <territory type="JT">Johnston Island</territory> 1347 * <territory type="MI">Midway Islands</territory> 1348 * <territory type="NH">Vanuatu</territory> 1349 * <territory type="NQ">Dronning Maud Land</territory> 1350 * <territory type="NT">Neutral Zone</territory> 1351 * <territory type="PC">Pacific Islands Trust Territory</territory> 1352 * <territory type="PU">U.S. Miscellaneous Pacific Islands</territory> 1353 * <territory type="PZ">Panama Canal Zone</territory> 1354 * <territory type="RH">Zimbabwe</territory> 1355 * <territory type="SU">Union of Soviet Socialist Republics</territory> 1356 * <territory type="TP">Timor-Leste</territory> 1357 * <territory type="VD">North Vietnam</territory> 1358 * <territory type="WK">Wake Island</territory> 1359 * <territory type="YD">People's Democratic Republic of Yemen</territory> 1360 * <territory type="ZR">Congo, The Democratic Republic of the</territory> 1361 * <variant type="1901">Traditional German orthography</variant> 1362 * <variant type="1996">German orthography of 1996</variant> 1363 * <variant type="boont">Boontling</variant> 1364 * <variant type="gaulish">Gaulish</variant> 1365 * <variant type="guoyu">Mandarin or Standard Chinese</variant> 1366 * <variant type="hakka">Hakka</variant> 1367 * <variant type="lojban">Lojban</variant> 1368 * <variant type="nedis">Natisone dialect</variant> 1369 * <variant type="rozaj">Resian</variant> 1370 * <variant type="scouse">Scouse</variant> 1371 * <variant type="xiang">Xiang or Hunanese</variant> 1372 * 1373 * 1374 * <currency type="CFP"><displayName>???</displayName></currency> 1375 * <currency type="DDR"><displayName>???</displayName></currency> 1376 * <currency type="EQE"><displayName>???</displayName></currency> 1377 * <currency type="ESA"><displayName>???</displayName></currency> 1378 * <currency type="ESB"><displayName>???</displayName></currency> 1379 * <currency type="JAN"><displayName>???</displayName></currency> 1380 * <currency type="LSM"><displayName>???</displayName></currency> 1381 * <currency type="LUC"><displayName>???</displayName></currency> 1382 * <currency type="LUL"><displayName>???</displayName></currency> 1383 * <currency type="NAM"><displayName>???</displayName></currency> 1384 * <currency type="NEW"><displayName>???</displayName></currency> 1385 * <currency type="RHD"><displayName>???</displayName></currency> 1386 * <currency type="SAN"><displayName>???</displayName></currency> 1387 * <currency type="SDR"><displayName>???</displayName></currency> 1388 * <currency type="SEE"><displayName>???</displayName></currency> 1389 * <currency type="SRI"><displayName>???</displayName></currency> 1390 * <currency type="UAE"><displayName>???</displayName></currency> 1391 * <currency type="UDI"><displayName>???</displayName></currency> 1392 * <currency type="UIC"><displayName>???</displayName></currency> 1393 * <currency type="XAG"><displayName>???</displayName></currency> 1394 * <currency type="XPD"><displayName>???</displayName></currency> 1395 * <currency type="XPT"><displayName>???</displayName></currency> 1396 * <currency type="XRE"><displayName>???</displayName></currency> 1397 * <currency type="XTS"><displayName>???</displayName></currency> 1398 * <currency type="XXX"><displayName>???</displayName></currency> 1399 */ 1400