1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.test; 8 9 import static org.unicode.cldr.util.PathUtilities.getNormalizedPath; 10 11 import java.io.File; 12 import java.io.IOException; 13 import java.io.PrintWriter; 14 import java.util.Arrays; 15 import java.util.Calendar; 16 import java.util.Collection; 17 import java.util.Date; 18 import java.util.HashMap; 19 import java.util.HashSet; 20 import java.util.Iterator; 21 import java.util.LinkedHashSet; 22 import java.util.List; 23 import java.util.Map; 24 import java.util.Set; 25 import java.util.TreeMap; 26 import java.util.TreeSet; 27 28 import org.unicode.cldr.draft.FileUtilities; 29 import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType; 30 import org.unicode.cldr.util.CLDRFile; 31 import org.unicode.cldr.util.CLDRPaths; 32 import org.unicode.cldr.util.CldrUtility; 33 import org.unicode.cldr.util.Factory; 34 import org.unicode.cldr.util.LanguageTagParser; 35 import org.unicode.cldr.util.PathUtilities; 36 import org.unicode.cldr.util.SimpleFactory; 37 import org.unicode.cldr.util.StandardCodes; 38 import org.unicode.cldr.util.TimezoneFormatter; 39 import org.unicode.cldr.util.XPathParts; 40 import org.xml.sax.SAXException; 41 42 import com.ibm.icu.dev.test.TestFmwk; 43 import com.ibm.icu.text.BreakIterator; 44 import com.ibm.icu.text.DecimalFormat; 45 import com.ibm.icu.text.NumberFormat; 46 import com.ibm.icu.text.UTF16; 47 import com.ibm.icu.text.UnicodeSet; 48 import com.ibm.icu.util.ULocale; 49 50 /** 51 * Initial version of CLDR tests. Each test is named TextXXX. To run all the tests, use the options 52 * <blockquote>-nothrow</blockquote> 53 * To run a particular set of tests, include their names, like 54 * <blockquote>-nothrow TestForIllegalAttributeValues TestMinimalLocalization</blockquote> 55 * To show more information (logln), add -verbose 56 * <p> 57 * There are some environment variables that can be used with the test. <br> 58 * -DSHOW_FILES=<anything> shows all create/open of files. <br> 59 * -DXML_MATCH=<regular expression> skips all locales that don't match the regular expression <br> 60 * -DXML_MAIN_DIR=<filesystem directory> resets to a different main directory (eg not cldr/common/main. For example, 61 * some of the tools generate into a locale directory like -DXML_MAIN_DIR=C:\Unicode-CVS2\cldr\common\gen\main\ so this 62 * can be used to check that directory. <br> 63 * -DSKIP_DRAFT=<boolean> skips draft locales if <boolean> is a string starting with T or t 64 */ 65 public class CLDRTest extends TestFmwk { 66 /** 67 * privates 68 */ 69 private static String MATCH; 70 private static String MAIN_DIR; 71 private static boolean SKIP_DRAFT; 72 private Set<String> locales; 73 private Set<String> languageLocales; 74 private Factory cldrFactory; 75 private CLDRFile resolvedRoot; 76 private CLDRFile resolvedEnglish; 77 private final UnicodeSet commonAndInherited = new UnicodeSet( 78 "[[:script=common:][:script=inherited:][:alphabetic=false:]]"); 79 private static final String[] WIDTHS = { "narrow", "wide", "abbreviated", "short" }; 80 private static final String[] MONTHORDAYS = { "day", "month" }; 81 private Map<String, String> localeNameCache = new HashMap<>(); 82 private CLDRFile english = null; 83 84 private Set<String> surveyInfo = new TreeSet<>(); 85 86 /** 87 * TestFmwk boilerplate 88 */ main(String[] args)89 public static void main(String[] args) throws Exception { 90 MATCH = System.getProperty("XML_MATCH"); 91 if (MATCH == null) 92 MATCH = ".*"; 93 else 94 System.out.println("Resetting MATCH:" + MATCH); 95 MAIN_DIR = System.getProperty("XML_MAIN_DIR"); 96 if (MAIN_DIR == null) 97 MAIN_DIR = CLDRPaths.MAIN_DIRECTORY; 98 else 99 System.out.println("Resetting MAIN_DIR:" + MAIN_DIR); 100 SKIP_DRAFT = System.getProperty("XML_SKIP_DRAFT") != null; 101 if (SKIP_DRAFT) System.out.println("Skipping Draft locales"); 102 103 double deltaTime = System.currentTimeMillis(); 104 new CLDRTest().run(args); 105 deltaTime = System.currentTimeMillis() - deltaTime; 106 System.out.println("Seconds: " + deltaTime / 1000); 107 108 } 109 TestZZZZHack()110 public void TestZZZZHack() throws IOException { 111 // hack to get file written at the end of run. 112 PrintWriter surveyFile = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "surveyInfo.txt"); 113 for (String s : surveyInfo) { 114 surveyFile.println(s); 115 } 116 surveyFile.close(); 117 } 118 119 /** 120 * TestFmwk boilerplate 121 */ CLDRTest()122 public CLDRTest() throws SAXException, IOException { 123 // TODO parameterize the directory and filter 124 cldrFactory = Factory.make(MAIN_DIR, MATCH); 125 // CLDRKey.main(new String[]{"-mde.*"}); 126 locales = cldrFactory.getAvailable(); 127 languageLocales = cldrFactory.getAvailableLanguages(); 128 resolvedRoot = cldrFactory.make("root", true); 129 /* 130 * PrintWriter out = FileUtilities.openUTF8Writer(Utility.GEN_DIRECTORY + "resolved/", "root.xml"); 131 * CLDRFile temp = (CLDRFile) resolvedRoot.clone(); 132 * temp.write(out); 133 * out.close(); 134 */ 135 resolvedEnglish = cldrFactory.make("en", true); 136 } 137 138 /** 139 * Check to make sure that the currency formats are kosher. 140 */ TestCurrencyFormats()141 public void TestCurrencyFormats() { 142 // String decimal = "//ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat[@type=\"standard\"]/"; 143 // String currency = "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/"; 144 for (String locale : locales) { 145 boolean isPOSIX = locale.indexOf("POSIX") >= 0; 146 logln("Testing: " + locale); 147 CLDRFile item = cldrFactory.make(locale, false); 148 for (String xpath : item) { 149 NumericType type = NumericType.getNumericType(xpath); 150 if (type == NumericType.NOT_NUMERIC) continue; 151 String value = item.getStringValue(xpath); 152 // at this point, we only have currency formats 153 String pattern = DisplayAndInputProcessor.getCanonicalPattern(value, type, isPOSIX); 154 if (!pattern.equals(value)) { 155 String draft = ""; 156 if (item.getFullXPath(xpath).indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]"; 157 assertEquals(getLocaleAndName(locale) + draft + " " + type + " pattern incorrect", pattern, value); 158 } 159 } 160 } 161 } 162 163 /** 164 * Internal class 165 */ 166 private static class ValueCount { 167 int count = 1; 168 String value; 169 String fullxpath; 170 } 171 172 /** 173 * Verify that if all the children of a language locale do not have the same value for the same key. 174 */ TestCommonChildren()175 public void TestCommonChildren() { 176 if (disableUntilLater("TestCommonChildren")) return; 177 178 Map<String, ValueCount> currentValues = new TreeMap<>(); 179 Set<String> okValues = new TreeSet<>(); 180 181 for (String parent : languageLocales) { 182 logln("Testing: " + parent); 183 currentValues.clear(); 184 okValues.clear(); 185 Set<String> availableWithParent = cldrFactory.getAvailableWithParent(parent, true); 186 for (String locale : availableWithParent) { 187 logln("\tTesting: " + locale); 188 CLDRFile item = cldrFactory.make(locale, false); 189 // Walk through all the xpaths, adding to currentValues 190 // Whenever two values for the same xpath are different, we remove from currentValues, and add to 191 // okValues 192 for (String xpath : item) { 193 if (okValues.contains(xpath)) continue; 194 if (xpath.startsWith("//ldml/identity/")) continue; // skip identity elements 195 String v = item.getStringValue(xpath); 196 ValueCount last = currentValues.get(xpath); 197 if (last == null) { 198 ValueCount vc = new ValueCount(); 199 vc.value = v; 200 vc.fullxpath = item.getFullXPath(xpath); 201 currentValues.put(xpath, vc); 202 } else if (v.equals(last.value)) { 203 last.count++; 204 } else { 205 okValues.add(xpath); 206 currentValues.remove(xpath); 207 } 208 } 209 // at the end, only the keys left in currentValues are (possibly) faulty 210 // they are actually bad IFF either 211 // (a) the count is equal to the total (thus all children are the same), or 212 // (b) their value is the same as the parent's resolved value (thus all children are the same or the 213 // same 214 // as the inherited parent value). 215 } 216 if (currentValues.size() == 0) continue; 217 int size = availableWithParent.size(); 218 CLDRFile parentCLDR = cldrFactory.make(parent, true); 219 for (String xpath : currentValues.keySet()) { 220 ValueCount vc = currentValues.get(xpath); 221 if (vc.count == size || (vc.value.equals(parentCLDR.getStringValue(xpath)) 222 && vc.fullxpath.equals(parentCLDR.getStringValue(xpath)))) { 223 String draft = ""; 224 if (vc.fullxpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]"; 225 String count = (vc.count == size ? "" : vc.count + "/") + size; 226 warnln(getLocaleAndName(parent) + draft + 227 "\tall children (" + count + ") have same value for:\t" 228 + xpath + ";\t" + vc.value); 229 } 230 } 231 } 232 } 233 234 static String[] EXEMPLAR_SKIPS = { "/hourFormat", "/exemplarCharacters", "/pattern", "/localizedPatternChars" }; 235 236 /** 237 * Check that the exemplars include all characters in the data. 238 */ TestThatExemplarsContainAll()239 public void TestThatExemplarsContainAll() { 240 UnicodeSet allExemplars = new UnicodeSet(); 241 if (disableUntilLater("TestThatExemplarsContainAll")) return; 242 Set<String> counts = new TreeSet<>(); 243 int totalCount = 0; 244 UnicodeSet localeMissing = new UnicodeSet(); 245 for (String locale : locales) { 246 if (locale.equals("root")) continue; 247 CLDRFile resolved = cldrFactory.make(locale, false); // FIX LATER 248 UnicodeSet exemplars = getFixedExemplarSet(locale, resolved); 249 CLDRFile plain = cldrFactory.make(locale, false); 250 int count = 0; 251 localeMissing.clear(); 252 file: for (String xpath : plain) { 253 for (int i = 0; i < EXEMPLAR_SKIPS.length; ++i) { 254 if (xpath.indexOf(EXEMPLAR_SKIPS[i]) > 0) continue file; // skip some items. 255 } 256 if (SKIP_DRAFT) { 257 String fullxpath = plain.getFullXPath(xpath); 258 if (fullxpath.indexOf("[@draft=\"unconfirmed\"") > 0) continue; 259 } 260 if (xpath.startsWith("//ldml/posix/messages")) continue; 261 String value = plain.getStringValue(xpath); 262 allExemplars.addAll(value); 263 if (!exemplars.containsAll(value)) { 264 count++; 265 UnicodeSet missing = new UnicodeSet().addAll(value).removeAll(exemplars); 266 localeMissing.addAll(missing); 267 logln(getLocaleAndName(locale) + "\t" + xpath + "\t<" + value + "> contains " + missing 268 + ", not in exemplars"); 269 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' contains characters " 270 + missing.toPattern(false) + ", which are not in exemplars"); 271 } 272 } 273 NumberFormat nf = new DecimalFormat("000"); 274 if (count != 0) { 275 totalCount += count; 276 counts.add(nf.format(count) + "\t" + getLocaleAndName(locale) + "\t" + localeMissing); 277 } 278 if (localeMissing.size() != 0) { 279 errln(getLocaleAndName(locale) + "\t uses " + localeMissing + ", not in exemplars"); 280 } 281 } 282 for (String c : counts) { 283 logln(c); 284 } 285 logln("Total Count: " + totalCount); 286 System.out.println("All exemplars: " + allExemplars.toPattern(true)); 287 } 288 289 // Get Date-Time in milliseconds getDateTimeinMillis(int year, int month, int date)290 private static long getDateTimeinMillis(int year, int month, int date) { 291 Calendar cal = Calendar.getInstance(); 292 cal.set(year, month, date); 293 return cal.getTimeInMillis(); 294 } 295 296 static final long disableDate = getDateTimeinMillis(2005, 6 - 1, 3); 297 298 /** 299 * 300 */ disableUntilLater(String string)301 private boolean disableUntilLater(String string) { 302 if (new Date().getTime() >= disableDate) return false; 303 warnln("Disabling " + string + " until " + new Date(disableDate)); 304 return true; 305 } 306 307 /** 308 * Internal 309 */ getFixedExemplarSet(String locale, CLDRFile cldrfile)310 private UnicodeSet getFixedExemplarSet(String locale, CLDRFile cldrfile) { 311 UnicodeSet exemplars = getExemplarSet(cldrfile, ""); 312 if (exemplars.size() == 0) { 313 errln(getLocaleAndName(locale) + " has empty exemplar set"); 314 } 315 exemplars.addAll(getExemplarSet(cldrfile, "standard")); 316 UnicodeSet auxiliary = getExemplarSet(cldrfile, "auxiliary"); 317 if (exemplars.containsSome(auxiliary)) { 318 errln(getLocaleAndName(locale) + "Auxiliary & main exemplars should be disjoint, but overlap with " + 319 new UnicodeSet(exemplars).retainAll(auxiliary) + 320 ": change auxiliary to " + auxiliary.removeAll(exemplars)); 321 } 322 exemplars.addAll(auxiliary); 323 exemplars.addAll(commonAndInherited); 324 return exemplars; 325 } 326 327 /** 328 * @return Gets an exemplar set. Also verifies that the set contains no properties. 329 */ getExemplarSet(CLDRFile cldrfile, String type)330 public UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) { 331 if (type.length() != 0) type = "[@type=\"" + type + "\"]"; 332 String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type); 333 if (v == null) return new UnicodeSet(); 334 String pattern = v; 335 if (pattern.indexOf("[:") >= 0 || pattern.indexOf("\\p{") > 0) { 336 errln(getLocaleName(cldrfile.getLocaleID()) + " exemplar pattern contains property: " + pattern); 337 } 338 try { 339 UnicodeSet result = new UnicodeSet(v, UnicodeSet.CASE); 340 result.remove(0x20); 341 return result; 342 } catch (RuntimeException e) { 343 e.printStackTrace(); 344 errln(getLocaleAndName(cldrfile.getLocaleID()) + " has illegal exemplar set: <" + v + ">"); 345 return new UnicodeSet(); 346 } 347 // if (type.length() != 0) System.out.println("fetched set for " + type); 348 } 349 getLocaleAndName(String locale)350 public String getLocaleAndName(String locale) { 351 return locale + " (" + getLocaleName(locale) + ")"; 352 } 353 354 /** 355 * @return the ID plus its localization (for language, script, and territory IDs only) 356 */ getIDAndLocalization(String id)357 public String getIDAndLocalization(String id) { 358 return id + " " + getLocalization(id); 359 } 360 361 /** 362 * @return the localization (for language, script, and territory IDs only) 363 */ getLocalization(String id)364 public String getLocalization(String id) { 365 if (english == null) english = cldrFactory.make("en", true); 366 if (id.length() == 0) return "?"; 367 // pick on basis of case 368 char ch = id.charAt(0); 369 if ('a' <= ch && ch <= 'z') return getName(english, "languages/language", id); 370 if (id.length() == 4 && 'A' <= ch && ch <= 'Z') return getName(english, "scripts/script", id); 371 return getName(english, "territories/territory", id); 372 } 373 374 /** 375 * Internal 376 */ getIDAndLocalization(Set<String> missing)377 private String getIDAndLocalization(Set<String> missing) { 378 StringBuffer buffer = new StringBuffer(); 379 for (String next : missing) { 380 if (buffer.length() != 0) buffer.append("; "); 381 buffer.append(getIDAndLocalization(next)); 382 } 383 return buffer.toString(); 384 } 385 getLocaleName(String locale)386 public String getLocaleName(String locale) { 387 String name = localeNameCache.get(locale); 388 if (name != null) return name; 389 if (english == null) english = cldrFactory.make("en", true); 390 String result = english.getName(locale); 391 /* 392 * Collection c = Utility.splitList(locale, '_', false, null); 393 * String[] pieces = new String[c.size()]; 394 * c.toArray(pieces); 395 * int i = 0; 396 * String result = getName(english, "languages/language", pieces[i++]); 397 * if (pieces[i].length() == 0) return result; 398 * if (pieces[i].length() == 4) { 399 * result += " " + getName(english, "scripts/script", pieces[i++]); 400 * } 401 * if (pieces[i].length() == 0) return result; 402 * result += " " + getName(english, "territories/territory", pieces[i++]); 403 * if (pieces[i].length() == 0) return result; 404 * result += " " + getName(english, "variant/variants", pieces[i++]); 405 */ 406 localeNameCache.put(locale, result); 407 return result; 408 } 409 410 /** 411 * Internal 412 */ getName(CLDRFile english, String kind, String type)413 private String getName(CLDRFile english, String kind, String type) { 414 String v = english.getStringValue("//ldml/localeDisplayNames/" + kind + "[@type=\"" + type + "\"]"); 415 if (v == null) return "<" + type + ">"; 416 return v; 417 } 418 419 /** 420 * Make sure we are only using attribute values that are in RFC3066bis, the Olson database (with aliases removed) 421 * or ISO 4217 422 * 423 * @throws IOException 424 */ TestForIllegalAttributeValues()425 public void TestForIllegalAttributeValues() { 426 // check for illegal attribute values that are not in the DTD 427 Map<String, Set<String>> result = new TreeMap<>(); 428 Map<String, Set<String>> totalResult = new TreeMap<>(); 429 for (String locale : locales) { 430 logln("Testing: " + locale); 431 CLDRFile item = cldrFactory.make(locale, false); 432 result.clear(); 433 Set<String> xpathFailures = null; // don't collect 434 // XPathParts parts; 435 // String xpath; 436 // CLDRFile.StringValue value; 437 // String element; 438 // Map attributes; 439 checkAttributeValidity(item, result, xpathFailures); 440 441 // now show 442 //String localeName = getLocaleAndName(locale); 443 for (Iterator<String> it3 = result.keySet().iterator(); it3.hasNext();) { 444 String code = it3.next(); 445 Set<String> avalues = result.get(code); 446 errln(getLocaleAndName(locale) + "\tillegal attribute value for " + code + ", value:\t" + show(avalues)); 447 Set<String> totalvalues = totalResult.get(code); 448 if (totalvalues == null) totalResult.put(code, totalvalues = new TreeSet<>()); 449 totalvalues.addAll(avalues); 450 } 451 } 452 for (Iterator<String> it3 = totalResult.keySet().iterator(); it3.hasNext();) { 453 String code = it3.next(); 454 Set<String> avalues = totalResult.get(code); 455 errln("All illegal attribute values for " + code + ", value:\t" + show(avalues)); 456 } 457 } 458 459 /** 460 * Tests whether the display names have any collisions, e.g. if in the fully resolved 461 * locale $ is used for both USD and UAD. 462 * 463 */ TestDisplayNameCollisions()464 public void TestDisplayNameCollisions() { 465 if (disableUntilLater("TestDisplayNameCollisions")) return; 466 467 Map<String, String>[] maps = new HashMap[CLDRFile.LIMIT_TYPES]; 468 for (int i = 0; i < maps.length; ++i) { 469 maps[i] = new HashMap<>(); 470 } 471 Set<String> collisions = new TreeSet<>(); 472 for (Iterator<String> it = locales.iterator(); it.hasNext();) { 473 String locale = it.next(); 474 CLDRFile item = cldrFactory.make(locale, true); 475 for (int i = 0; i < maps.length; ++i) { 476 maps[i].clear(); 477 } 478 collisions.clear(); 479 480 for (Iterator<String> it2 = item.iterator(); it2.hasNext();) { 481 String xpath = it2.next(); 482 int nameType = CLDRFile.getNameType(xpath); 483 if (nameType < 0) continue; 484 String value = item.getStringValue(xpath); 485 String xpath2 = maps[nameType].get(value); 486 if (xpath2 == null) { 487 maps[nameType].put(value, xpath); 488 continue; 489 } 490 collisions.add(CLDRFile.getNameTypeName(nameType) + "\t" + value + "\t" + xpath + "\t" + xpath2); 491 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is a duplicate of what is in " + xpath2); 492 } 493 String name = getLocaleAndName(locale) + "\t"; 494 for (Iterator<String> it2 = collisions.iterator(); it2.hasNext();) { 495 errln(name + it2.next()); 496 } 497 } 498 } 499 500 /** 501 * Checks the validity of attributes, based on StandardCodes. 502 * The invalid codes are added to badCodes, and the failing xpaths are added to xpathFailures. 503 * 504 * @param item 505 * @param badCodes 506 * @param xpathFailures 507 */ checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures)508 public static void checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures) { 509 for (Iterator<String> it2 = item.iterator(); it2.hasNext();) { 510 String xpath = it2.next(); 511 XPathParts parts = XPathParts.getFrozenInstance(item.getFullXPath(xpath)); 512 for (int i = 0; i < parts.size(); ++i) { 513 if (parts.getAttributeCount(i) == 0) { 514 continue; 515 } 516 String element = parts.getElement(i); 517 Map<String, String> attributes = parts.getAttributes(i); 518 for (Iterator<String> it3 = attributes.keySet().iterator(); it3.hasNext();) { 519 String attribute = it3.next(); 520 String avalue = attributes.get(attribute); 521 checkValidity(xpath, element, attribute, avalue, badCodes, xpathFailures); 522 } 523 } 524 } 525 } 526 527 /** 528 * Internal 529 */ show(Collection<String> avalues)530 private String show(Collection<String> avalues) { 531 StringBuffer result = new StringBuffer("{"); 532 boolean first = true; 533 for (Iterator<String> it3 = avalues.iterator(); it3.hasNext();) { 534 if (first) 535 first = false; 536 else 537 result.append(", "); 538 result.append(it3.next().toString()); 539 } 540 result.append("}"); 541 return result.toString(); 542 } 543 544 /** 545 * Internal function 546 */ checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, Set<String> xpathsFailing)547 private static void checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, 548 Set<String> xpathsFailing) { 549 StandardCodes codes = StandardCodes.make(); 550 if (attribute.equals("type")) { 551 boolean checkReplacements = xpath.indexOf("/identity") < 0; 552 if (element.equals("currency")) 553 checkCodes(xpath, "currency", avalue, codes, results, xpathsFailing, checkReplacements); 554 else if (element.equals("script")) 555 checkCodes(xpath, "script", avalue, codes, results, xpathsFailing, checkReplacements); 556 else if (element.equals("territory")) 557 checkCodes(xpath, "territory", avalue, codes, results, xpathsFailing, checkReplacements); 558 else if (element.equals("language")) 559 checkCodes(xpath, "language", avalue, codes, results, xpathsFailing, checkReplacements); 560 else if (element.equals("zone")) 561 checkCodes(xpath, "tzid", avalue, codes, results, xpathsFailing, checkReplacements); 562 } 563 } 564 565 /** 566 * Internal function 567 * 568 * @param checkReplacements 569 * TODO 570 */ 571 private static void checkCodes(String xpath, String code, String avalue, StandardCodes codes, Map<String, Set<String>> results, 572 Set<String> xpathFailures, boolean checkReplacements) { 573 // ok if code is found AND it has no replacement 574 if (codes.getData(code, avalue) != null 575 && (!checkReplacements || codes.getReplacement(code, avalue) == null)) return; 576 577 if (xpathFailures != null) xpathFailures.add(xpath); 578 if (results == null) return; 579 Set<String> s = results.get(code); 580 if (s == null) { 581 s = new TreeSet<>(); 582 results.put(code, s); 583 } 584 s.add(avalue); 585 } 586 587 /** 588 * Verify that a small set of locales (currently just English) has everything translated. 589 * 590 * @throws IOException 591 */ 592 public void TestCompleteLocales() { 593 // just test English for now 594 if (english == null) english = cldrFactory.make("en", true); 595 checkTranslatedCodes(english); 596 } 597 598 /** 599 * Tests that the file contains codes for all main display name ids: language, script, territory, tzid, currency. 600 */ 601 private void checkTranslatedCodes(CLDRFile cldrfile) { 602 StandardCodes codes = StandardCodes.make(); 603 checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency", "/displayName"); 604 // can't check timezones for English. 605 // checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone", ""); 606 checkTranslatedCode(cldrfile, codes, "language", "//ldml/localeDisplayNames/languages/language", ""); 607 checkTranslatedCode(cldrfile, codes, "script", "//ldml/localeDisplayNames/scripts/script", ""); 608 checkTranslatedCode(cldrfile, codes, "territory", "//ldml/localeDisplayNames/territories/territory", ""); 609 checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant", ""); 610 } 611 612 /** 613 * @param codes 614 * @param type 615 * @param prefix 616 * @param postfix 617 * TODO 618 */ 619 private void checkTranslatedCode(CLDRFile cldrfile, StandardCodes codes, String type, String prefix, String postfix) { 620 621 // TODO, expand to other languages 622 Map<String, Set<String>> completionExceptions = new HashMap<>(); 623 Set<String> scriptExceptions = new HashSet<>(); 624 scriptExceptions.add("Cham"); 625 scriptExceptions.add("Thai"); 626 scriptExceptions.add("Toto"); 627 completionExceptions.put("script", scriptExceptions); 628 629 Set<String> codeItems = codes.getGoodAvailableCodes(type); 630 int count = 0; 631 Set<String> exceptions = completionExceptions.get(type); 632 for (String code : codeItems) { 633 String rfcname = codes.getData(type, code); 634 // if (rfcname.equals("ZZ")) continue; 635 ++count; 636 if (rfcname.equals("PRIVATE USE")) continue; 637 String fullFragment = prefix + "[@type=\"" + code + "\"]" + postfix; 638 String v = cldrfile.getStringValue(fullFragment); 639 if (v == null) { 640 errln("Missing translation for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">"); 641 continue; 642 } 643 String translation = v; 644 if (translation.equals(code)) { 645 if (exceptions != null && exceptions.contains(code)) continue; 646 errln("Translation = code for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">"); 647 continue; 648 } 649 } 650 logln("Total " + type + ":\t" + count); 651 } 652 653 // <territoryContainment><group type="001" contains="002 009 019 142 150"/> 654 // <languageData><language type="af" scripts="Latn" territories="ZA"/> 655 void getSupplementalData(Map<String, Set<String>> language_scripts, Map<String, Set<String>> language_territories, 656 Map<String, Set<String>> group_territory, 657 Map<String, Set<String>> territory_currencies, Map<String, Map<String, String>> aliases) { 658 659 boolean SHOW = false; 660 Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 661 CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false); 662 for (Iterator<String> it = supp.iterator(); it.hasNext();) { 663 String path = it.next(); 664 try { 665 XPathParts parts = XPathParts.getFrozenInstance(supp.getFullXPath(path)); 666 Map<String, String> m; 667 String type = ""; 668 if (aliases != null && parts.findElement("alias") >= 0) { 669 m = parts.findAttributes(type = "languageAlias"); 670 if (m == null) m = parts.findAttributes(type = "territoryAlias"); 671 if (m != null) { 672 Map top = aliases.get(type); 673 if (top == null) { 674 aliases.put(type, top = new TreeMap()); 675 } 676 top.put(m.get("type"), m.get("replacement")); 677 } 678 } 679 if (territory_currencies != null) { 680 m = parts.findAttributes("region"); 681 if (m != null) { 682 String region = m.get("iso3166"); 683 Set s = territory_currencies.get(region); 684 if (s == null) { 685 territory_currencies.put(region, s = new LinkedHashSet()); 686 } 687 m = parts.findAttributes("currency"); 688 if (m == null) { 689 warnln("missing currency for region: " + path); 690 continue; 691 } 692 String currency = m.get("iso4217"); 693 s.add(currency); 694 m = parts.findAttributes("alternate"); 695 String alternate = m == null ? null : (String) m.get("iso4217"); 696 if (alternate != null) { 697 s.add(alternate); 698 } 699 continue; 700 } 701 } 702 m = parts.findAttributes("group"); 703 if (m != null) { 704 if (group_territory == null) continue; 705 type = m.get("type"); 706 String contains = m.get("contains"); 707 group_territory.put(type, new TreeSet(CldrUtility.splitList(contains, ' ', true))); 708 continue; 709 } 710 m = parts.findAttributes("language"); 711 if (m == null) continue; 712 String language = m.get("type"); 713 String scripts = m.get("scripts"); 714 if (scripts == null) 715 language_scripts.put(language, new TreeSet<String>()); 716 else { 717 language_scripts.put(language, new TreeSet<>(CldrUtility.splitList(scripts, ' ', true))); 718 if (SHOW) 719 System.out.println(getIDAndLocalization(language) + "\t\t" 720 + getIDAndLocalization(language_scripts.get(language))); 721 } 722 String territories = m.get("territories"); 723 if (territories == null) 724 language_territories.put(language, new TreeSet<String>()); 725 else { 726 language_territories.put(language, new TreeSet<>(CldrUtility.splitList(territories, ' ', true))); 727 if (SHOW) 728 System.out.println(getIDAndLocalization(language) + "\t\t" 729 + getIDAndLocalization(language_territories.get(language))); 730 } 731 } catch (RuntimeException e) { 732 throw (IllegalArgumentException) new IllegalArgumentException("Failure with: " + path).initCause(e); 733 } 734 } 735 } 736 737 /** 738 * Verify that the minimal localizations are present. 739 */ TestMinimalLocalization()740 public void TestMinimalLocalization() throws IOException { 741 if (disableUntilLater("TestMinimalLocalization")) return; 742 743 boolean testDraft = false; 744 Map<String, Set<String>> language_scripts = new HashMap<>(); 745 Map<String, Set<String>> language_territories = new HashMap<>(); 746 getSupplementalData(language_scripts, language_territories, null, null, null); 747 LanguageTagParser localIDParser = new LanguageTagParser(); 748 // see http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/design/minimal_requirements.htm 749 int[] failureCount = new int[1]; 750 int[] warningCount = new int[1]; 751 for (Iterator<String> it = languageLocales.iterator(); it.hasNext();) { 752 String locale = it.next(); 753 if (locale.equals("root")) continue; 754 // if (!locale.equals("zh_Hant")) continue; 755 756 CLDRFile item = cldrFactory.make(locale, true); 757 if (!testDraft && item.isDraft()) { 758 logln(getLocaleAndName(locale) + "\tskipping draft"); 759 continue; 760 } 761 UnicodeSet exemplars = getFixedExemplarSet(locale, item); 762 CLDRFile missing = SimpleFactory.makeFile(locale); 763 failureCount[0] = 0; 764 warningCount[0] = 0; 765 localIDParser.set(locale); 766 String language = localIDParser.getLanguage(); 767 logln("Testing: " + locale); 768 // languages 769 Set<String> languages = new TreeSet<>(CldrUtility.MINIMUM_LANGUAGES); 770 languages.add(language); 771 // LANGUAGE_NAME = 0, SCRIPT_NAME = 1, TERRITORY_NAME = 2, VARIANT_NAME = 3, 772 // CURRENCY_NAME = 4, CURRENCY_SYMBOL = 5, TZID = 6 773 774 checkForItems(item, languages, CLDRFile.LANGUAGE_NAME, missing, failureCount, null); 775 776 /* 777 * checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency"); 778 * checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone"); 779 * checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant"); 780 */ 781 782 Set<String> scripts = new TreeSet<>(); 783 scripts.add("Latn"); 784 Set<String> others = language_scripts.get(language); 785 if (others != null) scripts.addAll(others); 786 checkForItems(item, scripts, CLDRFile.SCRIPT_NAME, missing, failureCount, null); 787 788 Set<String> countries = new TreeSet<>(CldrUtility.MINIMUM_TERRITORIES); 789 others = language_territories.get(language); 790 if (others != null) countries.addAll(others); 791 checkForItems(item, countries, CLDRFile.TERRITORY_NAME, missing, failureCount, null); 792 793 Set<String> currencies = new TreeSet<>(); 794 StandardCodes sc = StandardCodes.make(); 795 for (Iterator<String> it2 = countries.iterator(); it2.hasNext();) { 796 String country = it2.next(); 797 Set<String> countryCurrencies = sc.getMainCurrencies(country); 798 if (countryCurrencies == null) { 799 errln("Internal Error: no currencies for " + country + ", locale: " + locale); 800 } else { 801 currencies.addAll(countryCurrencies); 802 } 803 } 804 checkForItems(item, currencies, CLDRFile.CURRENCY_NAME, missing, failureCount, null); 805 checkForItems(item, currencies, CLDRFile.CURRENCY_SYMBOL, missing, failureCount, exemplars); 806 807 // context=format and width=wide; context=stand-alone & width=abbreviated 808 Set<String> months = new TreeSet<>(); 809 for (int i = 1; i <= 12; ++i) 810 months.add(i + ""); 811 Set<String> days = new TreeSet<>(Arrays.asList(new String[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" })); 812 for (int i = -7; i < 0; ++i) { 813 checkForItems(item, (i < -4 ? months : days), i, missing, failureCount, null); 814 } 815 816 String filename = "missing_" + locale + ".xml"; 817 if (failureCount[0] > 0 || warningCount[0] > 0) { 818 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "missing/", filename); 819 missing.write(out); 820 out.close(); 821 // String s = getIDAndLocalization(missing); 822 String message = "missing localizations, creating file" 823 + getNormalizedPath(CLDRPaths.GEN_DIRECTORY, "missing", filename); 824 if (failureCount[0] > 0) 825 warnln(getLocaleAndName(locale) + "\t" + message); 826 else 827 logln(getLocaleAndName(locale) + "\tpossibly " + message); 828 } else { 829 new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).delete(); 830 } 831 } 832 } 833 834 /** 835 * Internal 836 */ getDateKey(String monthOrDay, String width, String code)837 private String getDateKey(String monthOrDay, String width, String code) { 838 // String context = width.equals("narrow") ? "format" : "stand-alone"; 839 return "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/" 840 + monthOrDay + "s/" + monthOrDay + "Context[@type=\"format\"]/" 841 + monthOrDay + "Width[@type=\"" + width + "\"]/" + monthOrDay 842 + "[@type=\"" + code + "\"]"; 843 } 844 845 /** 846 * Internal 847 */ getDateKey(int type, String code)848 private String getDateKey(int type, String code) { 849 // type is 6..4 for months abbrev..narrow, 3..0 for days short..narrow 850 int monthOrDayType = 0, widthType = type; 851 if (type >= 4) { 852 monthOrDayType = 1; 853 widthType -= 4; 854 } 855 return getDateKey(MONTHORDAYS[monthOrDayType], WIDTHS[widthType], code); 856 } 857 858 /** 859 * @param item 860 * @param codes 861 * @param missing 862 * @param exemplarTest 863 * TODO 864 * TODO 865 */ checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], UnicodeSet exemplarTest)866 private void checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], 867 UnicodeSet exemplarTest) { 868 // check codes 869 for (Iterator<String> it2 = codes.iterator(); it2.hasNext();) { 870 String code = it2.next(); 871 String key; 872 if (type >= 0) { 873 key = CLDRFile.getKey(type, code); 874 } else { 875 key = getDateKey(-type - 1, code); 876 } 877 String v = item.getStringValue(key); 878 String rootValue = resolvedRoot.getStringValue(key); 879 if (v == null || v.equals(rootValue) && (exemplarTest == null || !exemplarTest.containsAll(rootValue))) { 880 String englishValue = resolvedEnglish.getStringValue(key); 881 String transValue; 882 if (englishValue != null) { 883 transValue = englishValue; 884 } else { 885 transValue = code; 886 } 887 missing.add(key, "TODO " + transValue); 888 failureCount[0]++; 889 } else { 890 logln("\t" + code + "\t" + v); 891 } 892 } 893 } 894 895 /* 896 * void showTestStr() { 897 * LocaleIDParser lparser = new LocaleIDParser(); 898 * Collection s = split(teststr,',', true, new ArrayList()); 899 * for (Iterator it = s.iterator(); it.hasNext();) { 900 * String item = (String)it.next(); 901 * lparser.set(item.replace('?', '_')); 902 * String region = lparser.getRegion(); 903 * System.out.print(item.replace('?', '-') + " (" + getLocalization(region) + "), "); 904 * //System.out.print(getLocalization(region) + ", "); 905 * } 906 * } 907 * static String teststr = 908 * "en?AG, en?AI, en?AS, en?AU, en?IN, en?BB, en?BE, en?BM, en?BN, en?BS, en?BW, en?BZ, en?CA, en?CK, en?CM, en?DM, en?ER, en?ET, en?FJ, en?FK, en?FM, en?GB, en?GD, en?GH, en?GI, en?GM, en?GU, en?GY, en?HK, en?IE, en?IL, en?IO, en?JM, en?KE, en?KI, en?KN, en?KY, en?LC, en?LR, en?LS, en?MH, en?MP, en?MS, en?MT, en?MU, en?MW, en?NA, en?NF, en?NG, en?NR, en?NU, en?NZ, en?PG, en?PH, en?PK, en?PN, en?PR, en?PW, en?RW, en?SB, en?SC, en?SG, en?SH, en?SL, en?SO, en?SZ, en?TC, en?TK, en?TO, en?TT, en?UG, en?UM, en?US, en?VC, en?VG, en?VI, en?VU, en?WS, en?ZA, en?ZM, en?ZW" 909 * ; 910 */ 911 912 CldrUtility.CollectionTransform EnglishName = new CldrUtility.CollectionTransform() { 913 @Override 914 public Object transform(Object source) { 915 // TODO Auto-generated method stub 916 return getLocalization(source.toString()) + " (" + source + ")"; 917 } 918 }; 919 920 CldrUtility.CollectionTransform EnglishCurrencyName = new CldrUtility.CollectionTransform() { 921 @Override 922 public Object transform(Object source) { 923 if (english == null) english = cldrFactory.make("en", true); 924 return english.getName("currency", source.toString()) + " (" + source + ")"; 925 } 926 }; 927 928 /** 929 * Tests that the supplemental data is well-formed. 930 * 931 */ TestSupplementalData()932 public void TestSupplementalData() { 933 Map<String, Set<String>> language_scripts = new TreeMap<>(); 934 Map<String, Set<String>> language_territories = new TreeMap<>(); 935 Map<String, Set<String>> groups = new TreeMap<>(); 936 Map<String, Set<String>> territory_currencies = new TreeMap<>(); 937 Map<String, Map<String, String>> aliases = new TreeMap<>(); 938 getSupplementalData(language_scripts, language_territories, groups, territory_currencies, aliases); 939 Set<String> sTerritories = new TreeSet<>(); 940 for (Iterator<Set<String>> it = language_territories.values().iterator(); it.hasNext();) { 941 sTerritories.addAll(it.next()); 942 } 943 StandardCodes sc = StandardCodes.make(); 944 Set<String> fullTerritories = sc.getAvailableCodes("territory"); 945 Set<String> fullLanguages = sc.getAvailableCodes("language"); 946 947 Set<String> allLanguages = new TreeSet<>(language_scripts.keySet()); 948 allLanguages.addAll(language_territories.keySet()); 949 for (Iterator<String> it = allLanguages.iterator(); it.hasNext();) { 950 Object language = it.next(); 951 Set<String> scripts = language_scripts.get(language); 952 Set<String> territories = language_territories.get(language); 953 logln(EnglishName.transform(language) 954 + " scripts: " + EnglishName.transform(scripts) 955 + " territories: " + EnglishName.transform(territories)); 956 } 957 958 Map<String, String> changedLanguage = new TreeMap<>(); 959 for (Iterator<String> it = fullLanguages.iterator(); it.hasNext();) { 960 String code = it.next(); 961 List<String> data = sc.getFullData("language", code); 962 if (data.size() < 3) { 963 System.out.println("data problem: " + data); 964 continue; 965 } 966 String replacement = data.get(2); 967 if (!replacement.equals("")) { 968 if (!replacement.equals("--")) changedLanguage.put(code, replacement); 969 continue; 970 } 971 } 972 973 // remove private use, deprecated, groups 974 Set<String> standardTerritories = new TreeSet<>(); 975 Map<String, String> changedTerritory = new TreeMap<>(); 976 for (Iterator<String> it = fullTerritories.iterator(); it.hasNext();) { 977 String code = it.next(); 978 if (code.equals("200")) continue; // || code.equals("YU") || code.equals("PZ") 979 List<String> data = sc.getFullData("territory", code); 980 if (data.get(0).equals("PRIVATE USE")) continue; 981 if (!data.get(2).equals("")) { 982 if (!data.get(2).equals("--")) changedTerritory.put(code, data.get(2)); 983 continue; 984 } 985 standardTerritories.add(code); 986 } 987 standardTerritories.removeAll(groups.keySet()); 988 989 if (!standardTerritories.containsAll(sTerritories)) { 990 TreeSet<String> extras = new TreeSet<>(sTerritories); 991 extras.removeAll(standardTerritories); 992 errln("Supplemental Language Territories contain illegal values: " + EnglishName.transform(extras)); 993 } 994 if (!sTerritories.containsAll(standardTerritories)) { 995 TreeSet<String> extras = new TreeSet<>(standardTerritories); 996 extras.removeAll(sTerritories); 997 warnln("Missing Language Territories: " + EnglishName.transform(extras)); 998 } 999 1000 // now test currencies 1001 logln("Check that no illegal territories are used"); 1002 if (!standardTerritories.containsAll(territory_currencies.keySet())) { 1003 TreeSet<String> extras = new TreeSet<>(territory_currencies.keySet()); 1004 extras.removeAll(fullTerritories); 1005 if (extras.size() != 0) errln("Currency info -- Illegal Territories: " + EnglishName.transform(extras)); 1006 extras = new TreeSet<>(territory_currencies.keySet()); 1007 extras.retainAll(fullTerritories); 1008 extras.removeAll(standardTerritories); 1009 if (extras.size() != 0) warnln("Currency info -- Archaic Territories: " + EnglishName.transform(extras)); 1010 } 1011 logln("Check that no territories are missing"); 1012 if (!territory_currencies.keySet().containsAll(standardTerritories)) { 1013 TreeSet<String> extras = new TreeSet<>(standardTerritories); 1014 extras.removeAll(territory_currencies.keySet()); 1015 errln("Currency info -- Missing Territories: " + EnglishName.transform(extras)); 1016 } 1017 Set<String> currencies = new TreeSet<>(); 1018 for (Iterator<Set<String>> it = territory_currencies.values().iterator(); it.hasNext();) { 1019 currencies.addAll(it.next()); 1020 } 1021 logln("Check that no illegal currencies are used"); 1022 Set<String> legalCurrencies = new TreeSet<>(sc.getAvailableCodes("currency")); 1023 // first remove non-ISO 1024 for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) { 1025 String code = it.next(); 1026 List<String> data = sc.getFullData("currency", code); 1027 if ("X".equals(data.get(3))) it.remove(); 1028 } 1029 if (!legalCurrencies.containsAll(currencies)) { 1030 TreeSet<String> extras = new TreeSet<>(currencies); 1031 extras.removeAll(legalCurrencies); 1032 errln("Currency info -- Illegal Currencies: " + EnglishCurrencyName.transform(extras)); 1033 } 1034 logln("Check that there are no missing currencies"); 1035 if (!currencies.containsAll(legalCurrencies)) { 1036 TreeSet<String> extras = new TreeSet<>(legalCurrencies); 1037 extras.removeAll(currencies); 1038 Map<String, Set<String>> failures = new TreeMap<>(); 1039 for (Iterator<String> it = extras.iterator(); it.hasNext();) { 1040 String code = it.next(); 1041 List<String> data = sc.getFullData("currency", code); 1042 if (data.get(1).equals("ZZ")) continue; 1043 String type = data.get(3) + "/" + data.get(1); 1044 Set<String> s = failures.get(type); 1045 if (s == null) failures.put(type, s = new TreeSet<>()); 1046 s.add(code); 1047 } 1048 for (Iterator<String> it = failures.keySet().iterator(); it.hasNext();) { 1049 String type = it.next(); 1050 Set<String> s = failures.get(type); 1051 warnln("Currency info -- Missing Currencies: " + type + "\t \u2192 " + EnglishCurrencyName.transform(s)); 1052 } 1053 } 1054 logln("Missing English currency names"); 1055 for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) { 1056 String currency = it.next(); 1057 String name = english.getName("currency", currency); 1058 if (name == null) { 1059 String standardName = sc.getFullData("currency", currency).get(0); 1060 logln("\t\t\t<currency type=\"" + currency + "\">"); 1061 logln("\t\t\t\t<displayName>" + standardName + "</displayName>"); 1062 logln("\t\t\t</currency>"); 1063 } 1064 } 1065 logln("Check Aliases"); 1066 for (Iterator<String> it = aliases.keySet().iterator(); it.hasNext();) { 1067 // the first part of the mapping had better not be in the standardTerritories 1068 String key = it.next(); 1069 Map<String, String> submap = aliases.get(key); 1070 if (key.equals("territoryAlias")) { 1071 checkEqual(key, submap, changedTerritory); 1072 } else if (key.equals("languageAlias")) { 1073 for (Iterator<String> it2 = submap.keySet().iterator(); it2.hasNext();) { 1074 String k = it2.next(); 1075 String value = submap.get(k); 1076 if (value.indexOf("_") >= 0) it2.remove(); 1077 } 1078 checkEqual(key, submap, changedLanguage); 1079 } 1080 } 1081 } 1082 1083 /** 1084 * 1085 */ checkEqual(String title, Map map1, Map map2)1086 private void checkEqual(String title, Map map1, Map map2) { 1087 Set foo = new TreeSet(map1.keySet()); 1088 foo.removeAll(map2.keySet()); 1089 if (!foo.isEmpty()) errln("Extraneous Aliases: " + title + "\t" + foo); 1090 foo = new TreeSet(map2.keySet()); 1091 foo.removeAll(map1.keySet()); 1092 if (!foo.isEmpty()) errln("Missing Aliases: " + title + "\t" + foo); 1093 foo = map2.keySet(); 1094 foo.retainAll(map1.keySet()); 1095 for (Iterator it = foo.iterator(); it.hasNext();) { 1096 Object key = it.next(); 1097 Object result1 = map1.get(key); 1098 Object result2 = map2.get(key); 1099 if (!result1.equals(result2)) 1100 errln("Missing Aliases: " + title + "\t" + key + "\t" + result1 + " != " + result2); 1101 } 1102 } 1103 1104 /** 1105 * Test that the zone ids are well-formed. 1106 * 1107 */ TestZones()1108 public void TestZones() { 1109 StandardCodes sc = StandardCodes.make(); 1110 1111 Map<String, String> defaultNames = new TreeMap(); 1112 Map<String, String> old_new = sc.getZoneLinkold_new(); 1113 Set<String> core = sc.getZoneData().keySet(); 1114 logln("Checking for collisions with last field"); 1115 for (Iterator<String> it = core.iterator(); it.hasNext();) { 1116 String currentItem = it.next(); 1117 String defaultName = TimezoneFormatter.getFallbackName(currentItem); 1118 String fullName = defaultNames.get(defaultName); 1119 if (fullName == null) 1120 defaultNames.put(defaultName, currentItem); 1121 else { 1122 errln("Collision between: " + currentItem + " AND " + fullName); 1123 } 1124 } 1125 1126 logln("Checking that all links are TO canonical zones"); 1127 Set<String> s = new TreeSet<>(old_new.values()); 1128 s.removeAll(core); 1129 if (s.size() != 0) { 1130 errln("Links go TO zones that are not canonical! " + s); 1131 } 1132 1133 logln("Checking that no links are FROM canonical zones"); 1134 s = new TreeSet<>(core); 1135 s.retainAll(old_new.keySet()); 1136 if (s.size() != 0) { 1137 errln("Links go FROM zones that are canonical! " + s); 1138 } 1139 1140 logln("Checking that the zones with rule data are all canonical"); 1141 Set<String> zonesWithRules = sc.getZone_rules().keySet(); 1142 s.clear(); 1143 s.addAll(zonesWithRules); 1144 s.removeAll(core); 1145 if (s.size() != 0) logln("Zones with rules that are not canonical: " + s); 1146 1147 logln("Checking that the rule data are all canonical"); 1148 s.clear(); 1149 s.addAll(core); 1150 s.removeAll(zonesWithRules); 1151 s.removeAll(old_new.keySet()); 1152 if (s.size() != 0) logln("Canonical zones that don't have rules or links: " + s); 1153 1154 for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) { 1155 String oldItem = it.next(); 1156 logln("old: " + oldItem + "\tnew: " + old_new.get(oldItem)); 1157 } 1158 Map<String, Set<String>> new_old = new TreeMap<>(); 1159 for (Iterator<String> it = core.iterator(); it.hasNext();) { 1160 new_old.put(it.next(), new TreeSet<String>()); 1161 } 1162 for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) { 1163 String oldItem = it.next(); 1164 String newItem = old_new.get(oldItem); 1165 Set<String> oldItems = new_old.get(newItem); 1166 if (oldItems == null) { // try recursing 1167 logln("!!!!Skipping " + oldItem + " \u2192 " + newItem); 1168 continue; 1169 // new_old.put(oldOne, oldItems = new TreeSet()); 1170 } 1171 oldItems.add(oldItem); 1172 } 1173 for (Iterator<String> it = new_old.keySet().iterator(); it.hasNext();) { 1174 String newOne = it.next(); 1175 Set<String> oldItems = new_old.get(newOne); 1176 logln(newOne + "\t" + oldItems); 1177 } 1178 } 1179 TestNarrowForms()1180 public void TestNarrowForms() { 1181 if (disableUntilLater("TestMinimalLocalization")) return; 1182 1183 for (Iterator<String> it = locales.iterator(); it.hasNext();) { 1184 String locale = it.next(); 1185 logln("Testing: " + getLocaleAndName(locale)); 1186 BreakIterator bi = BreakIterator.getCharacterInstance(new ULocale(locale)); 1187 CLDRFile item = cldrFactory.make(locale, false); 1188 // Walk through all the xpaths, adding to currentValues 1189 // Whenever two values for the same xpath are different, we remove from currentValues, and add to okValues 1190 for (Iterator<String> it2 = item.iterator(); it2.hasNext();) { 1191 String xpath = it2.next(); 1192 if (xpath.indexOf("[@type=\"narrow\"]") >= 0) { 1193 String value = item.getStringValue(xpath); 1194 // logln("\tTesting: " + value + "\t path: " + xpath); 1195 int end = getXGraphemeClusterBoundary(bi, value, 0); 1196 if (end == value.length()) continue; 1197 errln(getLocaleAndName(locale) + "\tillegal narrow value " + value + "\t path: " + xpath); 1198 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is too wide for a \"narrow\" value."); 1199 } 1200 } 1201 } 1202 } 1203 1204 static final UnicodeSet XGRAPHEME = new UnicodeSet("[[:mark:][:grapheme_extend:]]"); 1205 static final UnicodeSet DIGIT = new UnicodeSet("[:decimal_number:]"); 1206 getXGraphemeClusterBoundary(BreakIterator bi, String value, int start)1207 private int getXGraphemeClusterBoundary(BreakIterator bi, String value, int start) { 1208 if (value.length() <= 1) return 1; 1209 1210 bi.setText(value); 1211 if (start != 0) bi.preceding(start + 1); // backup one 1212 int current = bi.next(); 1213 // link any digits 1214 if (DIGIT.contains(UTF16.charAt(value, current - 1))) { 1215 current = DIGIT.findIn(value, current, true); 1216 } 1217 // continue collecting any additional characters that are M or grapheme extend 1218 return XGRAPHEME.findIn(value, current, true); 1219 } 1220 } 1221