1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.test; 8 9 import static org.unicode.cldr.util.PathUtilities.getNormalizedPath; 10 11 import java.io.File; 12 import java.io.IOException; 13 import java.io.PrintWriter; 14 import java.util.Arrays; 15 import java.util.Calendar; 16 import java.util.Collection; 17 import java.util.Date; 18 import java.util.HashMap; 19 import java.util.HashSet; 20 import java.util.Iterator; 21 import java.util.LinkedHashSet; 22 import java.util.List; 23 import java.util.Map; 24 import java.util.Set; 25 import java.util.TreeMap; 26 import java.util.TreeSet; 27 28 import org.unicode.cldr.draft.FileUtilities; 29 import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType; 30 import org.unicode.cldr.util.CLDRFile; 31 import org.unicode.cldr.util.CLDRPaths; 32 import org.unicode.cldr.util.CldrUtility; 33 import org.unicode.cldr.util.Factory; 34 import org.unicode.cldr.util.LanguageTagParser; 35 import org.unicode.cldr.util.PathUtilities; 36 import org.unicode.cldr.util.SimpleFactory; 37 import org.unicode.cldr.util.StandardCodes; 38 import org.unicode.cldr.util.TimezoneFormatter; 39 import org.unicode.cldr.util.XPathParts; 40 import org.xml.sax.SAXException; 41 42 import com.ibm.icu.dev.test.TestFmwk; 43 import com.ibm.icu.text.BreakIterator; 44 import com.ibm.icu.text.DecimalFormat; 45 import com.ibm.icu.text.NumberFormat; 46 import com.ibm.icu.text.UTF16; 47 import com.ibm.icu.text.UnicodeSet; 48 import com.ibm.icu.util.ULocale; 49 50 /** 51 * Initial version of CLDR tests. Each test is named TextXXX. To run all the tests, use the options 52 * <blockquote>-nothrow</blockquote> 53 * To run a particular set of tests, include their names, like 54 * <blockquote>-nothrow TestForIllegalAttributeValues TestMinimalLocalization</blockquote> 55 * To show more information (logln), add -verbose 56 * <p> 57 * There are some environment variables that can be used with the test. <br> 58 * -DSHOW_FILES=<anything> shows all create/open of files. <br> 59 * -DXML_MATCH=<regular expression> skips all locales that don't match the regular expression <br> 60 * -DXML_MAIN_DIR=<filesystem directory> resets to a different main directory (eg not cldr/common/main. For example, 61 * some of the tools generate into a locale directory like -DXML_MAIN_DIR=C:\Unicode-CVS2\cldr\common\gen\main\ so this 62 * can be used to check that directory. <br> 63 * -DSKIP_DRAFT=<boolean> skips draft locales if <boolean> is a string starting with T or t 64 */ 65 public class CLDRTest extends TestFmwk { 66 /** 67 * privates 68 */ 69 private static String MATCH; 70 private static String MAIN_DIR; 71 private static boolean SKIP_DRAFT; 72 private Set<String> locales; 73 private Set<String> languageLocales; 74 private Factory cldrFactory; 75 private CLDRFile resolvedRoot; 76 private CLDRFile resolvedEnglish; 77 private final UnicodeSet commonAndInherited = new UnicodeSet( 78 "[[:script=common:][:script=inherited:][:alphabetic=false:]]"); 79 private static final String[] WIDTHS = { "narrow", "wide", "abbreviated", "short" }; 80 private static final String[] MONTHORDAYS = { "day", "month" }; 81 private Map<String, String> localeNameCache = new HashMap<>(); 82 private CLDRFile english = null; 83 84 private Set<String> surveyInfo = new TreeSet<>(); 85 86 /** 87 * TestFmwk boilerplate 88 */ main(String[] args)89 public static void main(String[] args) throws Exception { 90 MATCH = System.getProperty("XML_MATCH"); 91 if (MATCH == null) 92 MATCH = ".*"; 93 else 94 System.out.println("Resetting MATCH:" + MATCH); 95 MAIN_DIR = System.getProperty("XML_MAIN_DIR"); 96 if (MAIN_DIR == null) 97 MAIN_DIR = CLDRPaths.MAIN_DIRECTORY; 98 else 99 System.out.println("Resetting MAIN_DIR:" + MAIN_DIR); 100 SKIP_DRAFT = System.getProperty("XML_SKIP_DRAFT") != null; 101 if (SKIP_DRAFT) System.out.println("Skipping Draft locales"); 102 103 double deltaTime = System.currentTimeMillis(); 104 new CLDRTest().run(args); 105 deltaTime = System.currentTimeMillis() - deltaTime; 106 System.out.println("Seconds: " + deltaTime / 1000); 107 108 } 109 TestZZZZHack()110 public void TestZZZZHack() throws IOException { 111 // hack to get file written at the end of run. 112 PrintWriter surveyFile = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "surveyInfo.txt"); 113 for (String s : surveyInfo) { 114 surveyFile.println(s); 115 } 116 surveyFile.close(); 117 } 118 119 /** 120 * TestFmwk boilerplate 121 */ CLDRTest()122 public CLDRTest() throws SAXException, IOException { 123 // TODO parameterize the directory and filter 124 cldrFactory = Factory.make(MAIN_DIR, MATCH); 125 // CLDRKey.main(new String[]{"-mde.*"}); 126 locales = cldrFactory.getAvailable(); 127 languageLocales = cldrFactory.getAvailableLanguages(); 128 resolvedRoot = cldrFactory.make("root", true); 129 /* 130 * PrintWriter out = FileUtilities.openUTF8Writer(Utility.GEN_DIRECTORY + "resolved/", "root.xml"); 131 * CLDRFile temp = (CLDRFile) resolvedRoot.clone(); 132 * temp.write(out); 133 * out.close(); 134 */ 135 resolvedEnglish = cldrFactory.make("en", true); 136 } 137 138 /** 139 * Check to make sure that the currency formats are kosher. 140 */ TestCurrencyFormats()141 public void TestCurrencyFormats() { 142 // String decimal = "//ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat[@type=\"standard\"]/"; 143 // String currency = "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/"; 144 for (String locale : locales) { 145 boolean isPOSIX = locale.indexOf("POSIX") >= 0; 146 logln("Testing: " + locale); 147 CLDRFile item = cldrFactory.make(locale, false); 148 for (String xpath : item) { 149 NumericType type = NumericType.getNumericType(xpath); 150 if (type == NumericType.NOT_NUMERIC) continue; 151 String value = item.getStringValue(xpath); 152 // at this point, we only have currency formats 153 String pattern = DisplayAndInputProcessor.getCanonicalPattern(value, type, isPOSIX); 154 if (!pattern.equals(value)) { 155 String draft = ""; 156 if (item.getFullXPath(xpath).indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]"; 157 assertEquals(getLocaleAndName(locale) + draft + " " + type + " pattern incorrect", pattern, value); 158 } 159 } 160 } 161 } 162 163 /** 164 * Internal class 165 */ 166 private static class ValueCount { 167 int count = 1; 168 String value; 169 String fullxpath; 170 } 171 172 /** 173 * Verify that if all the children of a language locale do not have the same value for the same key. 174 */ TestCommonChildren()175 public void TestCommonChildren() { 176 if (disableUntilLater("TestCommonChildren")) return; 177 178 Map<String, ValueCount> currentValues = new TreeMap<>(); 179 Set<String> okValues = new TreeSet<>(); 180 181 for (String parent : languageLocales) { 182 logln("Testing: " + parent); 183 currentValues.clear(); 184 okValues.clear(); 185 Set<String> availableWithParent = cldrFactory.getAvailableWithParent(parent, true); 186 for (String locale : availableWithParent) { 187 logln("\tTesting: " + locale); 188 CLDRFile item = cldrFactory.make(locale, false); 189 // Walk through all the xpaths, adding to currentValues 190 // Whenever two values for the same xpath are different, we remove from currentValues, and add to 191 // okValues 192 for (String xpath : item) { 193 if (okValues.contains(xpath)) continue; 194 if (xpath.startsWith("//ldml/identity/")) continue; // skip identity elements 195 String v = item.getStringValue(xpath); 196 ValueCount last = currentValues.get(xpath); 197 if (last == null) { 198 ValueCount vc = new ValueCount(); 199 vc.value = v; 200 vc.fullxpath = item.getFullXPath(xpath); 201 currentValues.put(xpath, vc); 202 } else if (v.equals(last.value)) { 203 last.count++; 204 } else { 205 okValues.add(xpath); 206 currentValues.remove(xpath); 207 } 208 } 209 // at the end, only the keys left in currentValues are (possibly) faulty 210 // they are actually bad IFF either 211 // (a) the count is equal to the total (thus all children are the same), or 212 // (b) their value is the same as the parent's resolved value (thus all children are the same or the 213 // same 214 // as the inherited parent value). 215 } 216 if (currentValues.size() == 0) continue; 217 int size = availableWithParent.size(); 218 CLDRFile parentCLDR = cldrFactory.make(parent, true); 219 for (String xpath : currentValues.keySet()) { 220 ValueCount vc = currentValues.get(xpath); 221 if (vc.count == size || (vc.value.equals(parentCLDR.getStringValue(xpath)) 222 && vc.fullxpath.equals(parentCLDR.getStringValue(xpath)))) { 223 String draft = ""; 224 if (vc.fullxpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]"; 225 String count = (vc.count == size ? "" : vc.count + "/") + size; 226 warnln(getLocaleAndName(parent) + draft + 227 "\tall children (" + count + ") have same value for:\t" 228 + xpath + ";\t" + vc.value); 229 } 230 } 231 } 232 } 233 234 static String[] EXEMPLAR_SKIPS = { "/hourFormat", "/exemplarCharacters", "/pattern", "/localizedPatternChars" }; 235 236 /** 237 * Check that the exemplars include all characters in the data. 238 */ TestThatExemplarsContainAll()239 public void TestThatExemplarsContainAll() { 240 UnicodeSet allExemplars = new UnicodeSet(); 241 if (disableUntilLater("TestThatExemplarsContainAll")) return; 242 Set<String> counts = new TreeSet<>(); 243 int totalCount = 0; 244 UnicodeSet localeMissing = new UnicodeSet(); 245 for (String locale : locales) { 246 if (locale.equals("root")) continue; 247 CLDRFile resolved = cldrFactory.make(locale, false); // FIX LATER 248 UnicodeSet exemplars = getFixedExemplarSet(locale, resolved); 249 CLDRFile plain = cldrFactory.make(locale, false); 250 int count = 0; 251 localeMissing.clear(); 252 file: for (String xpath : plain) { 253 for (int i = 0; i < EXEMPLAR_SKIPS.length; ++i) { 254 if (xpath.indexOf(EXEMPLAR_SKIPS[i]) > 0) continue file; // skip some items. 255 } 256 if (SKIP_DRAFT) { 257 String fullxpath = plain.getFullXPath(xpath); 258 if (fullxpath.indexOf("[@draft=\"unconfirmed\"") > 0) continue; 259 } 260 if (xpath.startsWith("//ldml/posix/messages")) continue; 261 String value = plain.getStringValue(xpath); 262 allExemplars.addAll(value); 263 if (!exemplars.containsAll(value)) { 264 count++; 265 UnicodeSet missing = new UnicodeSet().addAll(value).removeAll(exemplars); 266 localeMissing.addAll(missing); 267 logln(getLocaleAndName(locale) + "\t" + xpath + "\t<" + value + "> contains " + missing 268 + ", not in exemplars"); 269 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' contains characters " 270 + missing.toPattern(false) + ", which are not in exemplars"); 271 } 272 } 273 NumberFormat nf = new DecimalFormat("000"); 274 if (count != 0) { 275 totalCount += count; 276 counts.add(nf.format(count) + "\t" + getLocaleAndName(locale) + "\t" + localeMissing); 277 } 278 if (localeMissing.size() != 0) { 279 errln(getLocaleAndName(locale) + "\t uses " + localeMissing + ", not in exemplars"); 280 } 281 } 282 for (String c : counts) { 283 logln(c); 284 } 285 logln("Total Count: " + totalCount); 286 System.out.println("All exemplars: " + allExemplars.toPattern(true)); 287 } 288 289 // Get Date-Time in milliseconds getDateTimeinMillis(int year, int month, int date)290 private static long getDateTimeinMillis(int year, int month, int date) { 291 Calendar cal = Calendar.getInstance(); 292 cal.set(year, month, date); 293 return cal.getTimeInMillis(); 294 } 295 296 static final long disableDate = getDateTimeinMillis(2005, 6 - 1, 3); 297 298 /** 299 * 300 */ disableUntilLater(String string)301 private boolean disableUntilLater(String string) { 302 if (new Date().getTime() >= disableDate) return false; 303 warnln("Disabling " + string + " until " + new Date(disableDate)); 304 return true; 305 } 306 307 /** 308 * Internal 309 */ getFixedExemplarSet(String locale, CLDRFile cldrfile)310 private UnicodeSet getFixedExemplarSet(String locale, CLDRFile cldrfile) { 311 UnicodeSet exemplars = getExemplarSet(cldrfile, ""); 312 if (exemplars.size() == 0) { 313 errln(getLocaleAndName(locale) + " has empty exemplar set"); 314 } 315 exemplars.addAll(getExemplarSet(cldrfile, "standard")); 316 UnicodeSet auxiliary = getExemplarSet(cldrfile, "auxiliary"); 317 if (exemplars.containsSome(auxiliary)) { 318 errln(getLocaleAndName(locale) + "Auxiliary & main exemplars should be disjoint, but overlap with " + 319 new UnicodeSet(exemplars).retainAll(auxiliary) + 320 ": change auxiliary to " + auxiliary.removeAll(exemplars)); 321 } 322 exemplars.addAll(auxiliary); 323 exemplars.addAll(commonAndInherited); 324 return exemplars; 325 } 326 327 /** 328 * @return Gets an exemplar set. Also verifies that the set contains no properties. 329 */ getExemplarSet(CLDRFile cldrfile, String type)330 public UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) { 331 if (type.length() != 0) type = "[@type=\"" + type + "\"]"; 332 String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type); 333 if (v == null) return new UnicodeSet(); 334 String pattern = v; 335 if (pattern.indexOf("[:") >= 0 || pattern.indexOf("\\p{") > 0) { 336 errln(getLocaleName(cldrfile.getLocaleID()) + " exemplar pattern contains property: " + pattern); 337 } 338 try { 339 UnicodeSet result = new UnicodeSet(v, UnicodeSet.CASE); 340 result.remove(0x20); 341 return result; 342 } catch (RuntimeException e) { 343 e.printStackTrace(); 344 errln(getLocaleAndName(cldrfile.getLocaleID()) + " has illegal exemplar set: <" + v + ">"); 345 return new UnicodeSet(); 346 } 347 // if (type.length() != 0) System.out.println("fetched set for " + type); 348 } 349 getLocaleAndName(String locale)350 public String getLocaleAndName(String locale) { 351 return locale + " (" + getLocaleName(locale) + ")"; 352 } 353 354 /** 355 * @return the ID plus its localization (for language, script, and territory IDs only) 356 */ getIDAndLocalization(String id)357 public String getIDAndLocalization(String id) { 358 return id + " " + getLocalization(id); 359 } 360 361 /** 362 * @return the localization (for language, script, and territory IDs only) 363 */ getLocalization(String id)364 public String getLocalization(String id) { 365 if (english == null) english = cldrFactory.make("en", true); 366 if (id.length() == 0) return "?"; 367 // pick on basis of case 368 char ch = id.charAt(0); 369 if ('a' <= ch && ch <= 'z') return getName(english, "languages/language", id); 370 if (id.length() == 4 && 'A' <= ch && ch <= 'Z') return getName(english, "scripts/script", id); 371 return getName(english, "territories/territory", id); 372 } 373 374 /** 375 * Internal 376 */ getIDAndLocalization(Set<String> missing)377 private String getIDAndLocalization(Set<String> missing) { 378 StringBuffer buffer = new StringBuffer(); 379 for (String next : missing) { 380 if (buffer.length() != 0) buffer.append("; "); 381 buffer.append(getIDAndLocalization(next)); 382 } 383 return buffer.toString(); 384 } 385 getLocaleName(String locale)386 public String getLocaleName(String locale) { 387 String name = localeNameCache.get(locale); 388 if (name != null) return name; 389 if (english == null) english = cldrFactory.make("en", true); 390 String result = english.getName(locale); 391 /* 392 * Collection c = Utility.splitList(locale, '_', false, null); 393 * String[] pieces = new String[c.size()]; 394 * c.toArray(pieces); 395 * int i = 0; 396 * String result = getName(english, "languages/language", pieces[i++]); 397 * if (pieces[i].length() == 0) return result; 398 * if (pieces[i].length() == 4) { 399 * result += " " + getName(english, "scripts/script", pieces[i++]); 400 * } 401 * if (pieces[i].length() == 0) return result; 402 * result += " " + getName(english, "territories/territory", pieces[i++]); 403 * if (pieces[i].length() == 0) return result; 404 * result += " " + getName(english, "variant/variants", pieces[i++]); 405 */ 406 localeNameCache.put(locale, result); 407 return result; 408 } 409 410 /** 411 * Internal 412 */ getName(CLDRFile english, String kind, String type)413 private String getName(CLDRFile english, String kind, String type) { 414 String v = english.getStringValue("//ldml/localeDisplayNames/" + kind + "[@type=\"" + type + "\"]"); 415 if (v == null) return "<" + type + ">"; 416 return v; 417 } 418 419 /** 420 * Make sure we are only using attribute values that are in RFC3066bis, the Olson database (with aliases removed) 421 * or ISO 4217 422 * 423 * @throws IOException 424 */ TestForIllegalAttributeValues()425 public void TestForIllegalAttributeValues() { 426 // check for illegal attribute values that are not in the DTD 427 Map<String, Set<String>> result = new TreeMap<>(); 428 Map<String, Set<String>> totalResult = new TreeMap<>(); 429 for (String locale : locales) { 430 logln("Testing: " + locale); 431 CLDRFile item = cldrFactory.make(locale, false); 432 result.clear(); 433 Set<String> xpathFailures = null; // don't collect 434 // XPathParts parts; 435 // String xpath; 436 // CLDRFile.StringValue value; 437 // String element; 438 // Map attributes; 439 checkAttributeValidity(item, result, xpathFailures); 440 441 // now show 442 //String localeName = getLocaleAndName(locale); 443 for (Iterator<String> it3 = result.keySet().iterator(); it3.hasNext();) { 444 String code = it3.next(); 445 Set<String> avalues = result.get(code); 446 errln(getLocaleAndName(locale) + "\tillegal attribute value for " + code + ", value:\t" + show(avalues)); 447 Set<String> totalvalues = totalResult.get(code); 448 if (totalvalues == null) totalResult.put(code, totalvalues = new TreeSet<>()); 449 totalvalues.addAll(avalues); 450 } 451 } 452 for (Iterator<String> it3 = totalResult.keySet().iterator(); it3.hasNext();) { 453 String code = it3.next(); 454 Set<String> avalues = totalResult.get(code); 455 errln("All illegal attribute values for " + code + ", value:\t" + show(avalues)); 456 } 457 } 458 459 /** 460 * Tests whether the display names have any collisions, e.g. if in the fully resolved 461 * locale $ is used for both USD and UAD. 462 * 463 */ TestDisplayNameCollisions()464 public void TestDisplayNameCollisions() { 465 if (disableUntilLater("TestDisplayNameCollisions")) return; 466 467 Map<String, String>[] maps = new HashMap[CLDRFile.LIMIT_TYPES]; 468 for (int i = 0; i < maps.length; ++i) { 469 maps[i] = new HashMap<>(); 470 } 471 Set<String> collisions = new TreeSet<>(); 472 for (Iterator<String> it = locales.iterator(); it.hasNext();) { 473 String locale = it.next(); 474 CLDRFile item = cldrFactory.make(locale, true); 475 for (int i = 0; i < maps.length; ++i) { 476 maps[i].clear(); 477 } 478 collisions.clear(); 479 480 for (Iterator<String> it2 = item.iterator(); it2.hasNext();) { 481 String xpath = it2.next(); 482 int nameType = CLDRFile.getNameType(xpath); 483 if (nameType < 0) continue; 484 String value = item.getStringValue(xpath); 485 String xpath2 = maps[nameType].get(value); 486 if (xpath2 == null) { 487 maps[nameType].put(value, xpath); 488 continue; 489 } 490 collisions.add(CLDRFile.getNameTypeName(nameType) + "\t" + value + "\t" + xpath + "\t" + xpath2); 491 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is a duplicate of what is in " + xpath2); 492 } 493 String name = getLocaleAndName(locale) + "\t"; 494 for (Iterator<String> it2 = collisions.iterator(); it2.hasNext();) { 495 errln(name + it2.next()); 496 } 497 } 498 } 499 500 /** 501 * Checks the validity of attributes, based on StandardCodes. 502 * The invalid codes are added to badCodes, and the failing xpaths are added to xpathFailures. 503 * 504 * @param item 505 * @param badCodes 506 * @param xpathFailures 507 */ checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures)508 public static void checkAttributeValidity(CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures) { 509 for (Iterator<String> it2 = item.iterator(); it2.hasNext();) { 510 String xpath = it2.next(); 511 XPathParts parts = XPathParts.getFrozenInstance(item.getFullXPath(xpath)); 512 for (int i = 0; i < parts.size(); ++i) { 513 if (parts.getAttributeCount(i) == 0) { 514 continue; 515 } 516 String element = parts.getElement(i); 517 Map<String, String> attributes = parts.getAttributes(i); 518 for (Iterator<String> it3 = attributes.keySet().iterator(); it3.hasNext();) { 519 String attribute = it3.next(); 520 String avalue = attributes.get(attribute); 521 checkValidity(xpath, element, attribute, avalue, badCodes, xpathFailures); 522 } 523 } 524 } 525 } 526 527 /** 528 * Internal 529 */ show(Collection<String> avalues)530 private String show(Collection<String> avalues) { 531 StringBuffer result = new StringBuffer("{"); 532 boolean first = true; 533 for (Iterator<String> it3 = avalues.iterator(); it3.hasNext();) { 534 if (first) 535 first = false; 536 else 537 result.append(", "); 538 result.append(it3.next().toString()); 539 } 540 result.append("}"); 541 return result.toString(); 542 } 543 544 /** 545 * Internal function 546 */ checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, Set<String> xpathsFailing)547 private static void checkValidity(String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, 548 Set<String> xpathsFailing) { 549 StandardCodes codes = StandardCodes.make(); 550 if (attribute.equals("type")) { 551 boolean checkReplacements = xpath.indexOf("/identity") < 0; 552 if (element.equals("currency")) 553 checkCodes(xpath, "currency", avalue, codes, results, xpathsFailing, checkReplacements); 554 else if (element.equals("script")) 555 checkCodes(xpath, "script", avalue, codes, results, xpathsFailing, checkReplacements); 556 else if (element.equals("territory")) 557 checkCodes(xpath, "territory", avalue, codes, results, xpathsFailing, checkReplacements); 558 else if (element.equals("language")) 559 checkCodes(xpath, "language", avalue, codes, results, xpathsFailing, checkReplacements); 560 else if (element.equals("zone")) 561 checkCodes(xpath, "tzid", avalue, codes, results, xpathsFailing, checkReplacements); 562 } 563 } 564 565 /** 566 * Internal function 567 * 568 * @param checkReplacements 569 * TODO 570 */ 571 private static void checkCodes(String xpath, String code, String avalue, StandardCodes codes, Map<String, Set<String>> results, 572 Set<String> xpathFailures, boolean checkReplacements) { 573 // ok if code is found AND it has no replacement 574 if (codes.getData(code, avalue) != null 575 && (!checkReplacements || codes.getReplacement(code, avalue) == null)) return; 576 577 if (xpathFailures != null) xpathFailures.add(xpath); 578 if (results == null) return; 579 Set<String> s = results.get(code); 580 if (s == null) { 581 s = new TreeSet<>(); 582 results.put(code, s); 583 } 584 s.add(avalue); 585 } 586 587 /** 588 * Verify that a small set of locales (currently just English) has everything translated. 589 * 590 * @throws IOException 591 */ 592 public void TestCompleteLocales() { 593 // just test English for now 594 if (english == null) english = cldrFactory.make("en", true); 595 checkTranslatedCodes(english); 596 } 597 598 /** 599 * Tests that the file contains codes for all main display name ids: language, script, territory, tzid, currency. 600 */ 601 private void checkTranslatedCodes(CLDRFile cldrfile) { 602 StandardCodes codes = StandardCodes.make(); 603 checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency", "/displayName"); 604 // can't check timezones for English. 605 // checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone", ""); 606 checkTranslatedCode(cldrfile, codes, "language", "//ldml/localeDisplayNames/languages/language", ""); 607 checkTranslatedCode(cldrfile, codes, "script", "//ldml/localeDisplayNames/scripts/script", ""); 608 checkTranslatedCode(cldrfile, codes, "territory", "//ldml/localeDisplayNames/territories/territory", ""); 609 checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant", ""); 610 } 611 612 /** 613 * @param codes 614 * @param type 615 * @param prefix 616 * @param postfix 617 * TODO 618 */ 619 private void checkTranslatedCode(CLDRFile cldrfile, StandardCodes codes, String type, String prefix, String postfix) { 620 621 // TODO, expand to other languages 622 Map<String, Set<String>> completionExceptions = new HashMap<>(); 623 Set<String> scriptExceptions = new HashSet<>(); 624 scriptExceptions.add("Cham"); 625 scriptExceptions.add("Thai"); 626 completionExceptions.put("script", scriptExceptions); 627 628 Set<String> codeItems = codes.getGoodAvailableCodes(type); 629 int count = 0; 630 Set<String> exceptions = completionExceptions.get(type); 631 for (String code : codeItems) { 632 String rfcname = codes.getData(type, code); 633 // if (rfcname.equals("ZZ")) continue; 634 ++count; 635 if (rfcname.equals("PRIVATE USE")) continue; 636 String fullFragment = prefix + "[@type=\"" + code + "\"]" + postfix; 637 String v = cldrfile.getStringValue(fullFragment); 638 if (v == null) { 639 errln("Missing translation for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">"); 640 continue; 641 } 642 String translation = v; 643 if (translation.equals(code)) { 644 if (exceptions != null && exceptions.contains(code)) continue; 645 errln("Translation = code for:\t<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">"); 646 continue; 647 } 648 } 649 logln("Total " + type + ":\t" + count); 650 } 651 652 // <territoryContainment><group type="001" contains="002 009 019 142 150"/> 653 // <languageData><language type="af" scripts="Latn" territories="ZA"/> 654 void getSupplementalData(Map<String, Set<String>> language_scripts, Map<String, Set<String>> language_territories, 655 Map<String, Set<String>> group_territory, 656 Map<String, Set<String>> territory_currencies, Map<String, Map<String, String>> aliases) { 657 658 boolean SHOW = false; 659 Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 660 CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false); 661 for (Iterator<String> it = supp.iterator(); it.hasNext();) { 662 String path = it.next(); 663 try { 664 XPathParts parts = XPathParts.getFrozenInstance(supp.getFullXPath(path)); 665 Map<String, String> m; 666 String type = ""; 667 if (aliases != null && parts.findElement("alias") >= 0) { 668 m = parts.findAttributes(type = "languageAlias"); 669 if (m == null) m = parts.findAttributes(type = "territoryAlias"); 670 if (m != null) { 671 Map top = aliases.get(type); 672 if (top == null) { 673 aliases.put(type, top = new TreeMap()); 674 } 675 top.put(m.get("type"), m.get("replacement")); 676 } 677 } 678 if (territory_currencies != null) { 679 m = parts.findAttributes("region"); 680 if (m != null) { 681 String region = m.get("iso3166"); 682 Set s = territory_currencies.get(region); 683 if (s == null) { 684 territory_currencies.put(region, s = new LinkedHashSet()); 685 } 686 m = parts.findAttributes("currency"); 687 if (m == null) { 688 warnln("missing currency for region: " + path); 689 continue; 690 } 691 String currency = m.get("iso4217"); 692 s.add(currency); 693 m = parts.findAttributes("alternate"); 694 String alternate = m == null ? null : (String) m.get("iso4217"); 695 if (alternate != null) { 696 s.add(alternate); 697 } 698 continue; 699 } 700 } 701 m = parts.findAttributes("group"); 702 if (m != null) { 703 if (group_territory == null) continue; 704 type = m.get("type"); 705 String contains = m.get("contains"); 706 group_territory.put(type, new TreeSet(CldrUtility.splitList(contains, ' ', true))); 707 continue; 708 } 709 m = parts.findAttributes("language"); 710 if (m == null) continue; 711 String language = m.get("type"); 712 String scripts = m.get("scripts"); 713 if (scripts == null) 714 language_scripts.put(language, new TreeSet<String>()); 715 else { 716 language_scripts.put(language, new TreeSet<>(CldrUtility.splitList(scripts, ' ', true))); 717 if (SHOW) 718 System.out.println(getIDAndLocalization(language) + "\t\t" 719 + getIDAndLocalization(language_scripts.get(language))); 720 } 721 String territories = m.get("territories"); 722 if (territories == null) 723 language_territories.put(language, new TreeSet<String>()); 724 else { 725 language_territories.put(language, new TreeSet<>(CldrUtility.splitList(territories, ' ', true))); 726 if (SHOW) 727 System.out.println(getIDAndLocalization(language) + "\t\t" 728 + getIDAndLocalization(language_territories.get(language))); 729 } 730 } catch (RuntimeException e) { 731 throw (IllegalArgumentException) new IllegalArgumentException("Failure with: " + path).initCause(e); 732 } 733 } 734 } 735 736 /** 737 * Verify that the minimal localizations are present. 738 */ TestMinimalLocalization()739 public void TestMinimalLocalization() throws IOException { 740 if (disableUntilLater("TestMinimalLocalization")) return; 741 742 boolean testDraft = false; 743 Map<String, Set<String>> language_scripts = new HashMap<>(); 744 Map<String, Set<String>> language_territories = new HashMap<>(); 745 getSupplementalData(language_scripts, language_territories, null, null, null); 746 LanguageTagParser localIDParser = new LanguageTagParser(); 747 // see http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/design/minimal_requirements.htm 748 int[] failureCount = new int[1]; 749 int[] warningCount = new int[1]; 750 for (Iterator<String> it = languageLocales.iterator(); it.hasNext();) { 751 String locale = it.next(); 752 if (locale.equals("root")) continue; 753 // if (!locale.equals("zh_Hant")) continue; 754 755 CLDRFile item = cldrFactory.make(locale, true); 756 if (!testDraft && item.isDraft()) { 757 logln(getLocaleAndName(locale) + "\tskipping draft"); 758 continue; 759 } 760 UnicodeSet exemplars = getFixedExemplarSet(locale, item); 761 CLDRFile missing = SimpleFactory.makeFile(locale); 762 failureCount[0] = 0; 763 warningCount[0] = 0; 764 localIDParser.set(locale); 765 String language = localIDParser.getLanguage(); 766 logln("Testing: " + locale); 767 // languages 768 Set<String> languages = new TreeSet<>(CldrUtility.MINIMUM_LANGUAGES); 769 languages.add(language); 770 // LANGUAGE_NAME = 0, SCRIPT_NAME = 1, TERRITORY_NAME = 2, VARIANT_NAME = 3, 771 // CURRENCY_NAME = 4, CURRENCY_SYMBOL = 5, TZID = 6 772 773 checkForItems(item, languages, CLDRFile.LANGUAGE_NAME, missing, failureCount, null); 774 775 /* 776 * checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency"); 777 * checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone"); 778 * checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant"); 779 */ 780 781 Set<String> scripts = new TreeSet<>(); 782 scripts.add("Latn"); 783 Set<String> others = language_scripts.get(language); 784 if (others != null) scripts.addAll(others); 785 checkForItems(item, scripts, CLDRFile.SCRIPT_NAME, missing, failureCount, null); 786 787 Set<String> countries = new TreeSet<>(CldrUtility.MINIMUM_TERRITORIES); 788 others = language_territories.get(language); 789 if (others != null) countries.addAll(others); 790 checkForItems(item, countries, CLDRFile.TERRITORY_NAME, missing, failureCount, null); 791 792 Set<String> currencies = new TreeSet<>(); 793 StandardCodes sc = StandardCodes.make(); 794 for (Iterator<String> it2 = countries.iterator(); it2.hasNext();) { 795 String country = it2.next(); 796 Set<String> countryCurrencies = sc.getMainCurrencies(country); 797 if (countryCurrencies == null) { 798 errln("Internal Error: no currencies for " + country + ", locale: " + locale); 799 } else { 800 currencies.addAll(countryCurrencies); 801 } 802 } 803 checkForItems(item, currencies, CLDRFile.CURRENCY_NAME, missing, failureCount, null); 804 checkForItems(item, currencies, CLDRFile.CURRENCY_SYMBOL, missing, failureCount, exemplars); 805 806 // context=format and width=wide; context=stand-alone & width=abbreviated 807 Set<String> months = new TreeSet<>(); 808 for (int i = 1; i <= 12; ++i) 809 months.add(i + ""); 810 Set<String> days = new TreeSet<>(Arrays.asList(new String[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" })); 811 for (int i = -7; i < 0; ++i) { 812 checkForItems(item, (i < -4 ? months : days), i, missing, failureCount, null); 813 } 814 815 String filename = "missing_" + locale + ".xml"; 816 if (failureCount[0] > 0 || warningCount[0] > 0) { 817 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "missing/", filename); 818 missing.write(out); 819 out.close(); 820 // String s = getIDAndLocalization(missing); 821 String message = "missing localizations, creating file" 822 + getNormalizedPath(CLDRPaths.GEN_DIRECTORY, "missing", filename); 823 if (failureCount[0] > 0) 824 warnln(getLocaleAndName(locale) + "\t" + message); 825 else 826 logln(getLocaleAndName(locale) + "\tpossibly " + message); 827 } else { 828 new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).delete(); 829 } 830 } 831 } 832 833 /** 834 * Internal 835 */ getDateKey(String monthOrDay, String width, String code)836 private String getDateKey(String monthOrDay, String width, String code) { 837 // String context = width.equals("narrow") ? "format" : "stand-alone"; 838 return "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/" 839 + monthOrDay + "s/" + monthOrDay + "Context[@type=\"format\"]/" 840 + monthOrDay + "Width[@type=\"" + width + "\"]/" + monthOrDay 841 + "[@type=\"" + code + "\"]"; 842 } 843 844 /** 845 * Internal 846 */ getDateKey(int type, String code)847 private String getDateKey(int type, String code) { 848 // type is 6..4 for months abbrev..narrow, 3..0 for days short..narrow 849 int monthOrDayType = 0, widthType = type; 850 if (type >= 4) { 851 monthOrDayType = 1; 852 widthType -= 4; 853 } 854 return getDateKey(MONTHORDAYS[monthOrDayType], WIDTHS[widthType], code); 855 } 856 857 /** 858 * @param item 859 * @param codes 860 * @param missing 861 * @param exemplarTest 862 * TODO 863 * TODO 864 */ checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], UnicodeSet exemplarTest)865 private void checkForItems(CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], 866 UnicodeSet exemplarTest) { 867 // check codes 868 for (Iterator<String> it2 = codes.iterator(); it2.hasNext();) { 869 String code = it2.next(); 870 String key; 871 if (type >= 0) { 872 key = CLDRFile.getKey(type, code); 873 } else { 874 key = getDateKey(-type - 1, code); 875 } 876 String v = item.getStringValue(key); 877 String rootValue = resolvedRoot.getStringValue(key); 878 if (v == null || v.equals(rootValue) && (exemplarTest == null || !exemplarTest.containsAll(rootValue))) { 879 String englishValue = resolvedEnglish.getStringValue(key); 880 String transValue; 881 if (englishValue != null) { 882 transValue = englishValue; 883 } else { 884 transValue = code; 885 } 886 missing.add(key, "TODO " + transValue); 887 failureCount[0]++; 888 } else { 889 logln("\t" + code + "\t" + v); 890 } 891 } 892 } 893 894 /* 895 * void showTestStr() { 896 * LocaleIDParser lparser = new LocaleIDParser(); 897 * Collection s = split(teststr,',', true, new ArrayList()); 898 * for (Iterator it = s.iterator(); it.hasNext();) { 899 * String item = (String)it.next(); 900 * lparser.set(item.replace('?', '_')); 901 * String region = lparser.getRegion(); 902 * System.out.print(item.replace('?', '-') + " (" + getLocalization(region) + "), "); 903 * //System.out.print(getLocalization(region) + ", "); 904 * } 905 * } 906 * static String teststr = 907 * "en?AG, en?AI, en?AS, en?AU, en?IN, en?BB, en?BE, en?BM, en?BN, en?BS, en?BW, en?BZ, en?CA, en?CK, en?CM, en?DM, en?ER, en?ET, en?FJ, en?FK, en?FM, en?GB, en?GD, en?GH, en?GI, en?GM, en?GU, en?GY, en?HK, en?IE, en?IL, en?IO, en?JM, en?KE, en?KI, en?KN, en?KY, en?LC, en?LR, en?LS, en?MH, en?MP, en?MS, en?MT, en?MU, en?MW, en?NA, en?NF, en?NG, en?NR, en?NU, en?NZ, en?PG, en?PH, en?PK, en?PN, en?PR, en?PW, en?RW, en?SB, en?SC, en?SG, en?SH, en?SL, en?SO, en?SZ, en?TC, en?TK, en?TO, en?TT, en?UG, en?UM, en?US, en?VC, en?VG, en?VI, en?VU, en?WS, en?ZA, en?ZM, en?ZW" 908 * ; 909 */ 910 911 CldrUtility.CollectionTransform EnglishName = new CldrUtility.CollectionTransform() { 912 @Override 913 public Object transform(Object source) { 914 // TODO Auto-generated method stub 915 return getLocalization(source.toString()) + " (" + source + ")"; 916 } 917 }; 918 919 CldrUtility.CollectionTransform EnglishCurrencyName = new CldrUtility.CollectionTransform() { 920 @Override 921 public Object transform(Object source) { 922 if (english == null) english = cldrFactory.make("en", true); 923 return english.getName("currency", source.toString()) + " (" + source + ")"; 924 } 925 }; 926 927 /** 928 * Tests that the supplemental data is well-formed. 929 * 930 */ TestSupplementalData()931 public void TestSupplementalData() { 932 Map<String, Set<String>> language_scripts = new TreeMap<>(); 933 Map<String, Set<String>> language_territories = new TreeMap<>(); 934 Map<String, Set<String>> groups = new TreeMap<>(); 935 Map<String, Set<String>> territory_currencies = new TreeMap<>(); 936 Map<String, Map<String, String>> aliases = new TreeMap<>(); 937 getSupplementalData(language_scripts, language_territories, groups, territory_currencies, aliases); 938 Set<String> sTerritories = new TreeSet<>(); 939 for (Iterator<Set<String>> it = language_territories.values().iterator(); it.hasNext();) { 940 sTerritories.addAll(it.next()); 941 } 942 StandardCodes sc = StandardCodes.make(); 943 Set<String> fullTerritories = sc.getAvailableCodes("territory"); 944 Set<String> fullLanguages = sc.getAvailableCodes("language"); 945 946 Set<String> allLanguages = new TreeSet<>(language_scripts.keySet()); 947 allLanguages.addAll(language_territories.keySet()); 948 for (Iterator<String> it = allLanguages.iterator(); it.hasNext();) { 949 Object language = it.next(); 950 Set<String> scripts = language_scripts.get(language); 951 Set<String> territories = language_territories.get(language); 952 logln(EnglishName.transform(language) 953 + " scripts: " + EnglishName.transform(scripts) 954 + " territories: " + EnglishName.transform(territories)); 955 } 956 957 Map<String, String> changedLanguage = new TreeMap<>(); 958 for (Iterator<String> it = fullLanguages.iterator(); it.hasNext();) { 959 String code = it.next(); 960 List<String> data = sc.getFullData("language", code); 961 if (data.size() < 3) { 962 System.out.println("data problem: " + data); 963 continue; 964 } 965 String replacement = data.get(2); 966 if (!replacement.equals("")) { 967 if (!replacement.equals("--")) changedLanguage.put(code, replacement); 968 continue; 969 } 970 } 971 972 // remove private use, deprecated, groups 973 Set<String> standardTerritories = new TreeSet<>(); 974 Map<String, String> changedTerritory = new TreeMap<>(); 975 for (Iterator<String> it = fullTerritories.iterator(); it.hasNext();) { 976 String code = it.next(); 977 if (code.equals("200")) continue; // || code.equals("YU") || code.equals("PZ") 978 List<String> data = sc.getFullData("territory", code); 979 if (data.get(0).equals("PRIVATE USE")) continue; 980 if (!data.get(2).equals("")) { 981 if (!data.get(2).equals("--")) changedTerritory.put(code, data.get(2)); 982 continue; 983 } 984 standardTerritories.add(code); 985 } 986 standardTerritories.removeAll(groups.keySet()); 987 988 if (!standardTerritories.containsAll(sTerritories)) { 989 TreeSet<String> extras = new TreeSet<>(sTerritories); 990 extras.removeAll(standardTerritories); 991 errln("Supplemental Language Territories contain illegal values: " + EnglishName.transform(extras)); 992 } 993 if (!sTerritories.containsAll(standardTerritories)) { 994 TreeSet<String> extras = new TreeSet<>(standardTerritories); 995 extras.removeAll(sTerritories); 996 warnln("Missing Language Territories: " + EnglishName.transform(extras)); 997 } 998 999 // now test currencies 1000 logln("Check that no illegal territories are used"); 1001 if (!standardTerritories.containsAll(territory_currencies.keySet())) { 1002 TreeSet<String> extras = new TreeSet<>(territory_currencies.keySet()); 1003 extras.removeAll(fullTerritories); 1004 if (extras.size() != 0) errln("Currency info -- Illegal Territories: " + EnglishName.transform(extras)); 1005 extras = new TreeSet<>(territory_currencies.keySet()); 1006 extras.retainAll(fullTerritories); 1007 extras.removeAll(standardTerritories); 1008 if (extras.size() != 0) warnln("Currency info -- Archaic Territories: " + EnglishName.transform(extras)); 1009 } 1010 logln("Check that no territories are missing"); 1011 if (!territory_currencies.keySet().containsAll(standardTerritories)) { 1012 TreeSet<String> extras = new TreeSet<>(standardTerritories); 1013 extras.removeAll(territory_currencies.keySet()); 1014 errln("Currency info -- Missing Territories: " + EnglishName.transform(extras)); 1015 } 1016 Set<String> currencies = new TreeSet<>(); 1017 for (Iterator<Set<String>> it = territory_currencies.values().iterator(); it.hasNext();) { 1018 currencies.addAll(it.next()); 1019 } 1020 logln("Check that no illegal currencies are used"); 1021 Set<String> legalCurrencies = new TreeSet<>(sc.getAvailableCodes("currency")); 1022 // first remove non-ISO 1023 for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) { 1024 String code = it.next(); 1025 List<String> data = sc.getFullData("currency", code); 1026 if ("X".equals(data.get(3))) it.remove(); 1027 } 1028 if (!legalCurrencies.containsAll(currencies)) { 1029 TreeSet<String> extras = new TreeSet<>(currencies); 1030 extras.removeAll(legalCurrencies); 1031 errln("Currency info -- Illegal Currencies: " + EnglishCurrencyName.transform(extras)); 1032 } 1033 logln("Check that there are no missing currencies"); 1034 if (!currencies.containsAll(legalCurrencies)) { 1035 TreeSet<String> extras = new TreeSet<>(legalCurrencies); 1036 extras.removeAll(currencies); 1037 Map<String, Set<String>> failures = new TreeMap<>(); 1038 for (Iterator<String> it = extras.iterator(); it.hasNext();) { 1039 String code = it.next(); 1040 List<String> data = sc.getFullData("currency", code); 1041 if (data.get(1).equals("ZZ")) continue; 1042 String type = data.get(3) + "/" + data.get(1); 1043 Set<String> s = failures.get(type); 1044 if (s == null) failures.put(type, s = new TreeSet<>()); 1045 s.add(code); 1046 } 1047 for (Iterator<String> it = failures.keySet().iterator(); it.hasNext();) { 1048 String type = it.next(); 1049 Set<String> s = failures.get(type); 1050 warnln("Currency info -- Missing Currencies: " + type + "\t \u2192 " + EnglishCurrencyName.transform(s)); 1051 } 1052 } 1053 logln("Missing English currency names"); 1054 for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext();) { 1055 String currency = it.next(); 1056 String name = english.getName("currency", currency); 1057 if (name == null) { 1058 String standardName = sc.getFullData("currency", currency).get(0); 1059 logln("\t\t\t<currency type=\"" + currency + "\">"); 1060 logln("\t\t\t\t<displayName>" + standardName + "</displayName>"); 1061 logln("\t\t\t</currency>"); 1062 } 1063 } 1064 logln("Check Aliases"); 1065 for (Iterator<String> it = aliases.keySet().iterator(); it.hasNext();) { 1066 // the first part of the mapping had better not be in the standardTerritories 1067 String key = it.next(); 1068 Map<String, String> submap = aliases.get(key); 1069 if (key.equals("territoryAlias")) { 1070 checkEqual(key, submap, changedTerritory); 1071 } else if (key.equals("languageAlias")) { 1072 for (Iterator<String> it2 = submap.keySet().iterator(); it2.hasNext();) { 1073 String k = it2.next(); 1074 String value = submap.get(k); 1075 if (value.indexOf("_") >= 0) it2.remove(); 1076 } 1077 checkEqual(key, submap, changedLanguage); 1078 } 1079 } 1080 } 1081 1082 /** 1083 * 1084 */ checkEqual(String title, Map map1, Map map2)1085 private void checkEqual(String title, Map map1, Map map2) { 1086 Set foo = new TreeSet(map1.keySet()); 1087 foo.removeAll(map2.keySet()); 1088 if (!foo.isEmpty()) errln("Extraneous Aliases: " + title + "\t" + foo); 1089 foo = new TreeSet(map2.keySet()); 1090 foo.removeAll(map1.keySet()); 1091 if (!foo.isEmpty()) errln("Missing Aliases: " + title + "\t" + foo); 1092 foo = map2.keySet(); 1093 foo.retainAll(map1.keySet()); 1094 for (Iterator it = foo.iterator(); it.hasNext();) { 1095 Object key = it.next(); 1096 Object result1 = map1.get(key); 1097 Object result2 = map2.get(key); 1098 if (!result1.equals(result2)) 1099 errln("Missing Aliases: " + title + "\t" + key + "\t" + result1 + " != " + result2); 1100 } 1101 } 1102 1103 /** 1104 * Test that the zone ids are well-formed. 1105 * 1106 */ TestZones()1107 public void TestZones() { 1108 StandardCodes sc = StandardCodes.make(); 1109 1110 Map<String, String> defaultNames = new TreeMap(); 1111 Map<String, String> old_new = sc.getZoneLinkold_new(); 1112 Set<String> core = sc.getZoneData().keySet(); 1113 logln("Checking for collisions with last field"); 1114 for (Iterator<String> it = core.iterator(); it.hasNext();) { 1115 String currentItem = it.next(); 1116 String defaultName = TimezoneFormatter.getFallbackName(currentItem); 1117 String fullName = defaultNames.get(defaultName); 1118 if (fullName == null) 1119 defaultNames.put(defaultName, currentItem); 1120 else { 1121 errln("Collision between: " + currentItem + " AND " + fullName); 1122 } 1123 } 1124 1125 logln("Checking that all links are TO canonical zones"); 1126 Set<String> s = new TreeSet<>(old_new.values()); 1127 s.removeAll(core); 1128 if (s.size() != 0) { 1129 errln("Links go TO zones that are not canonical! " + s); 1130 } 1131 1132 logln("Checking that no links are FROM canonical zones"); 1133 s = new TreeSet<>(core); 1134 s.retainAll(old_new.keySet()); 1135 if (s.size() != 0) { 1136 errln("Links go FROM zones that are canonical! " + s); 1137 } 1138 1139 logln("Checking that the zones with rule data are all canonical"); 1140 Set<String> zonesWithRules = sc.getZone_rules().keySet(); 1141 s.clear(); 1142 s.addAll(zonesWithRules); 1143 s.removeAll(core); 1144 if (s.size() != 0) logln("Zones with rules that are not canonical: " + s); 1145 1146 logln("Checking that the rule data are all canonical"); 1147 s.clear(); 1148 s.addAll(core); 1149 s.removeAll(zonesWithRules); 1150 s.removeAll(old_new.keySet()); 1151 if (s.size() != 0) logln("Canonical zones that don't have rules or links: " + s); 1152 1153 for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) { 1154 String oldItem = it.next(); 1155 logln("old: " + oldItem + "\tnew: " + old_new.get(oldItem)); 1156 } 1157 Map<String, Set<String>> new_old = new TreeMap<>(); 1158 for (Iterator<String> it = core.iterator(); it.hasNext();) { 1159 new_old.put(it.next(), new TreeSet<String>()); 1160 } 1161 for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) { 1162 String oldItem = it.next(); 1163 String newItem = old_new.get(oldItem); 1164 Set<String> oldItems = new_old.get(newItem); 1165 if (oldItems == null) { // try recursing 1166 logln("!!!!Skipping " + oldItem + " \u2192 " + newItem); 1167 continue; 1168 // new_old.put(oldOne, oldItems = new TreeSet()); 1169 } 1170 oldItems.add(oldItem); 1171 } 1172 for (Iterator<String> it = new_old.keySet().iterator(); it.hasNext();) { 1173 String newOne = it.next(); 1174 Set<String> oldItems = new_old.get(newOne); 1175 logln(newOne + "\t" + oldItems); 1176 } 1177 } 1178 TestNarrowForms()1179 public void TestNarrowForms() { 1180 if (disableUntilLater("TestMinimalLocalization")) return; 1181 1182 for (Iterator<String> it = locales.iterator(); it.hasNext();) { 1183 String locale = it.next(); 1184 logln("Testing: " + getLocaleAndName(locale)); 1185 BreakIterator bi = BreakIterator.getCharacterInstance(new ULocale(locale)); 1186 CLDRFile item = cldrFactory.make(locale, false); 1187 // Walk through all the xpaths, adding to currentValues 1188 // Whenever two values for the same xpath are different, we remove from currentValues, and add to okValues 1189 for (Iterator<String> it2 = item.iterator(); it2.hasNext();) { 1190 String xpath = it2.next(); 1191 if (xpath.indexOf("[@type=\"narrow\"]") >= 0) { 1192 String value = item.getStringValue(xpath); 1193 // logln("\tTesting: " + value + "\t path: " + xpath); 1194 int end = getXGraphemeClusterBoundary(bi, value, 0); 1195 if (end == value.length()) continue; 1196 errln(getLocaleAndName(locale) + "\tillegal narrow value " + value + "\t path: " + xpath); 1197 surveyInfo.add(locale + "\t" + xpath + "\t'" + value + "' is too wide for a \"narrow\" value."); 1198 } 1199 } 1200 } 1201 } 1202 1203 static final UnicodeSet XGRAPHEME = new UnicodeSet("[[:mark:][:grapheme_extend:]]"); 1204 static final UnicodeSet DIGIT = new UnicodeSet("[:decimal_number:]"); 1205 getXGraphemeClusterBoundary(BreakIterator bi, String value, int start)1206 private int getXGraphemeClusterBoundary(BreakIterator bi, String value, int start) { 1207 if (value.length() <= 1) return 1; 1208 1209 bi.setText(value); 1210 if (start != 0) bi.preceding(start + 1); // backup one 1211 int current = bi.next(); 1212 // link any digits 1213 if (DIGIT.contains(UTF16.charAt(value, current - 1))) { 1214 current = DIGIT.findIn(value, current, true); 1215 } 1216 // continue collecting any additional characters that are M or grapheme extend 1217 return XGRAPHEME.findIn(value, current, true); 1218 } 1219 } 1220