1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.base.Objects; 5 import com.google.common.collect.ImmutableMultimap; 6 import com.google.common.collect.ImmutableSet; 7 import com.google.common.collect.Multimap; 8 import com.google.common.collect.TreeMultimap; 9 import com.ibm.icu.impl.Relation; 10 import com.ibm.icu.impl.Row; 11 import com.ibm.icu.impl.Row.R2; 12 import com.ibm.icu.impl.Row.R3; 13 import com.ibm.icu.impl.Utility; 14 import com.ibm.icu.lang.UCharacter; 15 import com.ibm.icu.text.Collator; 16 import com.ibm.icu.text.DecimalFormat; 17 import com.ibm.icu.text.Normalizer; 18 import com.ibm.icu.text.NumberFormat; 19 import com.ibm.icu.text.UTF16; 20 import com.ibm.icu.text.UnicodeSet; 21 import com.ibm.icu.text.UnicodeSetIterator; 22 import com.ibm.icu.util.Currency; 23 import com.ibm.icu.util.ULocale; 24 import java.io.File; 25 import java.io.IOException; 26 import java.io.InputStream; 27 import java.io.PrintWriter; 28 import java.io.StringWriter; 29 import java.util.ArrayList; 30 import java.util.Arrays; 31 import java.util.Collection; 32 import java.util.Collections; 33 import java.util.Comparator; 34 import java.util.EnumSet; 35 import java.util.HashSet; 36 import java.util.Iterator; 37 import java.util.LinkedHashSet; 38 import java.util.List; 39 import java.util.Map; 40 import java.util.Map.Entry; 41 import java.util.Set; 42 import java.util.TreeMap; 43 import java.util.TreeSet; 44 import org.unicode.cldr.test.DisplayAndInputProcessor; 45 import org.unicode.cldr.tool.CldrVersion; 46 import org.unicode.cldr.tool.LikelySubtags; 47 import org.unicode.cldr.util.Builder; 48 import org.unicode.cldr.util.CLDRConfig; 49 import org.unicode.cldr.util.CLDRFile; 50 import org.unicode.cldr.util.CLDRFile.DraftStatus; 51 import org.unicode.cldr.util.CLDRFile.Status; 52 import org.unicode.cldr.util.CLDRFile.WinningChoice; 53 import org.unicode.cldr.util.CLDRPaths; 54 import org.unicode.cldr.util.ChainedMap; 55 import org.unicode.cldr.util.ChainedMap.M4; 56 import org.unicode.cldr.util.CharacterFallbacks; 57 import org.unicode.cldr.util.CldrUtility; 58 import org.unicode.cldr.util.Counter; 59 import org.unicode.cldr.util.DiscreteComparator; 60 import org.unicode.cldr.util.DiscreteComparator.Ordering; 61 import org.unicode.cldr.util.DoctypeXmlStreamWrapper; 62 import org.unicode.cldr.util.DtdData; 63 import org.unicode.cldr.util.DtdData.Attribute; 64 import org.unicode.cldr.util.DtdData.Element; 65 import org.unicode.cldr.util.DtdData.ElementType; 66 import org.unicode.cldr.util.DtdType; 67 import org.unicode.cldr.util.DtdType.DtdStatus; 68 import org.unicode.cldr.util.ElementAttributeInfo; 69 import org.unicode.cldr.util.Factory; 70 import org.unicode.cldr.util.InputStreamFactory; 71 import org.unicode.cldr.util.LanguageTagParser; 72 import org.unicode.cldr.util.Level; 73 import org.unicode.cldr.util.LocaleIDParser; 74 import org.unicode.cldr.util.Pair; 75 import org.unicode.cldr.util.PathHeader; 76 import org.unicode.cldr.util.PathUtilities; 77 import org.unicode.cldr.util.StandardCodes; 78 import org.unicode.cldr.util.SupplementalDataInfo; 79 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 80 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 81 import org.unicode.cldr.util.TestCLDRPaths; 82 import org.unicode.cldr.util.XMLFileReader; 83 import org.unicode.cldr.util.XPathParts; 84 import org.xml.sax.ErrorHandler; 85 import org.xml.sax.InputSource; 86 import org.xml.sax.SAXException; 87 import org.xml.sax.SAXParseException; 88 import org.xml.sax.XMLReader; 89 90 public class TestBasic extends TestFmwkPlus { 91 92 private static final boolean DEBUG = false; 93 94 static CLDRConfig testInfo = CLDRConfig.getInstance(); 95 96 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = 97 testInfo.getSupplementalDataInfo(); 98 99 private static final ImmutableSet<Pair<String, String>> knownElementExceptions = 100 ImmutableSet.of(Pair.of("ldml", "usesMetazone"), Pair.of("ldmlICU", "usesMetazone")); 101 102 private static final ImmutableSet<Pair<String, String>> knownAttributeExceptions = 103 ImmutableSet.of( 104 Pair.of("ldml", "version"), 105 Pair.of("supplementalData", "version"), 106 Pair.of("ldmlICU", "version"), 107 Pair.of("layout", "standard"), 108 Pair.of("currency", "id"), // for v1.1.1 109 Pair.of("monthNames", "type"), // for v1.1.1 110 Pair.of("alias", "type") // for v1.1.1 111 ); 112 113 private static final ImmutableSet<Pair<String, String>> knownChildExceptions = 114 ImmutableSet.of( 115 Pair.of("abbreviationFallback", "special"), 116 Pair.of("inList", "special"), 117 Pair.of("preferenceOrdering", "special")); 118 119 /** 120 * Simple test that loads each file in the cldr directory, thus verifying that the DTD works, 121 * and also checks that the PrettyPaths work. 122 * 123 * @author markdavis 124 */ main(String[] args)125 public static void main(String[] args) { 126 new TestBasic().run(args); 127 } 128 129 private static final ImmutableSet<String> skipAttributes = 130 ImmutableSet.of("alt", "draft", "references"); 131 132 private final ImmutableSet<String> eightPointLocales = 133 ImmutableSet.of( 134 "ar", "ca", "cs", "da", "de", "el", "es", "fi", "fr", "he", "hi", "hr", "hu", 135 "id", "it", "ja", "ko", "lt", "lv", "nl", "no", "pl", "pt", "pt_PT", "ro", "ru", 136 "sk", "sl", "sr", "sv", "th", "tr", "uk", "vi", "zh", "zh_Hant"); 137 138 // private final boolean showForceZoom = Utility.getProperty("forcezoom", 139 // false); 140 141 private final boolean resolved = CldrUtility.getProperty("resolved", false); 142 143 private final Exception[] internalException = new Exception[1]; 144 TestDtds()145 public void TestDtds() throws IOException { 146 Relation<Row.R2<DtdType, String>, String> foundAttributes = 147 Relation.of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(), TreeSet.class); 148 final CLDRConfig config = CLDRConfig.getInstance(); 149 final File basedir = config.getCldrBaseDirectory(); 150 List<TimingInfo> data = new ArrayList<>(); 151 152 for (String subdir : CLDRConfig.getCLDRDataDirectories()) { 153 checkDtds(new File(basedir, subdir), 0, foundAttributes, data); 154 } 155 if (foundAttributes.size() > 0) { 156 showFoundElements(foundAttributes); 157 } 158 if (isVerbose()) { 159 long totalBytes = 0; 160 long totalNanos = 0; 161 for (TimingInfo i : data) { 162 long length = i.file.length(); 163 totalBytes += length; 164 totalNanos += i.nanos; 165 logln(i.nanos + "\t" + length + "\t" + i.file); 166 } 167 logln(totalNanos + "\t" + totalBytes); 168 } 169 } 170 checkDtds( File directoryFile, int level, Relation<R2<DtdType, String>, String> foundAttributes, List<TimingInfo> data)171 private void checkDtds( 172 File directoryFile, 173 int level, 174 Relation<R2<DtdType, String>, String> foundAttributes, 175 List<TimingInfo> data) 176 throws IOException { 177 boolean deepCheck = getInclusion() >= 10; 178 if (directoryFile.getName().equals("import") 179 && directoryFile.getParentFile().getName().equals("keyboards")) { 180 return; // skip imports 181 } 182 File[] listFiles = directoryFile.listFiles(); 183 String normalizedPath = PathUtilities.getNormalizedPathString(directoryFile); 184 String indent = Utility.repeat("\t", level); 185 if (listFiles == null) { 186 throw new IllegalArgumentException(indent + "Empty directory: " + normalizedPath); 187 } 188 logln("Checking files for DTD errors in: " + indent + normalizedPath); 189 for (File fileName : listFiles) { 190 String name = fileName.getName(); 191 if (CLDRConfig.isJunkFile(name)) { 192 continue; 193 } else if (fileName.isDirectory()) { 194 checkDtds(fileName, level + 1, foundAttributes, data); 195 } else if (fileName.getPath().contains("/keyboards/3.0/") 196 && logKnownIssue( 197 "CLDR-17574", "With v46, parsing issues for keyboard xml files")) { 198 ; // do nothing, skip test 199 } else if (name.endsWith(".xml")) { 200 data.add(check(fileName)); 201 if (deepCheck // takes too long to do all the time 202 ) { 203 CLDRFile cldrfile = 204 CLDRFile.loadFromFile(fileName, "temp", DraftStatus.unconfirmed); 205 for (String xpath : cldrfile) { 206 String fullPath = cldrfile.getFullXPath(xpath); 207 if (fullPath == null) { 208 fullPath = cldrfile.getFullXPath(xpath); 209 assertNotNull("", fullPath); 210 continue; 211 } 212 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 213 DtdType type = parts.getDtdData().dtdType; 214 for (int i = 0; i < parts.size(); ++i) { 215 String element = parts.getElement(i); 216 R2<DtdType, String> typeElement = Row.of(type, element); 217 if (parts.getAttributeCount(i) == 0) { 218 foundAttributes.put(typeElement, "NONE"); 219 } else { 220 for (String attribute : parts.getAttributeKeys(i)) { 221 foundAttributes.put(typeElement, attribute); 222 } 223 } 224 } 225 } 226 } 227 } 228 } 229 } 230 showFoundElements(Relation<Row.R2<DtdType, String>, String> foundAttributes)231 public void showFoundElements(Relation<Row.R2<DtdType, String>, String> foundAttributes) { 232 Relation<Row.R2<DtdType, String>, String> theoryAttributes = 233 Relation.of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(), TreeSet.class); 234 for (DtdType type : DtdType.values()) { 235 if (type.getStatus() != DtdType.DtdStatus.active) { 236 continue; 237 } 238 DtdData dtdData = DtdData.getInstance(type); 239 for (Element element : dtdData.getElementFromName().values()) { 240 String name = element.getName(); 241 Set<Attribute> attributes = element.getAttributes().keySet(); 242 R2<DtdType, String> typeElement = Row.of(type, name); 243 if (attributes.isEmpty()) { 244 theoryAttributes.put(typeElement, "NONE"); 245 } else { 246 for (Attribute attribute : attributes) { 247 theoryAttributes.put(typeElement, attribute.name); 248 } 249 } 250 } 251 } 252 Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed = 253 Relation.of( 254 new TreeMap<String, Set<R3<Boolean, DtdType, String>>>(), 255 LinkedHashSet.class); 256 257 for (Entry<R2<DtdType, String>, Set<String>> s : theoryAttributes.keyValuesSet()) { 258 R2<DtdType, String> typeElement = s.getKey(); 259 Set<String> theoryAttributeSet = s.getValue(); 260 DtdType type = typeElement.get0(); 261 String element = typeElement.get1(); 262 if (element.equals("ANY") || element.equals("#PCDATA")) { 263 continue; 264 } 265 boolean deprecatedElement = 266 SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element, "*", "*"); 267 String header = type + "\t" + element + "\t" + (deprecatedElement ? "X" : "") + "\t"; 268 Set<String> usedAttributes = foundAttributes.get(typeElement); 269 Set<String> unusedAttributes = new LinkedHashSet<>(theoryAttributeSet); 270 if (usedAttributes == null) { 271 logln( 272 header 273 + "<NOT-FOUND>\t\t" 274 + siftDeprecated( 275 type, 276 element, 277 unusedAttributes, 278 attributesToTypeElementUsed, 279 false)); 280 continue; 281 } 282 unusedAttributes.removeAll(usedAttributes); 283 logln( 284 header 285 + siftDeprecated( 286 type, 287 element, 288 usedAttributes, 289 attributesToTypeElementUsed, 290 true) 291 + "\t" 292 + siftDeprecated( 293 type, 294 element, 295 unusedAttributes, 296 attributesToTypeElementUsed, 297 false)); 298 } 299 300 logln("Undeprecated Attributes\t"); 301 for (Entry<String, R3<Boolean, DtdType, String>> s : 302 attributesToTypeElementUsed.keyValueSet()) { 303 R3<Boolean, DtdType, String> typeElementUsed = s.getValue(); 304 logln( 305 s.getKey() 306 + "\t" 307 + typeElementUsed.get0() 308 + "\t" 309 + typeElementUsed.get1() 310 + "\t" 311 + typeElementUsed.get2()); 312 } 313 } 314 siftDeprecated( DtdType type, String element, Set<String> attributeSet, Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed, boolean used)315 private String siftDeprecated( 316 DtdType type, 317 String element, 318 Set<String> attributeSet, 319 Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed, 320 boolean used) { 321 StringBuilder b = new StringBuilder(); 322 StringBuilder bdep = new StringBuilder(); 323 for (String attribute : attributeSet) { 324 String attributeName = 325 "«" 326 + attribute 327 + (!"NONE".equals(attribute) 328 && CLDRFile.isDistinguishing(type, element, attribute) 329 ? "*" 330 : "") 331 + "»"; 332 if (!"NONE".equals(attribute) 333 && SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element, attribute, "*")) { 334 if (bdep.length() != 0) { 335 bdep.append(" "); 336 } 337 bdep.append(attributeName); 338 } else { 339 if (b.length() != 0) { 340 b.append(" "); 341 } 342 b.append(attributeName); 343 if (!"NONE".equals(attribute)) { 344 attributesToTypeElementUsed.put(attribute, Row.of(used, type, element)); 345 } 346 } 347 } 348 return b.toString() + "\t" + bdep.toString(); 349 } 350 351 class MyErrorHandler implements ErrorHandler { 352 @Override error(SAXParseException exception)353 public void error(SAXParseException exception) throws SAXException { 354 errln("error: " + XMLFileReader.showSAX(exception)); 355 throw exception; 356 } 357 358 @Override fatalError(SAXParseException exception)359 public void fatalError(SAXParseException exception) throws SAXException { 360 errln("fatalError: " + XMLFileReader.showSAX(exception)); 361 throw exception; 362 } 363 364 @Override warning(SAXParseException exception)365 public void warning(SAXParseException exception) throws SAXException { 366 errln("warning: " + XMLFileReader.showSAX(exception)); 367 throw exception; 368 } 369 } 370 371 private class TimingInfo { 372 File file; 373 long nanos; 374 } 375 check(File systemID)376 public TimingInfo check(File systemID) { 377 long start = System.nanoTime(); 378 try (InputStream fis = InputStreamFactory.createInputStream(systemID)) { 379 // FileInputStream fis = new FileInputStream(systemID); 380 XMLReader xmlReader = XMLFileReader.createXMLReader(true); 381 xmlReader.setErrorHandler(new MyErrorHandler()); 382 InputSource is = new InputSource(fis); 383 is.setSystemId(systemID.toString()); 384 DoctypeXmlStreamWrapper.wrap(is); 385 xmlReader.parse(is); 386 // fis.close(); 387 } catch (SAXException | IOException e) { 388 errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + e.getMessage()); 389 } 390 // catch (SAXParseException e) { 391 // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + 392 // e.getMessage()); 393 // } catch (IOException e) { 394 // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + 395 // e.getMessage()); 396 // } 397 TimingInfo timingInfo = new TimingInfo(); 398 timingInfo.nanos = System.nanoTime() - start; 399 timingInfo.file = systemID; 400 return timingInfo; 401 } 402 TestCurrencyFallback()403 public void TestCurrencyFallback() { 404 Factory cldrFactory = testInfo.getCldrFactory(); 405 Set<String> currencies = StandardCodes.make().getAvailableCodes("currency"); 406 407 final UnicodeSet CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS = 408 new UnicodeSet("[[:sc:]-[\\u0000-\\u00FF]]").freeze(); 409 410 CharacterFallbacks fallbacks = CharacterFallbacks.make(); 411 412 for (String locale : cldrFactory.getAvailable()) { 413 if (!StandardCodes.isLocaleAtLeastBasic(locale)) { 414 continue; 415 } 416 CLDRFile file = testInfo.getCLDRFile(locale, false); 417 if (file.isNonInheriting()) continue; 418 419 final UnicodeSet OK_CURRENCY_FALLBACK = 420 new UnicodeSet("[\\u0000-\\u00FF]") 421 .addAll(safeExemplars(file, "")) 422 .addAll(safeExemplars(file, "auxiliary")) 423 .freeze(); 424 UnicodeSet badSoFar = new UnicodeSet(); 425 426 for (Iterator<String> it = file.iterator(); it.hasNext(); ) { 427 String path = it.next(); 428 if (path.endsWith("/alias")) { 429 continue; 430 } 431 String value = file.getStringValue(path); 432 433 // check for special characters 434 if (CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS.containsSome(value)) { 435 XPathParts parts = XPathParts.getFrozenInstance(path); 436 if (!parts.getElement(-1).equals("symbol")) { 437 continue; 438 } 439 // We don't care about fallbacks for narrow currency symbols 440 if ("narrow".equals(parts.getAttributeValue(-1, "alt"))) { 441 continue; 442 } 443 String currencyType = parts.getAttributeValue(-2, "type"); 444 445 UnicodeSet fishy = 446 new UnicodeSet() 447 .addAll(value) 448 .retainAll(CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS) 449 .removeAll(badSoFar); 450 for (UnicodeSetIterator it2 = new UnicodeSetIterator(fishy); it2.next(); ) { 451 final int fishyCodepoint = it2.codepoint; 452 List<String> fallbackList = fallbacks.getSubstitutes(fishyCodepoint); 453 454 String nfkc = Normalizer.normalize(fishyCodepoint, Normalizer.NFKC); 455 if (!nfkc.equals(UTF16.valueOf(fishyCodepoint))) { 456 if (fallbackList == null) { 457 fallbackList = new ArrayList<>(); 458 } else { 459 fallbackList = new ArrayList<>(fallbackList); // writable 460 } 461 fallbackList.add(nfkc); 462 } 463 // later test for all Latin-1 464 if (fallbackList == null) { 465 if (locale.equals("nqo") 466 && logKnownIssue("CLDR-16987", "fishy fallback test")) { 467 continue; 468 } 469 errln( 470 "Locale:\t" 471 + locale 472 + ";\tCharacter with no fallback:\t" 473 + it2.getString() 474 + "\t" 475 + UCharacter.getName(fishyCodepoint)); 476 badSoFar.add(fishyCodepoint); 477 } else { 478 String fallback = null; 479 for (String fb : fallbackList) { 480 if (OK_CURRENCY_FALLBACK.containsAll(fb)) { 481 if (!fb.equals(currencyType) && currencies.contains(fb)) { 482 errln( 483 "Locale:\t" 484 + locale 485 + ";\tCurrency:\t" 486 + currencyType 487 + ";\tFallback converts to different code!:\t" 488 + fb 489 + "\t" 490 + it2.getString() 491 + "\t" 492 + UCharacter.getName(fishyCodepoint)); 493 } 494 if (fallback == null) { 495 fallback = fb; 496 } 497 } 498 } 499 if (fallback == null) { 500 errln( 501 "Locale:\t" 502 + locale 503 + ";\tCharacter with no good fallback (exemplars+Latin1):\t" 504 + it2.getString() 505 + "\t" 506 + UCharacter.getName(fishyCodepoint)); 507 badSoFar.add(fishyCodepoint); 508 } else { 509 logln( 510 "Locale:\t" 511 + locale 512 + ";\tCharacter with good fallback:\t" 513 + it2.getString() 514 + " " 515 + UCharacter.getName(fishyCodepoint) 516 + " => " 517 + fallback); 518 // badSoFar.add(fishyCodepoint); 519 } 520 } 521 } 522 } 523 } 524 } 525 } 526 TestAbstractPaths()527 public void TestAbstractPaths() { 528 Factory cldrFactory = testInfo.getCldrFactory(); 529 CLDRFile english = testInfo.getEnglish(); 530 Map<String, Counter<Level>> abstactPaths = new TreeMap<>(); 531 RegexTransform abstractPathTransform = 532 new RegexTransform(RegexTransform.Processing.ONE_PASS) 533 .add("//ldml/", "") 534 .add("\\[@alt=\"[^\"]*\"\\]", "") 535 .add("=\"[^\"]*\"", "=\"*\"") 536 .add("([^]])\\[", "$1\t[") 537 .add("([^]])/", "$1\t/") 538 .add("/", "\t"); 539 540 for (String locale : getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable()) { 541 CLDRFile file = testInfo.getCLDRFile(locale, resolved); 542 if (file.isNonInheriting()) continue; 543 logln(locale + "\t-\t" + english.getName(locale)); 544 545 for (Iterator<String> it = file.iterator(); it.hasNext(); ) { 546 String path = it.next(); 547 if (path.endsWith("/alias")) { 548 continue; 549 } 550 // collect abstracted paths 551 String abstractPath = abstractPathTransform.transform(path); 552 Level level = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, locale); 553 if (level == Level.OPTIONAL) { 554 level = Level.COMPREHENSIVE; 555 } 556 Counter<Level> row = abstactPaths.get(abstractPath); 557 if (row == null) { 558 abstactPaths.put(abstractPath, row = new Counter<>()); 559 } 560 row.add(level, 1); 561 } 562 } 563 logln(CldrUtility.LINE_SEPARATOR + "Abstract Paths"); 564 for (Entry<String, Counter<Level>> pathInfo : abstactPaths.entrySet()) { 565 String path = pathInfo.getKey(); 566 Counter<Level> counter = pathInfo.getValue(); 567 logln(counter.getTotal() + "\t" + getCoverage(counter) + "\t" + path); 568 } 569 } 570 getCoverage(Counter<Level> counter)571 private CharSequence getCoverage(Counter<Level> counter) { 572 StringBuilder result = new StringBuilder(); 573 boolean first = true; 574 for (Level level : counter.getKeysetSortedByKey()) { 575 if (first) { 576 first = false; 577 } else { 578 result.append(' '); 579 } 580 result.append("L").append(level.ordinal()).append("=").append(counter.get(level)); 581 } 582 return result; 583 } 584 585 // public void TestCLDRFileCache() { 586 // long start = System.nanoTime(); 587 // Factory cldrFactory = testInfo.getCldrFactory(); 588 // String unusualLocale = "hi"; 589 // CLDRFile file = cldrFactory.make(unusualLocale, true); 590 // long afterOne = System.nanoTime(); 591 // logln("First: " + (afterOne-start)); 592 // CLDRFile file2 = cldrFactory.make(unusualLocale, true); 593 // long afterTwo = System.nanoTime(); 594 // logln("Second: " + (afterTwo-afterOne)); 595 // } 596 // TestPaths()597 public void TestPaths() { 598 Relation<String, String> distinguishing = 599 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 600 Relation<String, String> nonDistinguishing = 601 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 602 Factory cldrFactory = testInfo.getCldrFactory(); 603 CLDRFile english = testInfo.getEnglish(); 604 605 Relation<String, String> pathToLocale = 606 Relation.of( 607 new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)), 608 TreeSet.class, 609 null); 610 Set<String> localesToTest = 611 getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable(); 612 for (String locale : localesToTest) { 613 CLDRFile file = testInfo.getCLDRFile(locale, resolved); 614 DtdType dtdType = null; 615 if (file.isNonInheriting()) continue; 616 DisplayAndInputProcessor displayAndInputProcessor = 617 new DisplayAndInputProcessor(file, false); 618 619 logln(locale + "\t-\t" + english.getName(locale)); 620 621 for (Iterator<String> it = file.iterator(); it.hasNext(); ) { 622 String path = it.next(); 623 if (dtdType == null) { 624 dtdType = DtdType.fromPath(path); 625 } 626 627 if (path.endsWith("/alias")) { 628 continue; 629 } 630 String value = file.getStringValue(path); 631 if (value == null) { 632 throw new IllegalArgumentException( 633 locale + "\tError: in null value at " + path); 634 } 635 636 String displayValue = displayAndInputProcessor.processForDisplay(path, value); 637 if (!displayValue.equals(value)) { 638 logln( 639 "\t" 640 + locale 641 + "\tdisplayAndInputProcessor changes display value <" 642 + value 643 + ">\t=>\t<" 644 + displayValue 645 + ">\t\t" 646 + path); 647 } 648 String inputValue = 649 displayAndInputProcessor.processInput( 650 path, displayValue, internalException); 651 if (internalException[0] != null) { 652 errln( 653 "\t" 654 + locale 655 + "\tdisplayAndInputProcessor internal error <" 656 + value 657 + ">\t=>\t<" 658 + inputValue 659 + ">\t\t" 660 + path); 661 internalException[0].printStackTrace(System.out); 662 } 663 if (isVerbose() && !inputValue.equals(value)) { 664 displayAndInputProcessor.processInput(path, value, internalException); // for 665 // debugging 666 logln( 667 "\t" 668 + locale 669 + "\tdisplayAndInputProcessor changes input value <" 670 + value 671 + ">\t=>\t<" 672 + inputValue 673 + ">\t\t" 674 + path); 675 } 676 677 pathToLocale.put(path, locale); 678 679 // also check for non-distinguishing attributes 680 if (path.contains("/identity")) continue; 681 682 String fullPath = file.getFullXPath(path); 683 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 684 for (int i = 0; i < parts.size(); ++i) { 685 if (parts.getAttributeCount(i) == 0) { 686 continue; 687 } 688 String element = parts.getElement(i); 689 for (String attribute : parts.getAttributeKeys(i)) { 690 if (skipAttributes.contains(attribute)) continue; 691 if (CLDRFile.isDistinguishing(dtdType, element, attribute)) { 692 distinguishing.put(element, attribute); 693 } else { 694 nonDistinguishing.put(element, attribute); 695 } 696 } 697 } 698 } 699 } 700 701 if (isVerbose()) { 702 System.out.format( 703 "Distinguishing Elements: %s" + CldrUtility.LINE_SEPARATOR, distinguishing); 704 System.out.format( 705 "Nondistinguishing Elements: %s" + CldrUtility.LINE_SEPARATOR, 706 nonDistinguishing); 707 System.out.format("Skipped %s" + CldrUtility.LINE_SEPARATOR, skipAttributes); 708 } 709 } 710 711 /** The verbose output shows the results of 1..3 \u00a4 signs. */ checkCurrency()712 public void checkCurrency() { 713 Map<String, Set<R2<String, Integer>>> results = 714 new TreeMap<>(Collator.getInstance(ULocale.ENGLISH)); 715 for (ULocale locale : ULocale.getAvailableLocales()) { 716 if (locale.getCountry().length() != 0) { 717 continue; 718 } 719 for (int i = 1; i < 4; ++i) { 720 NumberFormat format = getCurrencyInstance(locale, i); 721 for (Currency c : 722 new Currency[] { 723 Currency.getInstance("USD"), 724 Currency.getInstance("EUR"), 725 Currency.getInstance("INR") 726 }) { 727 format.setCurrency(c); 728 final String formatted = format.format(12345.67); 729 Set<R2<String, Integer>> set = results.get(formatted); 730 if (set == null) { 731 results.put(formatted, set = new TreeSet<>()); 732 } 733 set.add(Row.of(locale.toString(), i)); 734 } 735 } 736 } 737 for (String formatted : results.keySet()) { 738 logln(formatted + "\t" + results.get(formatted)); 739 } 740 } 741 getCurrencyInstance(ULocale locale, int type)742 private static NumberFormat getCurrencyInstance(ULocale locale, int type) { 743 NumberFormat format = NumberFormat.getCurrencyInstance(locale); 744 if (type > 1) { 745 DecimalFormat format2 = (DecimalFormat) format; 746 String pattern = format2.toPattern(); 747 String replacement = "\u00a4\u00a4"; 748 for (int i = 2; i < type; ++i) { 749 replacement += "\u00a4"; 750 } 751 pattern = pattern.replace("\u00a4", replacement); 752 format2.applyPattern(pattern); 753 } 754 return format; 755 } 756 safeExemplars(CLDRFile file, String string)757 private UnicodeSet safeExemplars(CLDRFile file, String string) { 758 final UnicodeSet result = file.getExemplarSet(string, WinningChoice.NORMAL); 759 return result != null ? result : new UnicodeSet(); 760 } 761 TestAPath()762 public void TestAPath() { 763 // <month type="1">1</month> 764 String path = 765 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]"; 766 CLDRFile root = testInfo.getRoot(); 767 logln("path: " + path); 768 String fullpath = root.getFullXPath(path); 769 logln("fullpath: " + fullpath); 770 String value = root.getStringValue(path); 771 logln("value: " + value); 772 Status status = new Status(); 773 String source = root.getSourceLocaleID(path, status); 774 logln("locale: " + source); 775 logln("status: " + status); 776 } 777 TestDefaultContents()778 public void TestDefaultContents() { 779 Set<String> defaultContents = Inheritance.defaultContents; 780 Multimap<String, String> parentToChildren = Inheritance.parentToChildren; 781 782 // Put a list of locales that should be default content here. 783 final String expectDC[] = { 784 "os_GE" // see CLDR-14118 785 }; 786 for (final String locale : expectDC) { 787 assertTrue( 788 "expect " + locale + " to be a default content locale", 789 defaultContents.contains(locale)); 790 } 791 792 if (DEBUG) { 793 Inheritance.showChain("", "", "root"); 794 } 795 796 for (String locale : defaultContents) { 797 CLDRFile cldrFile; 798 try { 799 cldrFile = testInfo.getCLDRFile(locale, false); 800 } catch (RuntimeException e) { 801 logln("Can't open default content file:\t" + locale); 802 continue; 803 } 804 // we check that the default content locale is always empty 805 for (Iterator<String> it = cldrFile.iterator(); it.hasNext(); ) { 806 String path = it.next(); 807 if (path.contains("/identity")) { 808 continue; 809 } 810 errln("Default content file not empty:\t" + locale); 811 showDifferences(locale); 812 break; 813 } 814 } 815 816 // check that if a locale has any children, that exactly one of them is 817 // the default content. Ignore locales with variants 818 819 for (Entry<String, Collection<String>> localeAndKids : 820 parentToChildren.asMap().entrySet()) { 821 String locale = localeAndKids.getKey(); 822 if (locale.equals("root")) { 823 continue; 824 } 825 826 Collection<String> rawChildren = localeAndKids.getValue(); 827 828 // remove variant children 829 Set<String> children = new LinkedHashSet<>(); 830 for (String child : rawChildren) { 831 if (new LocaleIDParser().set(child).getVariants().length == 0) { 832 children.add(child); 833 } 834 } 835 if (children.isEmpty()) { 836 continue; 837 } 838 839 Set<String> defaultContentChildren = new LinkedHashSet<>(children); 840 defaultContentChildren.retainAll(defaultContents); 841 if (defaultContentChildren.size() == 1) { 842 continue; 843 // If we're already down to the region level then it's OK not to have 844 // default contents. 845 } else if (!new LocaleIDParser().set(locale).getRegion().isEmpty()) { 846 continue; 847 } else if (defaultContentChildren.isEmpty()) { 848 Object possible = highestShared(locale, children); 849 errln( 850 "Locale has children but is missing default contents locale: " 851 + locale 852 + ", children: " 853 + children 854 + "; possible fixes for children:\n" 855 + possible); 856 } else { 857 errln( 858 "Locale has too many defaultContent locales!!: " 859 + locale 860 + ", defaultContents: " 861 + defaultContentChildren); 862 } 863 } 864 865 // check that each default content locale is likely-subtag equivalent to 866 // its parent. 867 868 for (String locale : defaultContents) { 869 String maxLocale = LikelySubtags.maximize(locale, likelyData); 870 String localeParent = LocaleIDParser.getParent(locale); 871 String maxLocaleParent = LikelySubtags.maximize(localeParent, likelyData); 872 if (locale.equals("ar_001") || locale.equals("nb")) { 873 logln( 874 "Known exception to likelyMax(locale=" 875 + locale 876 + ")" 877 + " == " 878 + "likelyMax(defaultContent=" 879 + localeParent 880 + ")"); 881 continue; 882 } 883 assertEquals( 884 "likelyMax(locale=" 885 + locale 886 + ")" 887 + " == " 888 + "likelyMax(defaultContent=" 889 + localeParent 890 + ")", 891 maxLocaleParent, 892 maxLocale); 893 } 894 } 895 highestShared(String parent, Set<String> children)896 private String highestShared(String parent, Set<String> children) { 897 M4<PathHeader, String, String, Boolean> data = 898 ChainedMap.of( 899 new TreeMap<PathHeader, Object>(), 900 new TreeMap<String, Object>(), 901 new TreeMap<String, Object>(), 902 Boolean.class); 903 CLDRFile parentFile = testInfo.getCLDRFile(parent, true); 904 PathHeader.Factory phf = PathHeader.getFactory(testInfo.getEnglish()); 905 for (String child : children) { 906 CLDRFile cldrFile = testInfo.getCLDRFile(child, false); 907 for (String path : cldrFile) { 908 if (path.contains("/identity")) { 909 continue; 910 } 911 if (path.contains("provisional") || path.contains("unconfirmed")) { 912 continue; 913 } 914 String value = cldrFile.getStringValue(path); 915 // double-check 916 String parentValue = parentFile.getStringValue(path); 917 if (value.equals(parentValue)) { 918 continue; 919 } 920 PathHeader ph = phf.fromPath(path); 921 data.put(ph, value, child, Boolean.TRUE); 922 data.put(ph, parentValue == null ? "∅∅∅" : parentValue, child, Boolean.TRUE); 923 } 924 } 925 StringBuilder result = new StringBuilder(); 926 for (Entry<PathHeader, Map<String, Map<String, Boolean>>> entry : data) { 927 for (Entry<String, Map<String, Boolean>> item : entry.getValue().entrySet()) { 928 result.append("\n") 929 .append(entry.getKey()) 930 .append("\t") 931 .append(item.getKey() + "\t" + item.getValue().keySet()); 932 } 933 } 934 return result.toString(); 935 } 936 937 public static class Inheritance { 938 public static final Set<String> defaultContents = 939 SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales(); 940 public static final Multimap<String, String> parentToChildren; 941 942 static { 943 Multimap<String, String> _parentToChildren = TreeMultimap.create(); 944 for (String child : testInfo.getCldrFactory().getAvailable()) { 945 if (child.equals("root")) { 946 continue; 947 } 948 String localeParent = LocaleIDParser.getParent(child); _parentToChildren.put(localeParent, child)949 _parentToChildren.put(localeParent, child); 950 } 951 parentToChildren = ImmutableMultimap.copyOf(_parentToChildren); 952 } 953 showChain(String prefix, String gparent, String current)954 public static void showChain(String prefix, String gparent, String current) { 955 Collection<String> children = parentToChildren.get(current); 956 if (children == null) { 957 throw new IllegalArgumentException(); 958 } 959 prefix += 960 current 961 + (defaultContents.contains(current) ? "*" : "") 962 + (isLikelyEquivalent(gparent, current) ? "~" : "") 963 + "\t"; 964 965 // find leaves 966 Set<String> parents = new LinkedHashSet<>(children); 967 parents.retainAll(parentToChildren.keySet()); 968 Set<String> leaves = new LinkedHashSet<>(children); 969 leaves.removeAll(parentToChildren.keySet()); 970 if (!leaves.isEmpty()) { 971 List<String> presentation = new ArrayList<>(); 972 boolean gotDc = false; 973 for (String s : leaves) { 974 String shown = s; 975 if (isLikelyEquivalent(current, s)) { 976 shown += "~"; 977 } 978 if (defaultContents.contains(s)) { 979 gotDc = true; 980 shown += "*"; 981 } 982 if (!shown.equals(s)) { 983 presentation.add(0, shown); 984 } else { 985 presentation.add(shown); 986 } 987 } 988 if (!gotDc) { 989 int debug = 0; 990 } 991 if (leaves.size() == 1) { 992 System.out.println(prefix + Joiner.on(" ").join(presentation)); 993 } else { 994 System.out.println(prefix + "{" + Joiner.on(" ").join(presentation) + "}"); 995 } 996 } 997 for (String parent : parents) { 998 showChain(prefix, current, parent); 999 } 1000 } 1001 isLikelyEquivalent(String locale1, String locale2)1002 static boolean isLikelyEquivalent(String locale1, String locale2) { 1003 if (locale1.equals(locale2)) { 1004 return true; 1005 } 1006 try { 1007 String maxLocale1 = LikelySubtags.maximize(locale1, likelyData); 1008 String maxLocale2 = LikelySubtags.maximize(locale2, likelyData); 1009 return maxLocale1 != null && Objects.equal(maxLocale1, maxLocale2); 1010 } catch (Exception e) { 1011 return false; 1012 } 1013 } 1014 } 1015 1016 static final Map<String, String> likelyData = SUPPLEMENTAL_DATA_INFO.getLikelySubtags(); 1017 1018 private static final EnumSet<CldrVersion> badLdmlICUVersions = 1019 EnumSet.of( 1020 CldrVersion.v1_1_1, CldrVersion.v1_2, CldrVersion.v1_4_1, CldrVersion.v1_5_1); 1021 TestLikelySubtagsComplete()1022 public void TestLikelySubtagsComplete() { 1023 LanguageTagParser ltp = new LanguageTagParser(); 1024 for (String locale : testInfo.getCldrFactory().getAvailable()) { 1025 if (locale.equals("root")) { 1026 continue; 1027 } 1028 String maxLocale = LikelySubtags.maximize(locale, likelyData); 1029 if (maxLocale == null) { 1030 errln("Locale missing likely subtag: " + locale); 1031 continue; 1032 } 1033 ltp.set(maxLocale); 1034 if (ltp.getLanguage().isEmpty() 1035 || ltp.getScript().isEmpty() 1036 || ltp.getRegion().isEmpty()) { 1037 errln("Locale has defective likely subtag: " + locale + " => " + maxLocale); 1038 } 1039 } 1040 } 1041 showDifferences(String locale)1042 private void showDifferences(String locale) { 1043 CLDRFile cldrFile = testInfo.getCLDRFile(locale, false); 1044 final String localeParent = LocaleIDParser.getParent(locale); 1045 CLDRFile parentFile = testInfo.getCLDRFile(localeParent, true); 1046 int funnyCount = 0; 1047 for (Iterator<String> it = cldrFile.iterator("", cldrFile.getComparator()); 1048 it.hasNext(); ) { 1049 String path = it.next(); 1050 if (path.contains("/identity")) { 1051 continue; 1052 } 1053 final String fullXPath = cldrFile.getFullXPath(path); 1054 if (fullXPath.contains("[@draft=\"unconfirmed\"]") 1055 || fullXPath.contains("[@draft=\"provisional\"]")) { 1056 funnyCount++; 1057 continue; 1058 } 1059 logln("\tpath:\t" + path); 1060 logln("\t\t" + locale + " value:\t<" + cldrFile.getStringValue(path) + ">"); 1061 final String parentFullPath = parentFile.getFullXPath(path); 1062 logln("\t\t" + localeParent + " value:\t<" + parentFile.getStringValue(path) + ">"); 1063 logln("\t\t" + locale + " fullpath:\t" + fullXPath); 1064 logln("\t\t" + localeParent + " fullpath:\t" + parentFullPath); 1065 } 1066 logln("\tCount of non-approved:\t" + funnyCount); 1067 } 1068 1069 enum MissingType { 1070 plurals, 1071 main_exemplars, 1072 no_main, 1073 collation, 1074 index_exemplars, 1075 punct_exemplars 1076 } 1077 TestCoreData()1078 public void TestCoreData() { 1079 Set<String> availableLanguages = testInfo.getCldrFactory().getAvailableLanguages(); 1080 PluralInfo rootRules = SUPPLEMENTAL_DATA_INFO.getPlurals(PluralType.cardinal, "root"); 1081 Multimap<MissingType, Comparable> errors = TreeMultimap.create(); 1082 errors.put(MissingType.collation, "?"); 1083 1084 Multimap<MissingType, Comparable> warnings = TreeMultimap.create(); 1085 warnings.put(MissingType.collation, "?"); 1086 warnings.put(MissingType.index_exemplars, "?"); 1087 warnings.put(MissingType.punct_exemplars, "?"); 1088 1089 Set<String> collations = new HashSet<>(); 1090 1091 // collect collation info 1092 Factory collationFactory = 1093 Factory.make(CLDRPaths.COLLATION_DIRECTORY, ".*", DraftStatus.contributed); 1094 for (String localeID : collationFactory.getAvailable()) { 1095 if (isTopLevel(localeID)) { 1096 collations.add(localeID); 1097 } 1098 } 1099 logln(collations.toString()); 1100 1101 Set<String> allLanguages = 1102 Builder.with(new TreeSet<String>()) 1103 .addAll(collations) 1104 .addAll(availableLanguages) 1105 .freeze(); 1106 1107 for (String localeID : allLanguages) { 1108 if (localeID.equals("root")) { 1109 continue; // skip script locales 1110 } 1111 if (!isTopLevel(localeID)) { 1112 continue; 1113 } 1114 if (!StandardCodes.isLocaleAtLeastBasic(localeID)) { 1115 continue; 1116 } 1117 errors.clear(); 1118 warnings.clear(); 1119 1120 String name = 1121 "Locale:" + localeID + " (" + testInfo.getEnglish().getName(localeID) + ")"; 1122 1123 if (!collations.contains(localeID)) { 1124 warnings.put(MissingType.collation, "missing"); 1125 logln(name + " is missing " + MissingType.collation.toString()); 1126 } 1127 1128 try { 1129 CLDRFile cldrFile = 1130 testInfo.getCldrFactory().make(localeID, false, DraftStatus.contributed); 1131 1132 String wholeFileAlias = cldrFile.getStringValue("//ldml/alias"); 1133 if (wholeFileAlias != null) { 1134 logln("Whole-file alias:" + name); 1135 continue; 1136 } 1137 1138 PluralInfo pluralInfo = 1139 SUPPLEMENTAL_DATA_INFO.getPlurals(PluralType.cardinal, localeID); 1140 if (pluralInfo == rootRules) { 1141 logln(name + " is missing " + MissingType.plurals.toString()); 1142 warnings.put(MissingType.plurals, "missing"); 1143 } 1144 UnicodeSet main = cldrFile.getExemplarSet("", WinningChoice.WINNING); 1145 if (main == null || main.isEmpty()) { 1146 errln(" " + name + " is missing " + MissingType.main_exemplars.toString()); 1147 errors.put(MissingType.main_exemplars, "missing"); 1148 } 1149 UnicodeSet index = cldrFile.getExemplarSet("index", WinningChoice.WINNING); 1150 if (index == null || index.isEmpty()) { 1151 logln(name + " is missing " + MissingType.index_exemplars.toString()); 1152 warnings.put(MissingType.index_exemplars, "missing"); 1153 } 1154 UnicodeSet punctuation = 1155 cldrFile.getExemplarSet("punctuation", WinningChoice.WINNING); 1156 if (punctuation == null || punctuation.isEmpty()) { 1157 logln(name + " is missing " + MissingType.punct_exemplars.toString()); 1158 warnings.put(MissingType.punct_exemplars, "missing"); 1159 } 1160 } catch (Exception e) { 1161 StringWriter x = new StringWriter(); 1162 PrintWriter pw = new PrintWriter(x); 1163 e.printStackTrace(pw); 1164 pw.flush(); 1165 errln(" " + name + " is missing main locale data." + x); 1166 errors.put(MissingType.no_main, x.toString()); 1167 } 1168 1169 // report errors 1170 1171 if (errors.isEmpty() && warnings.isEmpty()) { 1172 logln(name + ": No problems..."); 1173 } 1174 } 1175 } 1176 isTopLevel(String localeID)1177 private boolean isTopLevel(String localeID) { 1178 return "root".equals(LocaleIDParser.getParent(localeID)); 1179 } 1180 1181 /** Tests that every dtd item is connected from root */ TestDtdCompleteness()1182 public void TestDtdCompleteness() { 1183 for (DtdType type : DtdType.values()) { 1184 if (type.getStatus() != DtdType.DtdStatus.active) { 1185 continue; 1186 } 1187 DtdData dtdData = DtdData.getInstance(type); 1188 Set<Element> descendents = new LinkedHashSet<>(); 1189 dtdData.getDescendents(dtdData.ROOT, descendents); 1190 Set<Element> elements = dtdData.getElements(); 1191 if (!elements.equals(descendents)) { 1192 for (Element e : elements) { 1193 if (!descendents.contains(e) 1194 && !e.equals(dtdData.PCDATA) 1195 && !e.equals(dtdData.ANY)) { 1196 errln(type + ": Element " + e + " not contained in descendents of ROOT."); 1197 } 1198 } 1199 for (Element e : descendents) { 1200 if (!elements.contains(e)) { 1201 errln(type + ": Element " + e + ", descendent of ROOT, not in elements."); 1202 } 1203 } 1204 } 1205 LinkedHashSet<Element> all = new LinkedHashSet<>(descendents); 1206 all.addAll(elements); 1207 Set<Attribute> attributes = dtdData.getAttributes(); 1208 for (Attribute a : attributes) { 1209 if (!elements.contains(a.element)) { 1210 errln(type + ": Attribute " + a + " isn't for any element."); 1211 } 1212 } 1213 } 1214 } 1215 TestBasicDTDCompatibility()1216 public void TestBasicDTDCompatibility() { 1217 1218 if (!TestCLDRPaths.canUseArchiveDirectory()) { 1219 return; 1220 } 1221 1222 final String oldCommon = CldrVersion.LAST_RELEASE_VERSION.getBaseDirectory() + "/common"; 1223 1224 // set up exceptions 1225 Set<String> changedToEmpty = 1226 new HashSet<>( 1227 Arrays.asList( 1228 new String[] { 1229 "version", 1230 "languageCoverage", 1231 "scriptCoverage", 1232 "territoryCoverage", 1233 "currencyCoverage", 1234 "timezoneCoverage", 1235 "skipDefaultLocale" 1236 })); 1237 Set<String> PCDATA = new HashSet<>(); 1238 PCDATA.add("PCDATA"); 1239 Set<String> EMPTY = new HashSet<>(); 1240 EMPTY.add("EMPTY"); 1241 Set<String> VERSION = new HashSet<>(); 1242 VERSION.add("version"); 1243 1244 // test all DTDs 1245 for (DtdType dtd : DtdType.values()) { 1246 if (dtd.getStatus() != DtdType.DtdStatus.active) { 1247 continue; 1248 } 1249 if (dtd.firstVersion != null 1250 && CldrVersion.LAST_RELEASE_VERSION.isOlderThan( 1251 CldrVersion.from(dtd.firstVersion))) { 1252 continue; // DTD didn't exist in last release 1253 } 1254 if (dtd == DtdType.ldmlICU) continue; 1255 try { 1256 ElementAttributeInfo oldDtd = ElementAttributeInfo.getInstance(oldCommon, dtd); 1257 ElementAttributeInfo newDtd = ElementAttributeInfo.getInstance(dtd); 1258 1259 if (oldDtd == newDtd) { 1260 continue; 1261 } 1262 Relation<String, String> oldElement2Children = oldDtd.getElement2Children(); 1263 Relation<String, String> newElement2Children = newDtd.getElement2Children(); 1264 1265 Relation<String, String> oldElement2Attributes = oldDtd.getElement2Attributes(); 1266 Relation<String, String> newElement2Attributes = newDtd.getElement2Attributes(); 1267 1268 for (String element : oldElement2Children.keySet()) { 1269 Set<String> oldChildren = oldElement2Children.getAll(element); 1270 Set<String> newChildren = newElement2Children.getAll(element); 1271 if (newChildren == null) { 1272 if (!knownElementExceptions.contains(Pair.of(dtd.toString(), element))) { 1273 errln("Old " + dtd + " contains element not in new: <" + element + ">"); 1274 } 1275 continue; 1276 } 1277 Set<String> funny = containsInOrder(newChildren, oldChildren); 1278 if (funny != null) { 1279 if (changedToEmpty.contains(element) 1280 && oldChildren.equals(PCDATA) 1281 && newChildren.equals(EMPTY)) { 1282 // ok, skip 1283 } else { 1284 errln( 1285 "Old " 1286 + dtd 1287 + " element <" 1288 + element 1289 + "> has children Missing/Misordered:\t" 1290 + funny 1291 + "\n\t\tOld:\t" 1292 + oldChildren 1293 + "\n\t\tNew:\t" 1294 + newChildren); 1295 } 1296 } 1297 1298 Set<String> oldAttributes = oldElement2Attributes.getAll(element); 1299 if (oldAttributes == null) { 1300 oldAttributes = Collections.emptySet(); 1301 } 1302 Set<String> newAttributes = newElement2Attributes.getAll(element); 1303 if (newAttributes == null) { 1304 newAttributes = Collections.emptySet(); 1305 } 1306 if (!newAttributes.containsAll(oldAttributes)) { 1307 LinkedHashSet<String> missing = new LinkedHashSet<>(oldAttributes); 1308 missing.removeAll(newAttributes); 1309 if (element.equals(dtd.toString()) && missing.equals(VERSION)) { 1310 // ok, skip 1311 } else { 1312 errln( 1313 "Old " 1314 + dtd 1315 + " element <" 1316 + element 1317 + "> has attributes Missing:\t" 1318 + missing 1319 + "\n\t\tOld:\t" 1320 + oldAttributes 1321 + "\n\t\tNew:\t" 1322 + newAttributes); 1323 } 1324 } 1325 } 1326 } catch (Exception e) { 1327 e.printStackTrace(); 1328 errln("Failure with " + dtd); 1329 } 1330 } 1331 } 1332 containsInOrder(Set<T> superset, Set<T> subset)1333 private <T> Set<T> containsInOrder(Set<T> superset, Set<T> subset) { 1334 if (!superset.containsAll(subset)) { 1335 LinkedHashSet<T> missing = new LinkedHashSet<>(subset); 1336 missing.removeAll(superset); 1337 return missing; 1338 } 1339 // ok, we know that they are subsets, try order 1340 Set<T> result = null; 1341 DiscreteComparator<T> comp = 1342 new DiscreteComparator.Builder<T>(Ordering.ARBITRARY).add(superset).get(); 1343 T last = null; 1344 for (T item : subset) { 1345 if (last != null) { 1346 int order = comp.compare(last, item); 1347 if (order != -1) { 1348 if (result == null) { 1349 result = new HashSet<>(); 1350 result.add(last); 1351 result.add(item); 1352 } 1353 } 1354 } 1355 last = item; 1356 } 1357 return result; 1358 } 1359 TestDtdCompatibility()1360 public void TestDtdCompatibility() { 1361 1362 for (DtdType type : DtdType.values()) { 1363 if (type.getStatus() != DtdType.DtdStatus.active) { 1364 continue; 1365 } 1366 DtdData dtdData = DtdData.getInstance(type); 1367 Map<String, Element> currentElementFromName = dtdData.getElementFromName(); 1368 1369 // current has no orphan 1370 Set<Element> orphans = new LinkedHashSet<>(dtdData.getElementFromName().values()); 1371 orphans.remove(dtdData.ROOT); 1372 orphans.remove(dtdData.PCDATA); 1373 orphans.remove(dtdData.ANY); 1374 Set<String> elementsWithoutAlt = new TreeSet<>(); 1375 Set<String> elementsWithoutDraft = new TreeSet<>(); 1376 Set<String> elementsWithoutAlias = new TreeSet<>(); 1377 Set<String> elementsWithoutSpecial = new TreeSet<>(); 1378 1379 for (Element element : dtdData.getElementFromName().values()) { 1380 Set<Element> children = element.getChildren().keySet(); 1381 orphans.removeAll(children); 1382 if (type == DtdType.ldml 1383 && !SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element.name, "*", "*")) { 1384 if (element.getType() == ElementType.PCDATA) { 1385 if (element.getAttributeNamed("alt") == null) { 1386 elementsWithoutAlt.add(element.name); 1387 } 1388 if (element.getAttributeNamed("draft") == null) { 1389 elementsWithoutDraft.add(element.name); 1390 } 1391 } else { 1392 if (children.size() != 0 && !"alias".equals(element.name)) { 1393 if (element.getChildNamed("alias") == null) { 1394 elementsWithoutAlias.add(element.name); 1395 } 1396 if (element.getChildNamed("special") == null) { 1397 elementsWithoutSpecial.add(element.name); 1398 } 1399 } 1400 } 1401 } 1402 } 1403 assertEquals( 1404 type + " DTD Must not have orphan elements", Collections.EMPTY_SET, orphans); 1405 assertEquals( 1406 type + " DTD elements with PCDATA must have 'alt' attributes", 1407 Collections.EMPTY_SET, 1408 elementsWithoutAlt); 1409 assertEquals( 1410 type + " DTD elements with PCDATA must have 'draft' attributes", 1411 Collections.EMPTY_SET, 1412 elementsWithoutDraft); 1413 assertEquals( 1414 type + " DTD elements with children must have 'alias' elements", 1415 Collections.EMPTY_SET, 1416 elementsWithoutAlias); 1417 assertEquals( 1418 type + " DTD elements with children must have 'special' elements", 1419 Collections.EMPTY_SET, 1420 elementsWithoutSpecial); 1421 1422 if (!TestCLDRPaths.canUseArchiveDirectory()) { 1423 return; 1424 } 1425 1426 for (CldrVersion version : CldrVersion.CLDR_VERSIONS_DESCENDING) { 1427 if (version == CldrVersion.unknown || version == CldrVersion.baseline) { 1428 continue; 1429 } 1430 if (type.getStatus() != DtdStatus.active) { 1431 continue; // not active 1432 } 1433 if (type.firstVersion != null 1434 && version.isOlderThan(CldrVersion.from(type.firstVersion))) { 1435 continue; // didn't exist at that point 1436 } 1437 DtdData dtdDataOld; 1438 try { 1439 dtdDataOld = DtdData.getInstance(type, version.toString()); 1440 } catch (IllegalArgumentException e) { 1441 boolean tooOld = false; 1442 switch (type) { 1443 case ldmlICU: 1444 tooOld = badLdmlICUVersions.contains(version); 1445 break; 1446 case ldmlBCP47: 1447 case keyboard3: 1448 if (type.firstVersion != null) { 1449 tooOld = version.isOlderThan(CldrVersion.from(type.firstVersion)); 1450 } 1451 break; 1452 default: 1453 break; 1454 } 1455 if (tooOld) { 1456 continue; 1457 } else { 1458 errln( 1459 "v" 1460 + version 1461 + ": " 1462 + e.getClass().getSimpleName() 1463 + ", " 1464 + e.getMessage()); 1465 continue; 1466 } 1467 } 1468 // verify that if E is in dtdDataOld, then it is in dtdData, and 1469 // has at least the same children and attributes 1470 for (Entry<String, Element> entry : dtdDataOld.getElementFromName().entrySet()) { 1471 Element oldElement = entry.getValue(); 1472 Element newElement = currentElementFromName.get(entry.getKey()); 1473 if (knownElementExceptions.contains( 1474 Pair.of(type.toString(), oldElement.getName()))) { 1475 continue; 1476 } 1477 if (assertNotNull( 1478 type 1479 + " DTD for trunk must be superset of v" 1480 + version 1481 + ", and must contain «" 1482 + oldElement.getName() 1483 + "»", 1484 newElement)) { 1485 // TODO Check order also 1486 for (Element oldChild : oldElement.getChildren().keySet()) { 1487 if (oldChild == null) { 1488 continue; 1489 } 1490 Element newChild = newElement.getChildNamed(oldChild.getName()); 1491 // skip certain items 1492 if (version.isOlderThan(CldrVersion.v1_6_1) 1493 && newElement.getName().equals("zone") 1494 && oldChild.getName().equals("usesMetazone")) { 1495 if (logKnownIssue( 1496 "CLDR-17054", 1497 "Breakage with items older than 1.6.1: " 1498 + newElement.getName() 1499 + " / " 1500 + oldChild.getName())) { 1501 continue; 1502 } 1503 } 1504 1505 if (knownChildExceptions.contains( 1506 Pair.of(newElement.getName(), oldChild.getName()))) { 1507 continue; 1508 } 1509 assertNotNull( 1510 type 1511 + " DTD - Trunk children of «" 1512 + newElement.getName() 1513 + "» must be superset of v" 1514 + version 1515 + ", and must contain «" 1516 + oldChild.getName() 1517 + "»", 1518 newChild); 1519 } 1520 for (Attribute oldAttribute : oldElement.getAttributes().keySet()) { 1521 Attribute newAttribute = 1522 newElement.getAttributeNamed(oldAttribute.getName()); 1523 1524 if (knownAttributeExceptions.contains( 1525 Pair.of(newElement.getName(), oldAttribute.getName()))) { 1526 continue; 1527 } 1528 assertNotNull( 1529 type 1530 + " DTD - Trunk attributes of «" 1531 + newElement.getName() 1532 + "» must be superset of v" 1533 + version 1534 + ", and must contain «" 1535 + oldAttribute.getName() 1536 + "»", 1537 newAttribute); 1538 } 1539 } 1540 } 1541 } 1542 } 1543 } 1544 1545 /** Compare each path to each other path for every single file in CLDR */ TestDtdComparison()1546 public void TestDtdComparison() { 1547 // try some simple paths for regression 1548 1549 sortPaths( 1550 DtdData.getInstance(DtdType.ldml).getDtdComparator(null), 1551 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/dateTimeFormatLength[@type=\"full\"]/dateTimeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1552 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats"); 1553 1554 sortPaths( 1555 DtdData.getInstance(DtdType.supplementalData).getDtdComparator(null), 1556 "//supplementalData/territoryContainment/group[@type=\"419\"][@contains=\"013 029 005\"][@grouping=\"true\"]", 1557 "//supplementalData/territoryContainment/group[@type=\"003\"][@contains=\"021 013 029\"][@grouping=\"true\"]"); 1558 } 1559 TestDtdComparisonsAll()1560 public void TestDtdComparisonsAll() { 1561 if (getInclusion() <= 5) { // Only run this test in exhaustive mode. 1562 return; 1563 } 1564 for (File file : CLDRConfig.getInstance().getAllCLDRFilesEndingWith(".xml")) { 1565 if (file.getParentFile().getName().equals("import") 1566 && file.getParentFile().getParentFile().getName().equals("keyboards")) { 1567 return; // skip imports 1568 } 1569 if (file.getPath().contains("/keyboards/3.0/") 1570 && logKnownIssue( 1571 "CLDR-17574", "With v46, parsing issues for keyboard xml files")) { 1572 continue; 1573 } 1574 checkDtdComparatorFor(file, null); 1575 } 1576 } 1577 checkDtdComparatorForResource(String fileToRead, DtdType overrideDtdType)1578 public void checkDtdComparatorForResource(String fileToRead, DtdType overrideDtdType) { 1579 MyHandler myHandler = new MyHandler(overrideDtdType); 1580 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 1581 try { 1582 myHandler.fileName = fileToRead; 1583 xfr.read(myHandler.fileName, TestBasic.class, -1, true); 1584 logln(myHandler.fileName); 1585 } catch (Exception e) { 1586 Throwable t = e; 1587 StringBuilder b = new StringBuilder(); 1588 String indent = ""; 1589 while (t != null) { 1590 b.append(indent).append(t.getMessage()); 1591 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t"; 1592 t = t.getCause(); 1593 } 1594 errln(b.toString()); 1595 return; 1596 } 1597 DtdData dtdData = DtdData.getInstance(myHandler.dtdType); 1598 sortPaths(dtdData.getDtdComparator(null), myHandler.data); 1599 } 1600 checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType)1601 public void checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType) { 1602 MyHandler myHandler = new MyHandler(overrideDtdType); 1603 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 1604 try { 1605 myHandler.fileName = PathUtilities.getNormalizedPathString(fileToRead); 1606 xfr.read(myHandler.fileName, -1, true); 1607 logln(myHandler.fileName); 1608 } catch (Exception e) { 1609 e.printStackTrace(); 1610 Throwable t = e; 1611 StringBuilder b = new StringBuilder(); 1612 String indent = ""; 1613 while (t != null) { 1614 b.append(indent).append(t.getMessage()); 1615 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t"; 1616 t = t.getCause(); 1617 } 1618 errln(b.toString()); 1619 return; 1620 } 1621 DtdData dtdData = DtdData.getInstance(myHandler.dtdType); 1622 sortPaths(dtdData.getDtdComparator(null), myHandler.data); 1623 } 1624 1625 static class MyHandler extends XMLFileReader.SimpleHandler { 1626 private String fileName; 1627 private DtdType dtdType; 1628 private final Set<String> data = new LinkedHashSet<>(); 1629 MyHandler(DtdType overrideDtdType)1630 public MyHandler(DtdType overrideDtdType) { 1631 dtdType = overrideDtdType; 1632 } 1633 1634 @Override handlePathValue(String path, @SuppressWarnings("unused") String value)1635 public void handlePathValue(String path, @SuppressWarnings("unused") String value) { 1636 if (dtdType == null) { 1637 try { 1638 dtdType = DtdType.fromPath(path); 1639 } catch (Exception e) { 1640 throw new IllegalArgumentException("Can't read " + fileName, e); 1641 } 1642 } 1643 data.add(path); 1644 } 1645 } 1646 sortPaths(Comparator<String> dc, Collection<String> paths)1647 public void sortPaths(Comparator<String> dc, Collection<String> paths) { 1648 String[] array = paths.toArray(new String[paths.size()]); 1649 sortPaths(dc, array); 1650 } 1651 sortPaths(Comparator<String> dc, String... array)1652 public void sortPaths(Comparator<String> dc, String... array) { 1653 Arrays.sort(array, 0, array.length, dc); 1654 } 1655 // public void TestNewDtdData() moved to TestDtdData 1656 } 1657