1 package org.unicode.cldr.unittest; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.io.InputStream; 6 import java.util.ArrayList; 7 import java.util.Arrays; 8 import java.util.Collection; 9 import java.util.Collections; 10 import java.util.Comparator; 11 import java.util.EnumSet; 12 import java.util.HashSet; 13 import java.util.Iterator; 14 import java.util.LinkedHashSet; 15 import java.util.List; 16 import java.util.Map; 17 import java.util.Map.Entry; 18 import java.util.Set; 19 import java.util.TreeMap; 20 import java.util.TreeSet; 21 22 import org.unicode.cldr.test.DisplayAndInputProcessor; 23 import org.unicode.cldr.tool.GenerateBirth.Versions; 24 import org.unicode.cldr.tool.LikelySubtags; 25 import org.unicode.cldr.util.Builder; 26 import org.unicode.cldr.util.CLDRConfig; 27 import org.unicode.cldr.util.CLDRFile; 28 import org.unicode.cldr.util.CLDRFile.DraftStatus; 29 import org.unicode.cldr.util.CLDRFile.Status; 30 import org.unicode.cldr.util.CLDRFile.WinningChoice; 31 import org.unicode.cldr.util.CLDRPaths; 32 import org.unicode.cldr.util.ChainedMap; 33 import org.unicode.cldr.util.ChainedMap.M4; 34 import org.unicode.cldr.util.CharacterFallbacks; 35 import org.unicode.cldr.util.CldrUtility; 36 import org.unicode.cldr.util.Counter; 37 import org.unicode.cldr.util.DiscreteComparator; 38 import org.unicode.cldr.util.DiscreteComparator.Ordering; 39 import org.unicode.cldr.util.DtdData; 40 import org.unicode.cldr.util.DtdData.Attribute; 41 import org.unicode.cldr.util.DtdData.Element; 42 import org.unicode.cldr.util.DtdData.ElementType; 43 import org.unicode.cldr.util.DtdType; 44 import org.unicode.cldr.util.ElementAttributeInfo; 45 import org.unicode.cldr.util.Factory; 46 import org.unicode.cldr.util.InputStreamFactory; 47 import org.unicode.cldr.util.LanguageTagParser; 48 import org.unicode.cldr.util.Level; 49 import org.unicode.cldr.util.LocaleIDParser; 50 import org.unicode.cldr.util.Pair; 51 import org.unicode.cldr.util.PathHeader; 52 import org.unicode.cldr.util.SupplementalDataInfo; 53 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 54 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 55 import org.unicode.cldr.util.XMLFileReader; 56 import org.unicode.cldr.util.XPathParts; 57 import org.xml.sax.ErrorHandler; 58 import org.xml.sax.InputSource; 59 import org.xml.sax.SAXException; 60 import org.xml.sax.SAXParseException; 61 import org.xml.sax.XMLReader; 62 63 import com.google.common.base.Objects; 64 import com.google.common.collect.ImmutableMultimap; 65 import com.google.common.collect.ImmutableSet; 66 import com.google.common.collect.Multimap; 67 import com.google.common.collect.TreeMultimap; 68 import com.ibm.icu.dev.util.CollectionUtilities; 69 import com.ibm.icu.impl.Relation; 70 import com.ibm.icu.impl.Row; 71 import com.ibm.icu.impl.Row.R2; 72 import com.ibm.icu.impl.Row.R3; 73 import com.ibm.icu.impl.Utility; 74 import com.ibm.icu.lang.UCharacter; 75 import com.ibm.icu.text.Collator; 76 import com.ibm.icu.text.DecimalFormat; 77 import com.ibm.icu.text.Normalizer; 78 import com.ibm.icu.text.NumberFormat; 79 import com.ibm.icu.text.UTF16; 80 import com.ibm.icu.text.UnicodeSet; 81 import com.ibm.icu.text.UnicodeSetIterator; 82 import com.ibm.icu.util.Currency; 83 import com.ibm.icu.util.ULocale; 84 85 public class TestBasic extends TestFmwkPlus { 86 87 private static final boolean DEBUG = false; 88 89 static CLDRConfig testInfo = CLDRConfig.getInstance(); 90 91 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = testInfo 92 .getSupplementalDataInfo(); 93 94 private static final ImmutableSet<Pair<String, String>> knownElementExceptions = ImmutableSet.of( 95 Pair.of("ldml", "usesMetazone"), 96 Pair.of("ldmlICU", "usesMetazone")); 97 98 private static final ImmutableSet<Pair<String, String>> knownAttributeExceptions = ImmutableSet.of( 99 Pair.of("ldml", "version"), 100 Pair.of("supplementalData", "version"), 101 Pair.of("ldmlICU", "version"), 102 Pair.of("layout", "standard")); 103 104 private static final ImmutableSet<Pair<String, String>> knownChildExceptions = ImmutableSet.of( 105 Pair.of("abbreviationFallback", "special"), 106 Pair.of("inList", "special"), 107 Pair.of("preferenceOrdering", "special")); 108 109 /** 110 * Simple test that loads each file in the cldr directory, thus verifying 111 * that the DTD works, and also checks that the PrettyPaths work. 112 * 113 * @author markdavis 114 */ 115 main(String[] args)116 public static void main(String[] args) { 117 new TestBasic().run(args); 118 } 119 120 private static final ImmutableSet<String> skipAttributes = ImmutableSet.of( 121 "alt", "draft", "references"); 122 123 private final ImmutableSet<String> eightPointLocales = ImmutableSet.of( 124 "ar", "ca", "cs", "da", "de", "el", "es", "fi", "fr", "he", "hi", "hr", "hu", "id", 125 "it", "ja", "ko", "lt", "lv", "nb", "nl", "pl", "pt", "pt_PT", "ro", "ru", "sk", "sl", "sr", "sv", 126 "th", "tr", "uk", "vi", "zh", "zh_Hant"); 127 128 // private final boolean showForceZoom = Utility.getProperty("forcezoom", 129 // false); 130 131 private final boolean resolved = CldrUtility.getProperty("resolved", false); 132 133 private final Exception[] internalException = new Exception[1]; 134 TestDtds()135 public void TestDtds() throws IOException { 136 Relation<Row.R2<DtdType, String>, String> foundAttributes = Relation 137 .of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(), 138 TreeSet.class); 139 final CLDRConfig config = CLDRConfig.getInstance(); 140 final File basedir = config.getCldrBaseDirectory(); 141 List<TimingInfo> data = new ArrayList<>(); 142 143 for (String subdir : config.getCLDRDataDirectories()) { 144 checkDtds(new File(basedir, subdir), 0, foundAttributes, data); 145 } 146 if (foundAttributes.size() > 0) { 147 showFoundElements(foundAttributes); 148 } 149 if (isVerbose()) { 150 long totalBytes = 0; 151 long totalNanos = 0; 152 for (TimingInfo i : data) { 153 long length = i.file.length(); 154 totalBytes += length; 155 totalNanos += i.nanos; 156 logln(i.nanos + "\t" + length + "\t" + i.file); 157 } 158 logln(totalNanos + "\t" + totalBytes); 159 } 160 } 161 checkDtds(File directoryFile, int level, Relation<R2<DtdType, String>, String> foundAttributes, List<TimingInfo> data)162 private void checkDtds(File directoryFile, int level, 163 Relation<R2<DtdType, String>, String> foundAttributes, 164 List<TimingInfo> data) throws IOException { 165 boolean deepCheck = getInclusion() >= 10; 166 File[] listFiles = directoryFile.listFiles(); 167 String canonicalPath = directoryFile.getCanonicalPath(); 168 String indent = Utility.repeat("\t", level); 169 if (listFiles == null) { 170 throw new IllegalArgumentException(indent + "Empty directory: " 171 + canonicalPath); 172 } 173 logln("Checking files for DTD errors in: " + indent + canonicalPath); 174 for (File fileName : listFiles) { 175 String name = fileName.getName(); 176 if (CLDRConfig.isJunkFile(name)) { 177 continue; 178 } else if (fileName.isDirectory()) { 179 checkDtds(fileName, level + 1, foundAttributes, data); 180 } else if (name.endsWith(".xml")) { 181 data.add(check(fileName)); 182 if (deepCheck // takes too long to do all the time 183 ) { 184 CLDRFile cldrfile = CLDRFile.loadFromFile(fileName, "temp", 185 DraftStatus.unconfirmed); 186 for (String xpath : cldrfile) { 187 String fullPath = cldrfile.getFullXPath(xpath); 188 if (fullPath == null) { 189 fullPath = cldrfile.getFullXPath(xpath); 190 assertNotNull("", fullPath); 191 continue; 192 } 193 XPathParts parts = XPathParts 194 .getFrozenInstance(fullPath); 195 DtdType type = parts.getDtdData().dtdType; 196 for (int i = 0; i < parts.size(); ++i) { 197 String element = parts.getElement(i); 198 R2<DtdType, String> typeElement = Row.of(type, 199 element); 200 if (parts.getAttributeCount(i) == 0) { 201 foundAttributes.put(typeElement, "NONE"); 202 } else { 203 for (String attribute : parts 204 .getAttributeKeys(i)) { 205 foundAttributes.put(typeElement, attribute); 206 } 207 } 208 } 209 } 210 } 211 } 212 } 213 } 214 showFoundElements( Relation<Row.R2<DtdType, String>, String> foundAttributes)215 public void showFoundElements( 216 Relation<Row.R2<DtdType, String>, String> foundAttributes) { 217 Relation<Row.R2<DtdType, String>, String> theoryAttributes = Relation 218 .of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(), 219 TreeSet.class); 220 for (DtdType type : DtdType.values()) { 221 DtdData dtdData = DtdData.getInstance(type); 222 for (Element element : dtdData.getElementFromName().values()) { 223 String name = element.getName(); 224 Set<Attribute> attributes = element.getAttributes().keySet(); 225 R2<DtdType, String> typeElement = Row.of(type, name); 226 if (attributes.isEmpty()) { 227 theoryAttributes.put(typeElement, "NONE"); 228 } else { 229 for (Attribute attribute : attributes) { 230 theoryAttributes.put(typeElement, attribute.name); 231 } 232 } 233 } 234 } 235 Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed = Relation 236 .of(new TreeMap<String, Set<R3<Boolean, DtdType, String>>>(), 237 LinkedHashSet.class); 238 239 for (Entry<R2<DtdType, String>, Set<String>> s : theoryAttributes 240 .keyValuesSet()) { 241 R2<DtdType, String> typeElement = s.getKey(); 242 Set<String> theoryAttributeSet = s.getValue(); 243 DtdType type = typeElement.get0(); 244 String element = typeElement.get1(); 245 if (element.equals("ANY") || element.equals("#PCDATA")) { 246 continue; 247 } 248 boolean deprecatedElement = SUPPLEMENTAL_DATA_INFO.isDeprecated( 249 type, element, "*", "*"); 250 String header = type + "\t" + element + "\t" 251 + (deprecatedElement ? "X" : "") + "\t"; 252 Set<String> usedAttributes = foundAttributes.get(typeElement); 253 Set<String> unusedAttributes = new LinkedHashSet<String>( 254 theoryAttributeSet); 255 if (usedAttributes == null) { 256 logln(header 257 + "<NOT-FOUND>\t\t" 258 + siftDeprecated(type, element, unusedAttributes, 259 attributesToTypeElementUsed, false)); 260 continue; 261 } 262 unusedAttributes.removeAll(usedAttributes); 263 logln(header 264 + siftDeprecated(type, element, usedAttributes, 265 attributesToTypeElementUsed, true) 266 + "\t" 267 + siftDeprecated(type, element, unusedAttributes, 268 attributesToTypeElementUsed, false)); 269 } 270 271 logln("Undeprecated Attributes\t"); 272 for (Entry<String, R3<Boolean, DtdType, String>> s : attributesToTypeElementUsed 273 .keyValueSet()) { 274 R3<Boolean, DtdType, String> typeElementUsed = s.getValue(); 275 logln(s.getKey() + "\t" + typeElementUsed.get0() 276 + "\t" + typeElementUsed.get1() + "\t" 277 + typeElementUsed.get2()); 278 } 279 } 280 siftDeprecated( DtdType type, String element, Set<String> attributeSet, Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed, boolean used)281 private String siftDeprecated( 282 DtdType type, 283 String element, 284 Set<String> attributeSet, 285 Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed, 286 boolean used) { 287 StringBuilder b = new StringBuilder(); 288 StringBuilder bdep = new StringBuilder(); 289 for (String attribute : attributeSet) { 290 String attributeName = "«" 291 + attribute 292 + (!"NONE".equals(attribute) && CLDRFile.isDistinguishing(type, element, attribute) ? "*" 293 : "") 294 + "»"; 295 if (!"NONE".equals(attribute) && SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element, attribute, 296 "*")) { 297 if (bdep.length() != 0) { 298 bdep.append(" "); 299 } 300 bdep.append(attributeName); 301 } else { 302 if (b.length() != 0) { 303 b.append(" "); 304 } 305 b.append(attributeName); 306 if (!"NONE".equals(attribute)) { 307 attributesToTypeElementUsed.put(attribute, 308 Row.of(used, type, element)); 309 } 310 } 311 } 312 return b.toString() + "\t" + bdep.toString(); 313 } 314 315 class MyErrorHandler implements ErrorHandler { error(SAXParseException exception)316 public void error(SAXParseException exception) throws SAXException { 317 errln("error: " + XMLFileReader.showSAX(exception)); 318 throw exception; 319 } 320 fatalError(SAXParseException exception)321 public void fatalError(SAXParseException exception) throws SAXException { 322 errln("fatalError: " + XMLFileReader.showSAX(exception)); 323 throw exception; 324 } 325 warning(SAXParseException exception)326 public void warning(SAXParseException exception) throws SAXException { 327 errln("warning: " + XMLFileReader.showSAX(exception)); 328 throw exception; 329 } 330 } 331 332 private class TimingInfo { 333 File file; 334 long nanos; 335 } 336 check(File systemID)337 public TimingInfo check(File systemID) { 338 long start = System.nanoTime(); 339 try (InputStream fis = InputStreamFactory.createInputStream(systemID)) { 340 // FileInputStream fis = new FileInputStream(systemID); 341 XMLReader xmlReader = XMLFileReader.createXMLReader(true); 342 xmlReader.setErrorHandler(new MyErrorHandler()); 343 InputSource is = new InputSource(fis); 344 is.setSystemId(systemID.toString()); 345 xmlReader.parse(is); 346 // fis.close(); 347 } catch (SAXException | IOException e) { 348 errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" 349 + e.getMessage()); 350 } 351 // catch (SAXParseException e) { 352 // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + 353 // e.getMessage()); 354 // } catch (IOException e) { 355 // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + 356 // e.getMessage()); 357 // } 358 TimingInfo timingInfo = new TimingInfo(); 359 timingInfo.nanos = System.nanoTime() - start; 360 timingInfo.file = systemID; 361 return timingInfo; 362 } 363 TestCurrencyFallback()364 public void TestCurrencyFallback() { 365 XPathParts parts = new XPathParts(); 366 Factory cldrFactory = testInfo.getCldrFactory(); 367 Set<String> currencies = testInfo.getStandardCodes().getAvailableCodes( 368 "currency"); 369 370 final UnicodeSet CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS = (UnicodeSet) new UnicodeSet( 371 "[[:sc:]-[\\u0000-\\u00FF]]").freeze(); 372 373 CharacterFallbacks fallbacks = CharacterFallbacks.make(); 374 375 for (String locale : cldrFactory.getAvailable()) { 376 CLDRFile file = testInfo.getCLDRFile(locale, false); 377 if (file.isNonInheriting()) 378 continue; 379 380 final UnicodeSet OK_CURRENCY_FALLBACK = (UnicodeSet) new UnicodeSet( 381 "[\\u0000-\\u00FF]").addAll(safeExemplars(file, "")) 382 .addAll(safeExemplars(file, "auxiliary")) 383 // .addAll(safeExemplars(file, "currencySymbol")) 384 .freeze(); 385 UnicodeSet badSoFar = new UnicodeSet(); 386 387 for (Iterator<String> it = file.iterator(); it.hasNext();) { 388 String path = it.next(); 389 if (path.endsWith("/alias")) { 390 continue; 391 } 392 String value = file.getStringValue(path); 393 394 // check for special characters 395 396 if (CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS.containsSome(value)) { 397 398 parts.set(path); 399 if (!parts.getElement(-1).equals("symbol")) { 400 continue; 401 } 402 // We don't care about fallbacks for narrow currency symbols 403 if ("narrow".equals(parts.getAttributeValue(-1, "alt"))) { 404 continue; 405 } 406 String currencyType = parts.getAttributeValue(-2, "type"); 407 408 UnicodeSet fishy = new UnicodeSet().addAll(value) 409 .retainAll(CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS) 410 .removeAll(badSoFar); 411 for (UnicodeSetIterator it2 = new UnicodeSetIterator(fishy); it2 412 .next();) { 413 final int fishyCodepoint = it2.codepoint; 414 List<String> fallbackList = fallbacks 415 .getSubstitutes(fishyCodepoint); 416 417 String nfkc = Normalizer.normalize(fishyCodepoint, 418 Normalizer.NFKC); 419 if (!nfkc.equals(UTF16.valueOf(fishyCodepoint))) { 420 if (fallbackList == null) { 421 fallbackList = new ArrayList<String>(); 422 } else { 423 fallbackList = new ArrayList<String>( 424 fallbackList); // writable 425 } 426 fallbackList.add(nfkc); 427 } 428 // later test for all Latin-1 429 if (fallbackList == null) { 430 errln("Locale:\t" + locale 431 + ";\tCharacter with no fallback:\t" 432 + it2.getString() + "\t" 433 + UCharacter.getName(fishyCodepoint)); 434 badSoFar.add(fishyCodepoint); 435 } else { 436 String fallback = null; 437 for (String fb : fallbackList) { 438 if (OK_CURRENCY_FALLBACK.containsAll(fb)) { 439 if (!fb.equals(currencyType) 440 && currencies.contains(fb)) { 441 errln("Locale:\t" 442 + locale 443 + ";\tCurrency:\t" 444 + currencyType 445 + ";\tFallback converts to different code!:\t" 446 + fb 447 + "\t" 448 + it2.getString() 449 + "\t" 450 + UCharacter 451 .getName(fishyCodepoint)); 452 } 453 if (fallback == null) { 454 fallback = fb; 455 } 456 } 457 } 458 if (fallback == null) { 459 errln("Locale:\t" 460 + locale 461 + ";\tCharacter with no good fallback (exemplars+Latin1):\t" 462 + it2.getString() + "\t" 463 + UCharacter.getName(fishyCodepoint)); 464 badSoFar.add(fishyCodepoint); 465 } else { 466 logln("Locale:\t" + locale 467 + ";\tCharacter with good fallback:\t" 468 + it2.getString() + " " 469 + UCharacter.getName(fishyCodepoint) 470 + " => " + fallback); 471 // badSoFar.add(fishyCodepoint); 472 } 473 } 474 } 475 } 476 } 477 } 478 } 479 TestAbstractPaths()480 public void TestAbstractPaths() { 481 Factory cldrFactory = testInfo.getCldrFactory(); 482 CLDRFile english = testInfo.getEnglish(); 483 Map<String, Counter<Level>> abstactPaths = new TreeMap<String, Counter<Level>>(); 484 RegexTransform abstractPathTransform = new RegexTransform( 485 RegexTransform.Processing.ONE_PASS).add("//ldml/", "") 486 .add("\\[@alt=\"[^\"]*\"\\]", "").add("=\"[^\"]*\"", "=\"*\"") 487 .add("([^]])\\[", "$1\t[").add("([^]])/", "$1\t/") 488 .add("/", "\t"); 489 490 for (String locale : getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable()) { 491 CLDRFile file = testInfo.getCLDRFile(locale, resolved); 492 if (file.isNonInheriting()) 493 continue; 494 logln(locale + "\t-\t" + english.getName(locale)); 495 496 for (Iterator<String> it = file.iterator(); it.hasNext();) { 497 String path = it.next(); 498 if (path.endsWith("/alias")) { 499 continue; 500 } 501 // collect abstracted paths 502 String abstractPath = abstractPathTransform.transform(path); 503 Level level = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, 504 locale); 505 if (level == Level.OPTIONAL) { 506 level = Level.COMPREHENSIVE; 507 } 508 Counter<Level> row = abstactPaths.get(abstractPath); 509 if (row == null) { 510 abstactPaths.put(abstractPath, row = new Counter<Level>()); 511 } 512 row.add(level, 1); 513 } 514 } 515 logln(CldrUtility.LINE_SEPARATOR + "Abstract Paths"); 516 for (Entry<String, Counter<Level>> pathInfo : abstactPaths.entrySet()) { 517 String path = pathInfo.getKey(); 518 Counter<Level> counter = pathInfo.getValue(); 519 logln(counter.getTotal() + "\t" + getCoverage(counter) + "\t" 520 + path); 521 } 522 } 523 getCoverage(Counter<Level> counter)524 private CharSequence getCoverage(Counter<Level> counter) { 525 StringBuilder result = new StringBuilder(); 526 boolean first = true; 527 for (Level level : counter.getKeysetSortedByKey()) { 528 if (first) { 529 first = false; 530 } else { 531 result.append(' '); 532 } 533 result.append("L").append(level.ordinal()).append("=") 534 .append(counter.get(level)); 535 } 536 return result; 537 } 538 539 // public void TestCLDRFileCache() { 540 // long start = System.nanoTime(); 541 // Factory cldrFactory = testInfo.getCldrFactory(); 542 // String unusualLocale = "hi"; 543 // CLDRFile file = cldrFactory.make(unusualLocale, true); 544 // long afterOne = System.nanoTime(); 545 // logln("First: " + (afterOne-start)); 546 // CLDRFile file2 = cldrFactory.make(unusualLocale, true); 547 // long afterTwo = System.nanoTime(); 548 // logln("Second: " + (afterTwo-afterOne)); 549 // } 550 // TestPaths()551 public void TestPaths() { 552 Relation<String, String> distinguishing = Relation.of( 553 new TreeMap<String, Set<String>>(), TreeSet.class); 554 Relation<String, String> nonDistinguishing = Relation.of( 555 new TreeMap<String, Set<String>>(), TreeSet.class); 556 XPathParts parts = new XPathParts(); 557 Factory cldrFactory = testInfo.getCldrFactory(); 558 CLDRFile english = testInfo.getEnglish(); 559 560 Relation<String, String> pathToLocale = Relation.of( 561 new TreeMap<String, Set<String>>(CLDRFile 562 .getComparator(DtdType.ldml)), 563 TreeSet.class, null); 564 Set<String> localesToTest = getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable(); 565 for (String locale : localesToTest) { 566 CLDRFile file = testInfo.getCLDRFile(locale, resolved); 567 DtdType dtdType = null; 568 if (file.isNonInheriting()) 569 continue; 570 DisplayAndInputProcessor displayAndInputProcessor = new DisplayAndInputProcessor( 571 file, false); 572 573 logln(locale + "\t-\t" + english.getName(locale)); 574 575 for (Iterator<String> it = file.iterator(); it.hasNext();) { 576 String path = it.next(); 577 if (dtdType == null) { 578 dtdType = DtdType.fromPath(path); 579 } 580 581 if (path.endsWith("/alias")) { 582 continue; 583 } 584 String value = file.getStringValue(path); 585 if (value == null) { 586 throw new IllegalArgumentException(locale 587 + "\tError: in null value at " + path); 588 } 589 590 String displayValue = displayAndInputProcessor 591 .processForDisplay(path, value); 592 if (!displayValue.equals(value)) { 593 logln("\t" 594 + locale 595 + "\tdisplayAndInputProcessor changes display value <" 596 + value + ">\t=>\t<" + displayValue + ">\t\t" 597 + path); 598 } 599 String inputValue = displayAndInputProcessor.processInput(path, 600 value, internalException); 601 if (internalException[0] != null) { 602 errln("\t" + locale 603 + "\tdisplayAndInputProcessor internal error <" 604 + value + ">\t=>\t<" + inputValue + ">\t\t" + path); 605 internalException[0].printStackTrace(System.out); 606 } 607 if (isVerbose() && !inputValue.equals(value)) { 608 displayAndInputProcessor.processInput(path, value, 609 internalException); // for 610 // debugging 611 logln("\t" 612 + locale 613 + "\tdisplayAndInputProcessor changes input value <" 614 + value + ">\t=>\t<" + inputValue + ">\t\t" + path); 615 } 616 617 pathToLocale.put(path, locale); 618 619 // also check for non-distinguishing attributes 620 if (path.contains("/identity")) 621 continue; 622 623 String fullPath = file.getFullXPath(path); 624 parts.set(fullPath); 625 for (int i = 0; i < parts.size(); ++i) { 626 if (parts.getAttributeCount(i) == 0) 627 continue; 628 String element = parts.getElement(i); 629 for (String attribute : parts.getAttributeKeys(i)) { 630 if (skipAttributes.contains(attribute)) 631 continue; 632 if (CLDRFile.isDistinguishing(dtdType, element, attribute)) { 633 distinguishing.put(element, attribute); 634 } else { 635 nonDistinguishing.put(element, attribute); 636 } 637 } 638 } 639 } 640 } 641 642 if (isVerbose()) { 643 System.out.format("Distinguishing Elements: %s" 644 + CldrUtility.LINE_SEPARATOR, distinguishing); 645 System.out.format("Nondistinguishing Elements: %s" 646 + CldrUtility.LINE_SEPARATOR, nonDistinguishing); 647 System.out.format("Skipped %s" + CldrUtility.LINE_SEPARATOR, 648 skipAttributes); 649 } 650 } 651 652 /** 653 * The verbose output shows the results of 1..3 \u00a4 signs. 654 */ checkCurrency()655 public void checkCurrency() { 656 Map<String, Set<R2<String, Integer>>> results = new TreeMap<String, Set<R2<String, Integer>>>( 657 Collator.getInstance(ULocale.ENGLISH)); 658 for (ULocale locale : ULocale.getAvailableLocales()) { 659 if (locale.getCountry().length() != 0) { 660 continue; 661 } 662 for (int i = 1; i < 4; ++i) { 663 NumberFormat format = getCurrencyInstance(locale, i); 664 for (Currency c : new Currency[] { Currency.getInstance("USD"), 665 Currency.getInstance("EUR"), 666 Currency.getInstance("INR") }) { 667 format.setCurrency(c); 668 final String formatted = format.format(12345.67); 669 Set<R2<String, Integer>> set = results.get(formatted); 670 if (set == null) { 671 results.put(formatted, 672 set = new TreeSet<R2<String, Integer>>()); 673 } 674 set.add(Row.of(locale.toString(), Integer.valueOf(i))); 675 } 676 } 677 } 678 for (String formatted : results.keySet()) { 679 logln(formatted + "\t" + results.get(formatted)); 680 } 681 } 682 getCurrencyInstance(ULocale locale, int type)683 private static NumberFormat getCurrencyInstance(ULocale locale, int type) { 684 NumberFormat format = NumberFormat.getCurrencyInstance(locale); 685 if (type > 1) { 686 DecimalFormat format2 = (DecimalFormat) format; 687 String pattern = format2.toPattern(); 688 String replacement = "\u00a4\u00a4"; 689 for (int i = 2; i < type; ++i) { 690 replacement += "\u00a4"; 691 } 692 pattern = pattern.replace("\u00a4", replacement); 693 format2.applyPattern(pattern); 694 } 695 return format; 696 } 697 safeExemplars(CLDRFile file, String string)698 private UnicodeSet safeExemplars(CLDRFile file, String string) { 699 final UnicodeSet result = file.getExemplarSet(string, 700 WinningChoice.NORMAL); 701 return result != null ? result : new UnicodeSet(); 702 } 703 TestAPath()704 public void TestAPath() { 705 // <month type="1">1</month> 706 String path = "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]"; 707 CLDRFile root = testInfo.getRoot(); 708 logln("path: " + path); 709 String fullpath = root.getFullXPath(path); 710 logln("fullpath: " + fullpath); 711 String value = root.getStringValue(path); 712 logln("value: " + value); 713 Status status = new Status(); 714 String source = root.getSourceLocaleID(path, status); 715 logln("locale: " + source); 716 logln("status: " + status); 717 } 718 TestDefaultContents()719 public void TestDefaultContents() { 720 Set<String> defaultContents = Inheritance.defaultContents; 721 Multimap<String, String> parentToChildren = Inheritance.parentToChildren; 722 723 if (DEBUG) { 724 Inheritance.showChain("", "", "root"); 725 } 726 727 for (String locale : defaultContents) { 728 CLDRFile cldrFile; 729 try { 730 cldrFile = testInfo.getCLDRFile(locale, false); 731 } catch (RuntimeException e) { 732 logln("Can't open default content file:\t" + locale); 733 continue; 734 } 735 // we check that the default content locale is always empty 736 for (Iterator<String> it = cldrFile.iterator(); it.hasNext();) { 737 String path = it.next(); 738 if (path.contains("/identity")) { 739 continue; 740 } 741 errln("Default content file not empty:\t" + locale); 742 showDifferences(locale); 743 break; 744 } 745 } 746 747 // check that if a locale has any children, that exactly one of them is 748 // the default content. Ignore locales with variants 749 750 for (Entry<String, Collection<String>> localeAndKids : parentToChildren.asMap().entrySet()) { 751 String locale = localeAndKids.getKey(); 752 if (locale.equals("root")) { 753 continue; 754 } 755 756 Collection<String> rawChildren = localeAndKids.getValue(); 757 758 // remove variant children 759 Set<String> children = new LinkedHashSet<>(); 760 for (String child : rawChildren) { 761 if (new LocaleIDParser().set(child).getVariants().length == 0) { 762 children.add(child); 763 } 764 } 765 if (children.isEmpty()) { 766 continue; 767 } 768 769 Set<String> defaultContentChildren = new LinkedHashSet<String>(children); 770 defaultContentChildren.retainAll(defaultContents); 771 if (defaultContentChildren.size() == 1) { 772 continue; 773 // If we're already down to the region level then it's OK not to have 774 // default contents. 775 } else if (! new LocaleIDParser().set(locale).getRegion().isEmpty()) { 776 continue; 777 } else if (defaultContentChildren.isEmpty()) { 778 Object possible = highestShared(locale, children); 779 errln("Locale has children but is missing default contents locale: " 780 + locale + ", children: " + children + "; possible fixes for children:\n" + possible); 781 } else { 782 errln("Locale has too many defaultContent locales!!: " 783 + locale + ", defaultContents: " 784 + defaultContentChildren); 785 } 786 } 787 788 // check that each default content locale is likely-subtag equivalent to 789 // its parent. 790 791 for (String locale : defaultContents) { 792 String maxLocale = LikelySubtags.maximize(locale, likelyData); 793 String localeParent = LocaleIDParser.getParent(locale); 794 String maxLocaleParent = LikelySubtags.maximize(localeParent, 795 likelyData); 796 if (locale.equals("ar_001")) { 797 logln("Known exception to likelyMax(locale=" + locale + ")" 798 + " == " + "likelyMax(defaultContent=" + localeParent 799 + ")"); 800 continue; 801 } 802 assertEquals("likelyMax(locale=" + locale + ")" + " == " 803 + "likelyMax(defaultContent=" + localeParent + ")", 804 maxLocaleParent, maxLocale); 805 } 806 807 } 808 highestShared(String parent, Set<String> children)809 private String highestShared(String parent, Set<String> children) { 810 M4<PathHeader, String, String, Boolean> data = ChainedMap.of(new TreeMap<PathHeader, Object>(), new TreeMap<String, Object>(), 811 new TreeMap<String, Object>(), Boolean.class); 812 CLDRFile parentFile = testInfo.getCLDRFile(parent, true); 813 PathHeader.Factory phf = PathHeader.getFactory(testInfo.getEnglish()); 814 for (String child : children) { 815 CLDRFile cldrFile = testInfo.getCLDRFile(child, false); 816 for (String path : cldrFile) { 817 if (path.contains("/identity")) { 818 continue; 819 } 820 if (path.contains("provisional") || path.contains("unconfirmed")) { 821 continue; 822 } 823 String value = cldrFile.getStringValue(path); 824 // double-check 825 String parentValue = parentFile.getStringValue(path); 826 if (value.equals(parentValue)) { 827 continue; 828 } 829 PathHeader ph = phf.fromPath(path); 830 data.put(ph, value, child, Boolean.TRUE); 831 data.put(ph, parentValue == null ? "∅∅∅" : parentValue, child, Boolean.TRUE); 832 } 833 } 834 StringBuilder result = new StringBuilder(); 835 for (Entry<PathHeader, Map<String, Map<String, Boolean>>> entry : data) { 836 for (Entry<String, Map<String, Boolean>> item : entry.getValue().entrySet()) { 837 result.append("\n") 838 .append(entry.getKey()) 839 .append("\t") 840 .append(item.getKey() + "\t" + item.getValue().keySet()); 841 } 842 } 843 return result.toString(); 844 } 845 846 public static class Inheritance { 847 public static final Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO 848 .getDefaultContentLocales(); 849 public static final Multimap<String, String> parentToChildren; 850 851 static { 852 Multimap<String, String> _parentToChildren = TreeMultimap.create(); 853 for (String child : testInfo.getCldrFactory().getAvailable()) { 854 if (child.equals("root")) { 855 continue; 856 } 857 String localeParent = LocaleIDParser.getParent(child); _parentToChildren.put(localeParent, child)858 _parentToChildren.put(localeParent, child); 859 } 860 parentToChildren = ImmutableMultimap.copyOf(_parentToChildren); 861 } 862 showChain(String prefix, String gparent, String current)863 public static void showChain(String prefix, String gparent, String current) { 864 Collection<String> children = parentToChildren.get(current); 865 if (children == null) { 866 throw new IllegalArgumentException(); 867 } 868 prefix += current + (defaultContents.contains(current) ? "*" : "") 869 + (isLikelyEquivalent(gparent, current) ? "~" : "") + "\t"; 870 871 // find leaves 872 Set<String> parents = new LinkedHashSet<>(children); 873 parents.retainAll(parentToChildren.keySet()); 874 Set<String> leaves = new LinkedHashSet<>(children); 875 leaves.removeAll(parentToChildren.keySet()); 876 if (!leaves.isEmpty()) { 877 List<String> presentation = new ArrayList<>(); 878 boolean gotDc = false; 879 for (String s : leaves) { 880 String shown = s; 881 if (isLikelyEquivalent(current, s)) { 882 shown += "~"; 883 } 884 if (defaultContents.contains(s)) { 885 gotDc = true; 886 shown += "*"; 887 } 888 if (!shown.equals(s)) { 889 presentation.add(0, shown); 890 } else { 891 presentation.add(shown); 892 } 893 } 894 if (!gotDc) { 895 int debug = 0; 896 } 897 if (leaves.size() == 1) { 898 System.out.println(prefix + CollectionUtilities.join(presentation, " ")); 899 } else { 900 System.out.println(prefix + "{" + CollectionUtilities.join(presentation, " ") + "}"); 901 } 902 } 903 for (String parent : parents) { 904 showChain(prefix, current, parent); 905 } 906 } 907 isLikelyEquivalent(String locale1, String locale2)908 static boolean isLikelyEquivalent(String locale1, String locale2) { 909 if (locale1.equals(locale2)) { 910 return true; 911 } 912 try { 913 String maxLocale1 = LikelySubtags.maximize(locale1, likelyData); 914 String maxLocale2 = LikelySubtags.maximize(locale2, likelyData); 915 return maxLocale1 != null && Objects.equal(maxLocale1, maxLocale2); 916 } catch (Exception e) { 917 return false; 918 } 919 } 920 } 921 922 static final Map<String, String> likelyData = SUPPLEMENTAL_DATA_INFO 923 .getLikelySubtags(); 924 TestLikelySubtagsComplete()925 public void TestLikelySubtagsComplete() { 926 LanguageTagParser ltp = new LanguageTagParser(); 927 for (String locale : testInfo.getCldrFactory().getAvailable()) { 928 if (locale.equals("root")) { 929 continue; 930 } 931 String maxLocale = LikelySubtags.maximize(locale, likelyData); 932 if (maxLocale == null) { 933 errln("Locale missing likely subtag: " + locale); 934 continue; 935 } 936 ltp.set(maxLocale); 937 if (ltp.getLanguage().isEmpty() || ltp.getScript().isEmpty() 938 || ltp.getRegion().isEmpty()) { 939 errln("Locale has defective likely subtag: " + locale + " => " 940 + maxLocale); 941 } 942 } 943 } 944 showDifferences(String locale)945 private void showDifferences(String locale) { 946 CLDRFile cldrFile = testInfo.getCLDRFile(locale, false); 947 final String localeParent = LocaleIDParser.getParent(locale); 948 CLDRFile parentFile = testInfo.getCLDRFile(localeParent, true); 949 int funnyCount = 0; 950 for (Iterator<String> it = cldrFile.iterator("", 951 cldrFile.getComparator()); it.hasNext();) { 952 String path = it.next(); 953 if (path.contains("/identity")) { 954 continue; 955 } 956 final String fullXPath = cldrFile.getFullXPath(path); 957 if (fullXPath.contains("[@draft=\"unconfirmed\"]") 958 || fullXPath.contains("[@draft=\"provisional\"]")) { 959 funnyCount++; 960 continue; 961 } 962 logln("\tpath:\t" + path); 963 logln("\t\t" + locale + " value:\t<" 964 + cldrFile.getStringValue(path) + ">"); 965 final String parentFullPath = parentFile.getFullXPath(path); 966 logln("\t\t" + localeParent + " value:\t<" 967 + parentFile.getStringValue(path) + ">"); 968 logln("\t\t" + locale + " fullpath:\t" + fullXPath); 969 logln("\t\t" + localeParent + " fullpath:\t" + parentFullPath); 970 } 971 logln("\tCount of non-approved:\t" + funnyCount); 972 } 973 974 enum MissingType { 975 plurals, main_exemplars, no_main, collation, index_exemplars, punct_exemplars 976 } 977 TestCoreData()978 public void TestCoreData() { 979 Set<String> availableLanguages = testInfo.getCldrFactory() 980 .getAvailableLanguages(); 981 PluralInfo rootRules = SUPPLEMENTAL_DATA_INFO.getPlurals( 982 PluralType.cardinal, "root"); 983 EnumSet<MissingType> errors = EnumSet.of(MissingType.collation); 984 EnumSet<MissingType> warnings = EnumSet.of(MissingType.collation, 985 MissingType.index_exemplars, MissingType.punct_exemplars); 986 987 Set<String> collations = new HashSet<String>(); 988 989 // collect collation info 990 Factory collationFactory = Factory.make(CLDRPaths.COLLATION_DIRECTORY, 991 ".*", DraftStatus.contributed); 992 for (String localeID : collationFactory.getAvailable()) { 993 // if (localeID.equals("root")) { 994 // CLDRFile cldrFile = collationFactory.make(localeID, false, 995 // DraftStatus.contributed); 996 // for (String path : cldrFile) { 997 // if (path.startsWith("//ldml/collations")) { 998 // String fullPath = cldrFile.getFullXPath(path); 999 // String valid = parts.set(fullPath).getAttributeValue(1, 1000 // "validSubLocales"); 1001 // for (String validSub : valid.trim().split("\\s+")) { 1002 // if (isTopLevel(validSub)) { 1003 // collations.add(validSub); 1004 // } 1005 // } 1006 // break; // done with root 1007 // } 1008 // } 1009 // } else 1010 if (isTopLevel(localeID)) { 1011 collations.add(localeID); 1012 } 1013 } 1014 logln(collations.toString()); 1015 1016 Set<String> allLanguages = Builder.with(new TreeSet<String>()) 1017 .addAll(collations).addAll(availableLanguages).freeze(); 1018 1019 for (String localeID : allLanguages) { 1020 if (localeID.equals("root")) { 1021 continue; // skip script locales 1022 } 1023 if (!isTopLevel(localeID)) { 1024 continue; 1025 } 1026 1027 errors.clear(); 1028 warnings.clear(); 1029 1030 String name = "Locale:" + localeID + " (" 1031 + testInfo.getEnglish().getName(localeID) + ")"; 1032 1033 if (!collations.contains(localeID)) { 1034 warnings.add(MissingType.collation); 1035 logln(name + " is missing " + MissingType.collation.toString()); 1036 } 1037 1038 try { 1039 CLDRFile cldrFile = testInfo.getCldrFactory().make(localeID, 1040 false, DraftStatus.contributed); 1041 1042 String wholeFileAlias = cldrFile.getStringValue("//ldml/alias"); 1043 if (wholeFileAlias != null) { 1044 logln("Whole-file alias:" + name); 1045 continue; 1046 } 1047 1048 PluralInfo pluralInfo = SUPPLEMENTAL_DATA_INFO.getPlurals( 1049 PluralType.cardinal, localeID); 1050 if (pluralInfo == rootRules) { 1051 logln(name + " is missing " 1052 + MissingType.plurals.toString()); 1053 warnings.add(MissingType.plurals); 1054 } 1055 UnicodeSet main = cldrFile.getExemplarSet("", 1056 WinningChoice.WINNING); 1057 if (main == null || main.isEmpty()) { 1058 errln(" " + name + " is missing " 1059 + MissingType.main_exemplars.toString()); 1060 errors.add(MissingType.main_exemplars); 1061 } 1062 UnicodeSet index = cldrFile.getExemplarSet("index", 1063 WinningChoice.WINNING); 1064 if (index == null || index.isEmpty()) { 1065 logln(name + " is missing " 1066 + MissingType.index_exemplars.toString()); 1067 warnings.add(MissingType.index_exemplars); 1068 } 1069 UnicodeSet punctuation = cldrFile.getExemplarSet("punctuation", 1070 WinningChoice.WINNING); 1071 if (punctuation == null || punctuation.isEmpty()) { 1072 logln(name + " is missing " 1073 + MissingType.punct_exemplars.toString()); 1074 warnings.add(MissingType.punct_exemplars); 1075 } 1076 } catch (Exception e) { 1077 errln(" " + name + " is missing main locale data."); 1078 errors.add(MissingType.no_main); 1079 } 1080 1081 // report errors 1082 1083 if (errors.isEmpty() && warnings.isEmpty()) { 1084 logln(name + ": No problems..."); 1085 } 1086 } 1087 } 1088 isTopLevel(String localeID)1089 private boolean isTopLevel(String localeID) { 1090 return "root".equals(LocaleIDParser.getParent(localeID)); 1091 } 1092 1093 /** 1094 * Tests that every dtd item is connected from root 1095 */ TestDtdCompleteness()1096 public void TestDtdCompleteness() { 1097 for (DtdType type : DtdType.values()) { 1098 DtdData dtdData = DtdData.getInstance(type); 1099 Set<Element> descendents = new LinkedHashSet<Element>(); 1100 dtdData.getDescendents(dtdData.ROOT, descendents); 1101 Set<Element> elements = dtdData.getElements(); 1102 if (!elements.equals(descendents)) { 1103 for (Element e : elements) { 1104 if (!descendents.contains(e) && !e.equals(dtdData.PCDATA) 1105 && !e.equals(dtdData.ANY)) { 1106 errln(type + ": Element " + e 1107 + " not contained in descendents of ROOT."); 1108 } 1109 } 1110 for (Element e : descendents) { 1111 if (!elements.contains(e)) { 1112 errln(type + ": Element " + e 1113 + ", descendent of ROOT, not in elements."); 1114 } 1115 } 1116 } 1117 LinkedHashSet<Element> all = new LinkedHashSet<Element>(descendents); 1118 all.addAll(elements); 1119 Set<Attribute> attributes = dtdData.getAttributes(); 1120 for (Attribute a : attributes) { 1121 if (!elements.contains(a.element)) { 1122 errln(type + ": Attribute " + a + " isn't for any element."); 1123 } 1124 } 1125 } 1126 } 1127 TestBasicDTDCompatibility()1128 public void TestBasicDTDCompatibility() { 1129 1130 // Only run the rest in exhaustive mode, since it requires CLDR_ARCHIVE_DIRECTORY 1131 if (getInclusion() <= 5) { 1132 return; 1133 } 1134 1135 final String oldCommon = CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + Versions.v22_1.toString() + "/common"; 1136 1137 // set up exceptions 1138 Set<String> changedToEmpty = new HashSet<String>( 1139 Arrays.asList(new String[] { "version", "languageCoverage", 1140 "scriptCoverage", "territoryCoverage", 1141 "currencyCoverage", "timezoneCoverage", 1142 "skipDefaultLocale" })); 1143 Set<String> PCDATA = new HashSet<String>(); 1144 PCDATA.add("PCDATA"); 1145 Set<String> EMPTY = new HashSet<String>(); 1146 EMPTY.add("EMPTY"); 1147 Set<String> VERSION = new HashSet<String>(); 1148 VERSION.add("version"); 1149 1150 // test all DTDs 1151 for (DtdType dtd : DtdType.values()) { 1152 try { 1153 ElementAttributeInfo oldDtd = ElementAttributeInfo.getInstance( 1154 oldCommon, dtd); 1155 ElementAttributeInfo newDtd = ElementAttributeInfo 1156 .getInstance(dtd); 1157 1158 if (oldDtd == newDtd) { 1159 continue; 1160 } 1161 Relation<String, String> oldElement2Children = oldDtd 1162 .getElement2Children(); 1163 Relation<String, String> newElement2Children = newDtd 1164 .getElement2Children(); 1165 1166 Relation<String, String> oldElement2Attributes = oldDtd 1167 .getElement2Attributes(); 1168 Relation<String, String> newElement2Attributes = newDtd 1169 .getElement2Attributes(); 1170 1171 for (String element : oldElement2Children.keySet()) { 1172 Set<String> oldChildren = oldElement2Children 1173 .getAll(element); 1174 Set<String> newChildren = newElement2Children 1175 .getAll(element); 1176 if (newChildren == null) { 1177 if (!knownElementExceptions.contains(Pair.of(dtd.toString(), element))) { 1178 errln("Old " + dtd + " contains element not in new: <" 1179 + element + ">"); 1180 } 1181 continue; 1182 } 1183 Set<String> funny = containsInOrder(newChildren, 1184 oldChildren); 1185 if (funny != null) { 1186 if (changedToEmpty.contains(element) 1187 && oldChildren.equals(PCDATA) 1188 && newChildren.equals(EMPTY)) { 1189 // ok, skip 1190 } else { 1191 errln("Old " + dtd + " element <" + element 1192 + "> has children Missing/Misordered:\t" 1193 + funny + "\n\t\tOld:\t" + oldChildren 1194 + "\n\t\tNew:\t" + newChildren); 1195 } 1196 } 1197 1198 Set<String> oldAttributes = oldElement2Attributes 1199 .getAll(element); 1200 if (oldAttributes == null) { 1201 oldAttributes = Collections.emptySet(); 1202 } 1203 Set<String> newAttributes = newElement2Attributes 1204 .getAll(element); 1205 if (newAttributes == null) { 1206 newAttributes = Collections.emptySet(); 1207 } 1208 if (!newAttributes.containsAll(oldAttributes)) { 1209 LinkedHashSet<String> missing = new LinkedHashSet<String>( 1210 oldAttributes); 1211 missing.removeAll(newAttributes); 1212 if (element.equals(dtd.toString()) 1213 && missing.equals(VERSION)) { 1214 // ok, skip 1215 } else { 1216 errln("Old " + dtd + " element <" + element 1217 + "> has attributes Missing:\t" + missing 1218 + "\n\t\tOld:\t" + oldAttributes 1219 + "\n\t\tNew:\t" + newAttributes); 1220 } 1221 } 1222 } 1223 } catch (Exception e) { 1224 e.printStackTrace(); 1225 errln("Failure with " + dtd); 1226 } 1227 } 1228 } 1229 containsInOrder(Set<T> superset, Set<T> subset)1230 private <T> Set<T> containsInOrder(Set<T> superset, Set<T> subset) { 1231 if (!superset.containsAll(subset)) { 1232 LinkedHashSet<T> missing = new LinkedHashSet<T>(subset); 1233 missing.removeAll(superset); 1234 return missing; 1235 } 1236 // ok, we know that they are subsets, try order 1237 Set<T> result = null; 1238 DiscreteComparator<T> comp = new DiscreteComparator.Builder<T>( 1239 Ordering.ARBITRARY).add(superset).get(); 1240 T last = null; 1241 for (T item : subset) { 1242 if (last != null) { 1243 int order = comp.compare(last, item); 1244 if (order != -1) { 1245 if (result == null) { 1246 result = new HashSet<T>(); 1247 result.add(last); 1248 result.add(item); 1249 } 1250 } 1251 } 1252 last = item; 1253 } 1254 return result; 1255 } 1256 TestDtdCompatibility()1257 public void TestDtdCompatibility() { 1258 1259 for (DtdType type : DtdType.values()) { 1260 DtdData dtdData = DtdData.getInstance(type); 1261 Map<String, Element> currentElementFromName = dtdData 1262 .getElementFromName(); 1263 1264 // current has no orphan 1265 Set<Element> orphans = new LinkedHashSet<Element>(dtdData 1266 .getElementFromName().values()); 1267 orphans.remove(dtdData.ROOT); 1268 orphans.remove(dtdData.PCDATA); 1269 orphans.remove(dtdData.ANY); 1270 Set<String> elementsWithoutAlt = new TreeSet<String>(); 1271 Set<String> elementsWithoutDraft = new TreeSet<String>(); 1272 Set<String> elementsWithoutAlias = new TreeSet<String>(); 1273 Set<String> elementsWithoutSpecial = new TreeSet<String>(); 1274 1275 for (Element element : dtdData.getElementFromName().values()) { 1276 Set<Element> children = element.getChildren().keySet(); 1277 orphans.removeAll(children); 1278 if (type == DtdType.ldml 1279 && !SUPPLEMENTAL_DATA_INFO.isDeprecated(type, 1280 element.name, "*", "*")) { 1281 if (element.getType() == ElementType.PCDATA) { 1282 if (element.getAttributeNamed("alt") == null) { 1283 elementsWithoutAlt.add(element.name); 1284 } 1285 if (element.getAttributeNamed("draft") == null) { 1286 elementsWithoutDraft.add(element.name); 1287 } 1288 } else { 1289 if (children.size() != 0 && !"alias".equals(element.name)) { 1290 if (element.getChildNamed("alias") == null) { 1291 elementsWithoutAlias.add(element.name); 1292 } 1293 if (element.getChildNamed("special") == null) { 1294 elementsWithoutSpecial.add(element.name); 1295 } 1296 } 1297 } 1298 } 1299 } 1300 assertEquals(type + " DTD Must not have orphan elements", 1301 Collections.EMPTY_SET, orphans); 1302 assertEquals(type 1303 + " DTD elements with PCDATA must have 'alt' attributes", 1304 Collections.EMPTY_SET, elementsWithoutAlt); 1305 assertEquals(type 1306 + " DTD elements with PCDATA must have 'draft' attributes", 1307 Collections.EMPTY_SET, elementsWithoutDraft); 1308 assertEquals(type 1309 + " DTD elements with children must have 'alias' elements", 1310 Collections.EMPTY_SET, elementsWithoutAlias); 1311 assertEquals( 1312 type 1313 + " DTD elements with children must have 'special' elements", 1314 Collections.EMPTY_SET, elementsWithoutSpecial); 1315 1316 // Only run the rest in exhaustive mode, since it requires CLDR_ARCHIVE_DIRECTORY 1317 if (getInclusion() <= 5) { 1318 return; 1319 } 1320 1321 for (Versions version : Versions.values()) { 1322 if (version == Versions.trunk) { 1323 continue; 1324 } else if (version == Versions.v1_1_1) { 1325 break; 1326 } 1327 DtdData dtdDataOld; 1328 try { 1329 dtdDataOld = DtdData.getInstance(type, version.toString()); 1330 } catch (IllegalArgumentException e) { 1331 boolean tooOld = false; 1332 switch (type) { 1333 case ldmlBCP47: 1334 case ldmlICU: 1335 tooOld = version.compareTo(Versions.v1_7_2) >= 0; 1336 break; 1337 case keyboard: 1338 case platform: 1339 tooOld = version.compareTo(Versions.v22_1) >= 0; 1340 break; 1341 default: 1342 break; 1343 } 1344 if (tooOld) { 1345 continue; 1346 } else { 1347 throw e; 1348 } 1349 } 1350 // verify that if E is in dtdDataOld, then it is in dtdData, and 1351 // has at least the same children and attributes 1352 for (Entry<String, Element> entry : dtdDataOld 1353 .getElementFromName().entrySet()) { 1354 Element oldElement = entry.getValue(); 1355 Element newElement = currentElementFromName.get(entry 1356 .getKey()); 1357 if (knownElementExceptions.contains(Pair.of(type.toString(), oldElement.getName()))) { 1358 continue; 1359 } 1360 if (assertNotNull(type 1361 + " DTD for trunk must be superset of v" + version 1362 + ", and must contain «" + oldElement.getName() 1363 + "»", newElement)) { 1364 // TODO Check order also 1365 for (Element oldChild : oldElement.getChildren() 1366 .keySet()) { 1367 if (oldChild == null) { 1368 continue; 1369 } 1370 Element newChild = newElement 1371 .getChildNamed(oldChild.getName()); 1372 1373 if (knownChildExceptions.contains(Pair.of(newElement.getName(), oldChild.getName()))) { 1374 continue; 1375 } 1376 assertNotNull( 1377 type + " DTD - Children of «" 1378 + newElement.getName() 1379 + "» must be superset of v" 1380 + version + ", and must contain «" 1381 + oldChild.getName() + "»", 1382 newChild); 1383 } 1384 for (Attribute oldAttribute : oldElement 1385 .getAttributes().keySet()) { 1386 Attribute newAttribute = newElement 1387 .getAttributeNamed(oldAttribute.getName()); 1388 1389 if (knownAttributeExceptions.contains(Pair.of(newElement.getName(), oldAttribute.getName()))) { 1390 continue; 1391 } 1392 assertNotNull( 1393 type + " DTD - Attributes of «" 1394 + newElement.getName() 1395 + "» must be superset of v" 1396 + version + ", and must contain «" 1397 + oldAttribute.getName() + "»", 1398 newAttribute); 1399 1400 } 1401 } 1402 } 1403 } 1404 } 1405 } 1406 1407 /** 1408 * Compare each path to each other path for every single file in CLDR 1409 */ TestDtdComparison()1410 public void TestDtdComparison() { 1411 // try some simple paths for regression 1412 1413 sortPaths( 1414 DtdData.getInstance(DtdType.ldml).getDtdComparator(null), 1415 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/dateTimeFormatLength[@type=\"full\"]/dateTimeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1416 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats"); 1417 1418 sortPaths( 1419 DtdData.getInstance(DtdType.supplementalData).getDtdComparator( 1420 null), 1421 "//supplementalData/territoryContainment/group[@type=\"419\"][@contains=\"013 029 005\"][@grouping=\"true\"]", 1422 "//supplementalData/territoryContainment/group[@type=\"003\"][@contains=\"021 013 029\"][@grouping=\"true\"]"); 1423 1424 //checkDtdComparatorForResource("TestBasic_ja.xml", DtdType.ldmlICU); 1425 } 1426 TestDtdComparisonsAll()1427 public void TestDtdComparisonsAll() { 1428 if (getInclusion() <= 5) { // Only run this test in exhaustive mode. 1429 return; 1430 } 1431 for (File file : CLDRConfig.getInstance().getAllCLDRFilesEndingWith(".xml")) { 1432 checkDtdComparatorFor(file, null); 1433 } 1434 } 1435 checkDtdComparatorForResource(String fileToRead, DtdType overrideDtdType)1436 public void checkDtdComparatorForResource(String fileToRead, 1437 DtdType overrideDtdType) { 1438 MyHandler myHandler = new MyHandler(overrideDtdType); 1439 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 1440 try { 1441 myHandler.fileName = fileToRead; 1442 xfr.read(myHandler.fileName, TestBasic.class, -1, true); 1443 logln(myHandler.fileName); 1444 } catch (Exception e) { 1445 Throwable t = e; 1446 StringBuilder b = new StringBuilder(); 1447 String indent = ""; 1448 while (t != null) { 1449 b.append(indent).append(t.getMessage()); 1450 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t"; 1451 t = t.getCause(); 1452 } 1453 errln(b.toString()); 1454 return; 1455 } 1456 DtdData dtdData = DtdData.getInstance(myHandler.dtdType); 1457 sortPaths(dtdData.getDtdComparator(null), myHandler.data); 1458 } 1459 checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType)1460 public void checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType) { 1461 MyHandler myHandler = new MyHandler(overrideDtdType); 1462 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 1463 try { 1464 myHandler.fileName = fileToRead.getCanonicalPath(); 1465 xfr.read(myHandler.fileName, -1, true); 1466 logln(myHandler.fileName); 1467 } catch (Exception e) { 1468 Throwable t = e; 1469 StringBuilder b = new StringBuilder(); 1470 String indent = ""; 1471 while (t != null) { 1472 b.append(indent).append(t.getMessage()); 1473 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t"; 1474 t = t.getCause(); 1475 } 1476 errln(b.toString()); 1477 return; 1478 } 1479 DtdData dtdData = DtdData.getInstance(myHandler.dtdType); 1480 sortPaths(dtdData.getDtdComparator(null), myHandler.data); 1481 } 1482 1483 static class MyHandler extends XMLFileReader.SimpleHandler { 1484 private String fileName; 1485 private DtdType dtdType; 1486 private final Set<String> data = new LinkedHashSet<>(); 1487 MyHandler(DtdType overrideDtdType)1488 public MyHandler(DtdType overrideDtdType) { 1489 dtdType = overrideDtdType; 1490 } 1491 handlePathValue(String path, String value)1492 public void handlePathValue(String path, String value) { 1493 if (dtdType == null) { 1494 try { 1495 dtdType = DtdType.fromPath(path); 1496 } catch (Exception e) { 1497 throw new IllegalArgumentException( 1498 "Can't read " + fileName, e); 1499 } 1500 } 1501 data.add(path); 1502 } 1503 } 1504 sortPaths(Comparator<String> dc, Collection<String> paths)1505 public void sortPaths(Comparator<String> dc, Collection<String> paths) { 1506 String[] array = paths.toArray(new String[paths.size()]); 1507 sortPaths(dc, array); 1508 } 1509 sortPaths(Comparator<String> dc, String... array)1510 public void sortPaths(Comparator<String> dc, String... array) { 1511 Arrays.sort(array, 0, array.length, dc); 1512 } 1513 // public void TestNewDtdData() moved to TestDtdData 1514 } 1515