1 package org.unicode.cldr.util; 2 3 import java.io.File; 4 import java.io.StringReader; 5 import java.util.ArrayList; 6 import java.util.Arrays; 7 import java.util.Collection; 8 import java.util.Collections; 9 import java.util.Comparator; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.Iterator; 13 import java.util.LinkedHashMap; 14 import java.util.LinkedHashSet; 15 import java.util.List; 16 import java.util.Locale; 17 import java.util.Map; 18 import java.util.Map.Entry; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.concurrent.ConcurrentHashMap; 22 import java.util.concurrent.ConcurrentMap; 23 import java.util.regex.Pattern; 24 25 import com.google.common.base.CharMatcher; 26 import com.google.common.base.Joiner; 27 import com.google.common.base.Splitter; 28 import com.google.common.collect.ImmutableSet; 29 import com.google.common.collect.ImmutableSet.Builder; 30 import com.google.common.collect.ImmutableSetMultimap; 31 import com.google.common.collect.Multimap; 32 import com.google.common.collect.TreeMultimap; 33 import com.ibm.icu.impl.Relation; 34 import com.ibm.icu.text.Transform; 35 36 /** 37 * An immutable object that contains the structure of a DTD. 
38 * @author markdavis 39 */ 40 public class DtdData extends XMLFileReader.SimpleHandler { 41 private static final String COMMENT_PREFIX = System.lineSeparator() + " "; 42 private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false); 43 private static final boolean USE_SYNTHESIZED = false; 44 45 private static final boolean DEBUG = false; 46 private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]"); 47 48 private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class); 49 private Map<String, Element> nameToElement = new HashMap<>(); 50 private MapComparator<String> elementComparator; 51 private MapComparator<String> attributeComparator; 52 53 public final Element ROOT; 54 public final Element PCDATA = elementFrom("#PCDATA"); 55 public final Element ANY = elementFrom("ANY"); 56 public final DtdType dtdType; 57 public final String version; 58 private Element lastElement; 59 private Attribute lastAttribute; 60 private Set<String> preCommentCache; 61 private DtdComparator dtdComparator; 62 63 public enum AttributeStatus { 64 distinguished ("§d"), 65 value ("§v"), 66 metadata ("§m︎"); 67 public final String shortName; AttributeStatus(String shortName)68 AttributeStatus(String shortName) { 69 this.shortName = shortName; 70 } getShortName(AttributeStatus status)71 public static String getShortName(AttributeStatus status) { 72 return status == null ? 
"" : status.shortName; 73 } 74 } 75 76 public enum Mode { 77 REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null"); 78 79 public final String source; 80 Mode(String s)81 Mode(String s) { 82 source = s; 83 } 84 forString(String mode)85 public static Mode forString(String mode) { 86 for (Mode value : Mode.values()) { 87 if (value.source.equals(mode)) { 88 return value; 89 } 90 } 91 if (mode == null) { 92 return NULL; 93 } 94 throw new IllegalArgumentException(mode); 95 } 96 } 97 98 public enum AttributeType { 99 CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE 100 } 101 102 static final Set<String> DRAFT_ON_NON_LEAF_ALLOWED = ImmutableSet.of("collation", "transform", "unitPreferenceData", "rulesetGrouping"); 103 104 public static class Attribute implements Named { 105 private static final Joiner JOINER_COMMA_SPACE = Joiner.on(", "); 106 public static final String AUG_TRAIL = "⟫"; 107 public static final String AUG_LEAD = "⟪"; 108 public static final String ENUM_TRAIL = "⟩"; 109 public static final String ENUM_LEAD = "⟨"; 110 public static final Pattern LEAD_TRAIL = Pattern.compile("(.*[" + AUG_LEAD + ENUM_LEAD + "])(.*)([" + AUG_TRAIL + ENUM_TRAIL + "].*)"); 111 public final String name; 112 public final Element element; 113 public final Mode mode; 114 public final String defaultValue; 115 public final AttributeType type; 116 public final Map<String, Integer> values; 117 private final Set<String> commentsPre; 118 private Set<String> commentsPost; 119 private boolean isDeprecatedAttribute; 120 public AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations 121 private Set<String> deprecatedValues = Collections.emptySet(); 122 public MatchValue matchValue; 123 private final Comparator<String> attributeValueComparator; 124 Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)125 private 
Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) { 126 commentsPre = firstComment; 127 element = element2; 128 name = aName.intern(); 129 if (name.equals("draft") // normally never permitted on elements with children, but special cases... 130 && !DRAFT_ON_NON_LEAF_ALLOWED.contains(element.getName())) { 131 int elementChildrenCount = element.getChildren().size(); 132 if (elementChildrenCount > 1 133 || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) { 134 isDeprecatedAttribute = true; 135 if (DEBUG) { 136 System.out.println(element.getName() + ":" + element.getChildren()); 137 } 138 } 139 } 140 mode = mode2; 141 defaultValue = value2 == null ? null 142 : value2.intern(); 143 AttributeType _type = AttributeType.ENUMERATED_TYPE; 144 Map<String, Integer> _values = Collections.emptyMap(); 145 if (split.length == 1) { 146 try { 147 _type = AttributeType.valueOf(split[0]); 148 } catch (Exception e) { 149 } 150 } 151 type = _type; 152 153 if (_type == AttributeType.ENUMERATED_TYPE) { 154 LinkedHashMap<String, Integer> temp = new LinkedHashMap<>(); 155 for (String part : split) { 156 if (part.length() != 0) { 157 temp.put(part.intern(), temp.size()); 158 } 159 } 160 _values = Collections.unmodifiableMap(temp); 161 } 162 values = _values; 163 attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name); 164 } 165 166 @Override toString()167 public String toString() { 168 return element.name + ":" + name; 169 } 170 getSampleValue()171 public String getSampleValue() { 172 return type == AttributeType.ENUMERATED_TYPE ? (values.containsKey("year") ? "year" : values.keySet().iterator().next()) 173 : matchValue != null ? 
matchValue.getSample() 174 : MatchValue.DEFAULT_SAMPLE; 175 } 176 appendDtdString(StringBuilder b)177 public StringBuilder appendDtdString(StringBuilder b) { 178 Attribute a = this; 179 b.append("<!ATTLIST " + element.name + " " + a.name); 180 boolean first; 181 if (a.type == AttributeType.ENUMERATED_TYPE) { 182 b.append(" ("); 183 first = true; 184 for (String s : a.values.keySet()) { 185 if (deprecatedValues.contains(s)) { 186 continue; 187 } 188 if (first) { 189 first = false; 190 } else { 191 b.append(" | "); 192 } 193 b.append(s); 194 } 195 b.append(")"); 196 } else { 197 b.append(' ').append(a.type); 198 } 199 if (a.mode != Mode.NULL) { 200 b.append(" ").append(a.mode.source); 201 } 202 if (a.defaultValue != null) { 203 b.append(" \"").append(a.defaultValue).append('"'); 204 } 205 b.append(" >"); 206 return b; 207 } 208 features()209 public String features() { 210 return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString()) 211 + (mode == Mode.NULL ? "" : ", mode=" + mode) 212 + (defaultValue == null ? 
"" : ", default=" + defaultValue); 213 } 214 215 @Override getName()216 public String getName() { 217 return name; 218 } 219 220 private static Splitter COMMA = Splitter.on(',').trimResults(); 221 addComment(String commentIn)222 public void addComment(String commentIn) { 223 if (commentIn.startsWith("@")) { 224 // there are exactly 2 cases: deprecated and ordered 225 switch (commentIn) { 226 case "@METADATA": 227 attributeStatus = AttributeStatus.metadata; 228 break; 229 case "@VALUE": 230 attributeStatus = AttributeStatus.value; 231 break; 232 case "@DEPRECATED": 233 isDeprecatedAttribute = true; 234 break; 235 default: 236 int colonPos = commentIn.indexOf(':'); 237 if (colonPos < 0) { 238 throw new IllegalArgumentException("Unrecognized annotation: " + commentIn); 239 } 240 String command = commentIn.substring(0, colonPos); 241 String argument = commentIn.substring(colonPos + 1); 242 switch(command) { 243 case "@DEPRECATED": 244 deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(argument))); 245 break; 246 case "@MATCH": 247 if (matchValue != null) { 248 throw new IllegalArgumentException("Conflicting @MATCH: " + matchValue.getName() + " & " + argument); 249 } 250 matchValue = MatchValue.of(argument); 251 break; 252 default: 253 throw new IllegalArgumentException("Unrecognized annotation: " + commentIn); 254 } 255 } 256 return; 257 } 258 commentsPost = addUnmodifiable(commentsPost, commentIn.trim()); 259 } 260 261 /** 262 * Special version of identity; only considers name and name of element 263 */ 264 @Override equals(Object obj)265 public boolean equals(Object obj) { 266 if (!(obj instanceof Attribute)) { 267 return false; 268 } 269 Attribute that = (Attribute) obj; 270 return name.equals(that.name) 271 && element.name.equals(that.element.name) // don't use plain element: circularity 272 // not relevant to identity 273 // && Objects.equals(comment, that.comment) 274 // && mode.equals(that.mode) 275 // && Objects.equals(defaultValue, 
that.defaultValue) 276 // && type.equals(that.type) 277 // && values.equals(that.values) 278 ; 279 } 280 281 /** 282 * Special version of identity; only considers name and name of element 283 */ 284 @Override hashCode()285 public int hashCode() { 286 return name.hashCode() * 37 287 + element.name.hashCode() // don't use plain element: circularity 288 // not relevant to identity 289 // ) * 37 + Objects.hashCode(comment)) * 37 290 // + mode.hashCode()) * 37 291 // + Objects.hashCode(defaultValue)) * 37 292 // + type.hashCode()) * 37 293 // + values.hashCode() 294 ; 295 } 296 isDeprecated()297 public boolean isDeprecated() { 298 return isDeprecatedAttribute; 299 } 300 isDeprecatedValue(String value)301 public boolean isDeprecatedValue(String value) { 302 return deprecatedValues.contains(value); 303 } 304 getStatus()305 public AttributeStatus getStatus() { 306 return attributeStatus; 307 } 308 getValueStatus(String value)309 public ValueStatus getValueStatus(String value) { 310 return deprecatedValues.contains(value) ? ValueStatus.invalid 311 : type == AttributeType.ENUMERATED_TYPE ? (values.containsKey(value) ? ValueStatus.valid : ValueStatus.invalid) 312 : matchValue == null ? ValueStatus.unknown 313 : matchValue.is(value) ? ValueStatus.valid 314 : ValueStatus.invalid; 315 } 316 getMatchString()317 public String getMatchString() { 318 return type == AttributeType.ENUMERATED_TYPE ? ENUM_LEAD + JOINER_COMMA_SPACE.join(values.keySet()) + ENUM_TRAIL 319 : matchValue != null ? 
AUG_LEAD + matchValue.toString() + AUG_TRAIL 320 : ""; 321 } 322 getMatchingName(Map<Attribute, Integer> attributes)323 public Attribute getMatchingName(Map<Attribute, Integer> attributes) { 324 for (Attribute attribute : attributes.keySet()) { 325 if (name.equals(attribute.getName())) { 326 return attribute; 327 } 328 } 329 return null; 330 } 331 332 } 333 334 public enum ValueStatus {invalid, unknown, valid} 335 DtdData(DtdType type, String version)336 private DtdData(DtdType type, String version) { 337 this.dtdType = type; 338 this.ROOT = elementFrom(type.rootType.toString()); 339 this.version = version; 340 } 341 addAttribute(String eName, String aName, String type, String mode, String value)342 private void addAttribute(String eName, String aName, String type, String mode, String value) { 343 Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache); 344 preCommentCache = null; 345 getAttributesFromName().put(aName, a); 346 CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size()); 347 lastElement = null; 348 lastAttribute = a; 349 } 350 351 public enum ElementType { 352 EMPTY, ANY, PCDATA("(#PCDATA)"), CHILDREN; 353 public final String source; 354 ElementType(String s)355 private ElementType(String s) { 356 source = s; 357 } 358 ElementType()359 private ElementType() { 360 source = name(); 361 } 362 } 363 364 interface Named { getName()365 String getName(); 366 } 367 368 public enum ElementStatus { 369 regular, metadata 370 } 371 372 public static class Element implements Named { 373 public final String name; 374 private String rawModel; 375 private ElementType type; 376 private final Map<Element, Integer> children = new LinkedHashMap<>(); 377 private final Map<Attribute, Integer> attributes = new LinkedHashMap<>(); 378 private Set<String> commentsPre; 379 private Set<String> commentsPost; 380 private String model; 381 private boolean isOrderedElement; 382 private 
boolean isDeprecatedElement; 383 private ElementStatus elementStatus = ElementStatus.regular; 384 Element(String name2)385 private Element(String name2) { 386 name = name2.intern(); 387 } 388 setChildren(DtdData dtdData, String model, Set<String> precomments)389 private void setChildren(DtdData dtdData, String model, Set<String> precomments) { 390 this.commentsPre = precomments; 391 rawModel = model; 392 this.model = clean(model); 393 if (model.equals("EMPTY")) { 394 type = ElementType.EMPTY; 395 return; 396 } 397 type = ElementType.CHILDREN; 398 for (String part : FILLER.split(model)) { 399 if (part.length() != 0) { 400 if (part.equals("#PCDATA")) { 401 type = ElementType.PCDATA; 402 } else if (part.equals("ANY")) { 403 type = ElementType.ANY; 404 } else { 405 CldrUtility.putNew(children, dtdData.elementFrom(part), children.size()); 406 } 407 } 408 } 409 if ((type == ElementType.CHILDREN) == (children.size() == 0) 410 && !model.startsWith("(#PCDATA|cp")) { 411 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model); 412 } 413 } 414 415 static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)"); 416 static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])"); 417 clean(String model2)418 private String clean(String model2) { 419 // (x) -> ( x ); 420 // x,y -> x, y 421 // x|y -> x | y 422 String result = CLEANER1.matcher(model2).replaceAll("$1 "); 423 result = CLEANER2.matcher(result).replaceAll(" $1"); 424 return result.equals(model2) 425 ? 
model2 426 : result; // for debugging 427 } 428 containsAttribute(String string)429 public boolean containsAttribute(String string) { 430 for (Attribute a : attributes.keySet()) { 431 if (a.name.equals(string)) { 432 return true; 433 } 434 } 435 return false; 436 } 437 438 @Override toString()439 public String toString() { 440 return name; 441 } 442 toDtdString()443 public String toDtdString() { 444 return "<!ELEMENT " + name + " " + getRawModel() + " >"; 445 } 446 getType()447 public ElementType getType() { 448 return type; 449 } 450 getChildren()451 public Map<Element, Integer> getChildren() { 452 return Collections.unmodifiableMap(children); 453 } 454 getAttributes()455 public Map<Attribute, Integer> getAttributes() { 456 return Collections.unmodifiableMap(attributes); 457 } 458 459 @Override getName()460 public String getName() { 461 return name; 462 } 463 getChildNamed(String string)464 public Element getChildNamed(String string) { 465 for (Element e : children.keySet()) { 466 if (e.name.equals(string)) { 467 return e; 468 } 469 } 470 return null; 471 } 472 getAttributeNamed(String string)473 public Attribute getAttributeNamed(String string) { 474 for (Attribute a : attributes.keySet()) { 475 if (a.name.equals(string)) { 476 return a; 477 } 478 } 479 return null; 480 } 481 addComment(String addition)482 public void addComment(String addition) { 483 if (addition.startsWith("@")) { 484 // there are exactly 3 cases: deprecated, ordered, and metadata 485 switch (addition) { 486 case "@ORDERED": 487 isOrderedElement = true; 488 break; 489 case "@DEPRECATED": 490 isDeprecatedElement = true; 491 break; 492 case "@METADATA": 493 elementStatus = ElementStatus.metadata; 494 break; 495 default: 496 throw new IllegalArgumentException("Unrecognized annotation: " + addition); 497 } 498 return; 499 } 500 commentsPost = addUnmodifiable(commentsPost, addition.trim()); 501 } 502 503 /** 504 * Special version of equals. Only the name is considered in the identity. 
505 */ 506 @Override equals(Object obj)507 public boolean equals(Object obj) { 508 if (!(obj instanceof Element)) { 509 return false; 510 } 511 Element that = (Element) obj; 512 return name.equals(that.name) 513 // not relevant to the identity of the object 514 // && Objects.equals(comment, that.comment) 515 // && type == that.type 516 // && attributes.equals(that.attributes) 517 // && children.equals(that.children) 518 ; 519 } 520 521 /** 522 * Special version of hashcode. Only the name is considered in the identity. 523 */ 524 @Override hashCode()525 public int hashCode() { 526 return name.hashCode() 527 // not relevant to the identity of the object 528 // * 37 + Objects.hashCode(comment) 529 //) * 37 + Objects.hashCode(type) 530 // ) * 37 + attributes.hashCode() 531 // ) * 37 + children.hashCode() 532 ; 533 } 534 isDeprecated()535 public boolean isDeprecated() { 536 return isDeprecatedElement; 537 } 538 isOrdered()539 public boolean isOrdered() { 540 return isOrderedElement; 541 } 542 getElementStatus()543 public ElementStatus getElementStatus() { 544 return elementStatus; 545 } 546 547 /** 548 * @return the rawModel 549 */ getRawModel()550 public String getRawModel() { 551 return rawModel; 552 } 553 } 554 elementFrom(String name)555 private Element elementFrom(String name) { 556 Element result = nameToElement.get(name); 557 if (result == null) { 558 nameToElement.put(name, result = new Element(name)); 559 } 560 return result; 561 } 562 addElement(String name2, String model)563 private void addElement(String name2, String model) { 564 Element element = elementFrom(name2); 565 element.setChildren(this, model, preCommentCache); 566 preCommentCache = null; 567 lastElement = element; 568 lastAttribute = null; 569 } 570 addComment(String comment)571 private void addComment(String comment) { 572 comment = comment.trim(); 573 if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky" 574 if (comment.startsWith("@")) { 575 throw new 
IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment"); 576 } 577 preCommentCache = addUnmodifiable(preCommentCache, comment); 578 } else if (lastElement != null) { 579 lastElement.addComment(comment); 580 } else if (lastAttribute != null) { 581 lastAttribute.addComment(comment); 582 } else { 583 if (comment.startsWith("@")) { 584 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment"); 585 } 586 preCommentCache = addUnmodifiable(preCommentCache, comment); 587 } 588 } 589 590 // TODO hide this 591 /** 592 * @deprecated 593 */ 594 @Deprecated 595 @Override handleElementDecl(String name, String model)596 public void handleElementDecl(String name, String model) { 597 if (SHOW_ALL) { 598 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) > 599 System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >"); 600 } 601 addElement(name, model); 602 } 603 604 // TODO hide this 605 /** 606 * @deprecated 607 */ 608 @Deprecated 609 @Override handleStartDtd(String name, String publicId, String systemId)610 public void handleStartDtd(String name, String publicId, String systemId) { 611 DtdType explicitDtdType = DtdType.valueOf(name); 612 if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) { 613 throw new IllegalArgumentException("Mismatch in dtdTypes"); 614 } 615 } 616 617 /** 618 * @deprecated 619 */ 620 @Deprecated 621 @Override handleAttributeDecl(String eName, String aName, String type, String mode, String value)622 public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) { 623 if (SHOW_ALL) { 624 // <!ATTLIST ldml draft ( approved | contributed | 
provisional | unconfirmed | true | false ) #IMPLIED > 625 // <!ATTLIST version number CDATA #REQUIRED > 626 // <!ATTLIST version cldrVersion CDATA #FIXED "27" > 627 628 System.out.println("<!ATTLIST " + eName 629 + " " + aName 630 + " " + type 631 + " " + mode 632 + (value == null ? "" : " \"" + value + "\"") 633 + " >"); 634 } 635 // HACK for 1.1.1 636 if (eName.equals("draft")) { 637 eName = "week"; 638 } 639 addAttribute(eName, aName, type, mode, value); 640 } 641 642 /** 643 * @deprecated 644 */ 645 @Deprecated 646 @Override handleComment(String path, String comment)647 public void handleComment(String path, String comment) { 648 if (SHOW_ALL) { 649 // <!-- true and false are deprecated. --> 650 System.out.println("<!-- " + comment.trim() + " -->"); 651 } 652 addComment(comment); 653 } 654 655 // TODO hide this 656 /** 657 * @deprecated 658 */ 659 @Deprecated 660 @Override handleEndDtd()661 public void handleEndDtd() { 662 throw new XMLFileReader.AbortException(); 663 } 664 665 /** 666 * Note that it always gets the trunk version 667 * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead 668 */ 669 @Deprecated getInstance(DtdType type)670 public static DtdData getInstance(DtdType type) { 671 return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory()); 672 } 673 674 /** 675 * Special form using version, used only by tests, etc. 676 */ getInstance(DtdType type, String version)677 public static DtdData getInstance(DtdType type, String version) { 678 File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory() 679 : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version); 680 681 return getInstance(type, version, directory); 682 } 683 684 private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE = new ConcurrentHashMap<>(); 685 686 /** 687 * Normal version of DtdData 688 * Get a DtdData, given the CLDR root directory. 
689 * @param type which DtdType to return 690 * @param directory the CLDR Root directory, which contains the "common" directory. 691 * @return 692 */ getInstance(DtdType type, File directory)693 public static DtdData getInstance(DtdType type, File directory) { 694 Pair<DtdType, File> key = new Pair<>(type, directory); 695 DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory)); 696 return data; 697 } 698 getInstance(DtdType type, String version, File directory)699 private static DtdData getInstance(DtdType type, String version, File directory) { 700 DtdData simpleHandler = new DtdData(type, version); 701 XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler); 702 if (type != type.rootType) { 703 // read the real first, then add onto it. 704 readFile(type.rootType, xfr, directory); 705 } 706 readFile(type, xfr, directory); 707 // HACK 708 if (type == DtdType.ldmlICU) { 709 Element special = simpleHandler.nameToElement.get("special"); 710 for (String extraElementName : Arrays.asList( 711 "icu:breakIteratorData", 712 "icu:UCARules", 713 "icu:scripts", 714 "icu:transforms", 715 "icu:ruleBasedNumberFormats", 716 "icu:isLeapMonth", 717 "icu:version", 718 "icu:breakDictionaryData", 719 "icu:depends")) { 720 Element extraElement = simpleHandler.nameToElement.get(extraElementName); 721 special.children.put(extraElement, special.children.size()); 722 } 723 } 724 if (simpleHandler.ROOT.children.size() == 0) { 725 throw new IllegalArgumentException(); // should never happen 726 } 727 simpleHandler.finish(); 728 simpleHandler.freeze(); 729 return simpleHandler; 730 } 731 finish()732 private void finish() { 733 dtdComparator = new DtdComparator(); 734 } 735 readFile(DtdType type, XMLFileReader xfr, File directory)736 public static void readFile(DtdType type, XMLFileReader xfr, File directory) { 737 File file = new File(directory, type.dtdPath); 738 StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>" 739 + "<!DOCTYPE " 
+ type 740 + " SYSTEM '" + file.getAbsolutePath() + "'>"); 741 xfr.read(type.toString(), s, -1, true); // DTD_TYPE_TO_FILE.get(type) 742 } 743 freeze()744 private void freeze() { 745 if (version == null) { // only generate for new versions 746 MergeLists<String> elementMergeList = new MergeLists<>(); 747 elementMergeList.add(dtdType.toString()); 748 MergeLists<String> attributeMergeList = new MergeLists<>(); 749 attributeMergeList.add("_q"); 750 751 for (Element element : nameToElement.values()) { 752 if (element.children.size() > 0) { 753 Collection<String> names = getNames(element.children.keySet()); 754 elementMergeList.add(names); 755 if (DEBUG) { 756 System.out.println(element.getName() + "\t→\t" + names); 757 } 758 } 759 if (element.attributes.size() > 0) { 760 Collection<String> names = getNames(element.attributes.keySet()); 761 attributeMergeList.add(names); 762 if (DEBUG) { 763 System.out.println(element.getName() + "\t→\t@" + names); 764 } 765 } 766 } 767 List<String> elementList = elementMergeList.merge(); 768 List<String> attributeList = attributeMergeList.merge(); 769 if (DEBUG) { 770 System.out.println("Element Ordering:\t" + elementList); 771 System.out.println("Attribute Ordering:\t" + attributeList); 772 } 773 elementComparator = new MapComparator<>(elementList).setErrorOnMissing(true).freeze(); 774 attributeComparator = new MapComparator<>(attributeList).setErrorOnMissing(true).freeze(); 775 } 776 nameToAttributes.freeze(); 777 nameToElement = Collections.unmodifiableMap(nameToElement); 778 } 779 getNames(Collection<? extends Named> keySet)780 private Collection<String> getNames(Collection<? 
extends Named> keySet) { 781 List<String> result = new ArrayList<>(); 782 for (Named e : keySet) { 783 result.add(e.getName()); 784 } 785 return result; 786 } 787 788 public enum DtdItem { 789 ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE 790 } 791 792 public interface AttributeValueComparator { compare(String element, String attribute, String value1, String value2)793 public int compare(String element, String attribute, String value1, String value2); 794 } 795 getDtdComparator(AttributeValueComparator avc)796 public Comparator<String> getDtdComparator(AttributeValueComparator avc) { 797 return dtdComparator; 798 } 799 getDtdComparator()800 public DtdComparator getDtdComparator() { 801 return dtdComparator; 802 } 803 804 public class DtdComparator implements Comparator<String> { 805 @Override compare(String path1, String path2)806 public int compare(String path1, String path2) { 807 XPathParts a = XPathParts.getFrozenInstance(path1); 808 XPathParts b = XPathParts.getFrozenInstance(path2); 809 return xpathComparator(a, b); 810 } 811 xpathComparator(XPathParts a, XPathParts b)812 public int xpathComparator(XPathParts a, XPathParts b) { 813 // there must always be at least one element 814 String baseA = a.getElement(0); 815 String baseB = b.getElement(0); 816 if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) { 817 throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB); 818 } 819 int min = Math.min(a.size(), b.size()); 820 Element parent = ROOT; 821 Element elementA; 822 for (int i = 1; i < min; ++i, parent = elementA) { 823 // add extra test for "fake" elements, used in diffing. they always start with _ 824 String elementRawA = a.getElement(i); 825 String elementRawB = b.getElement(i); 826 if (elementRawA.startsWith("_")) { 827 return elementRawB.startsWith("_") ? 
elementRawA.compareTo(elementRawB) : -1; 828 } else if (elementRawB.startsWith("_")) { 829 return 1; 830 } 831 // 832 elementA = nameToElement.get(elementRawA); 833 Element elementB = nameToElement.get(elementRawB); 834 if (elementA != elementB) { 835 int aa = parent.children.get(elementA); 836 int bb = parent.children.get(elementB); 837 return aa - bb; 838 } 839 int countA = a.getAttributeCount(i); 840 int countB = b.getAttributeCount(i); 841 if (countA == 0 && countB == 0) { 842 continue; 843 } 844 // we have two ways to compare the attributes. One based on the dtd, 845 // and one based on explicit comparators 846 847 // at this point the elements are the same and correspond to elementA 848 // in the dtd 849 850 // Handle the special added elements 851 String aqValue = a.getAttributeValue(i, "_q"); 852 if (aqValue != null) { 853 String bqValue = b.getAttributeValue(i, "_q"); 854 if (!aqValue.equals(bqValue)) { 855 int aValue = Integer.parseInt(aqValue); 856 int bValue = Integer.parseInt(bqValue); 857 return aValue - bValue; 858 } 859 --countA; 860 --countB; 861 } 862 863 attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) { 864 Attribute main = attr.getKey(); 865 String valueA = a.getAttributeValue(i, main.name); 866 String valueB = b.getAttributeValue(i, main.name); 867 if (valueA == null) { 868 if (valueB != null) { 869 return -1; 870 } 871 } else if (valueB == null) { 872 return 1; 873 } else if (valueA.equals(valueB)) { 874 --countA; 875 --countB; 876 if (countA == 0 && countB == 0) { 877 break attributes; 878 } 879 continue; // TODO 880 } else if (main.attributeValueComparator != null) { 881 return main.attributeValueComparator.compare(valueA, valueB); 882 } else if (main.values.size() != 0) { 883 int aa = main.values.get(valueA); 884 int bb = main.values.get(valueB); 885 return aa - bb; 886 } else { 887 return valueA.compareTo(valueB); 888 } 889 } 890 if (countA != 0 || countB != 0) { 891 throw new IllegalArgumentException(); 
// The next five lines close a definition that begins before this section; they are reproduced unchanged.
                }
            }
            return a.size() - b.size();
        }
    }

    /**
     * Returns this object's {@code attributeComparator}.
     */
    public MapComparator<String> getAttributeComparator() {
        return attributeComparator;
    }

    /**
     * Returns this object's {@code elementComparator}.
     */
    public MapComparator<String> getElementComparator() {
        return elementComparator;
    }

    /**
     * Returns the name-to-attributes relation. This is the internal relation itself, not a copy.
     */
    public Relation<String, Attribute> getAttributesFromName() {
        return nameToAttributes;
    }

    /**
     * Returns the name-to-element map. This is the internal map itself, not a copy.
     */
    public Map<String, Element> getElementFromName() {
        return nameToElement;
    }

    /**
     * Renders the DTD as text. Output starts from {@link #ROOT}; any element in the name map that was
     * not emitted by that walk is appended afterwards, preceded by a marker comment.
     */
    @Override
    public String toString() {
        StringBuilder b = new StringBuilder();
        // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) >
        // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. -->
        Seen seen = new Seen(dtdType);
        // ANY and PCDATA are never written out as element declarations.
        seen.seenElements.add(ANY);
        seen.seenElements.add(PCDATA);
        toString(ROOT, b, seen);

        // Hack for ldmlIcu: catch the items that are not mentioned in the original
        int currentEnd = b.length();
        for (Element e : nameToElement.values()) {
            toString(e, b, seen);
        }
        if (currentEnd != b.length()) {
            // Something was appended by the loop above, so label that tail.
            b.insert(currentEnd,
                System.lineSeparator() + System.lineSeparator()
                    + "<!-- Elements not reachable from root! -->"
                    + System.lineSeparator());
        }
        return b.toString();
    }

    /**
     * Records which elements and attributes have already been handled while rendering.
     * When the given type has a different {@code rootType}, everything reachable from that root DTD's
     * {@code ROOT} is pre-marked as seen, except the element named "special".
     */
    static final class Seen {
        Set<Element> seenElements = new HashSet<>();
        Set<Attribute> seenAttributes = new HashSet<>();

        public Seen(DtdType dtdType) {
            if (dtdType.rootType == dtdType) {
                return;
            }
            DtdData otherData = DtdData.getInstance(dtdType.rootType);
            walk(otherData, otherData.ROOT);
            seenElements.remove(otherData.nameToElement.get("special"));
        }

        /**
         * Marks {@code current}, its attributes, and all of its descendants as seen.
         * An element that is already marked is not descended into again, which bounds the recursion
         * even if the child graph shares sub-trees or contains a cycle.
         */
        private void walk(DtdData otherData, Element current) {
            if (!seenElements.add(current)) {
                return; // already visited
            }
            seenAttributes.addAll(current.attributes.keySet());
            for (Element child : current.children.keySet()) {
                walk(otherData, child);
            }
        }
    }

    /**
     * Adds {@code start} and every element reachable through {@code children} to {@code toAddTo}.
     * Elements already present in {@code toAddTo} are not descended into.
     *
     * @param start the element to begin from
     * @param toAddTo the set that receives the elements; it is modified in place
     * @return {@code toAddTo}
     */
    public Set<Element> getDescendents(Element start, Set<Element> toAddTo) {
        if (!toAddTo.contains(start)) {
            toAddTo.add(start);
            for (Element e : start.children.keySet()) {
                getDescendents(e, toAddTo);
            }
        }
        return toAddTo;
    }

    /**
     * Appends the declaration of {@code current}, the declarations of its not-yet-seen attributes
     * (each with its annotation comments), and then, recursively, its children.
     * Does nothing if {@code current} has already been seen.
     */
    private void toString(Element current, StringBuilder b, Seen seen) {
        if (!seen.seenElements.add(current)) {
            return;
        }
        boolean elementDeprecated = isDeprecated(current.name, "*", "*");

        showComments(b, current.commentsPre, true);
        b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >");
        if (USE_SYNTHESIZED) {
            Element aliasElement = getElementFromName().get("alias");
            //b.append(current.rawChildren);
            if (!current.children.isEmpty()) {
                LinkedHashSet<Element> elements = new LinkedHashSet<>(current.children.keySet());
                boolean hasAlias = aliasElement != null && elements.remove(aliasElement);
                //boolean hasSpecial = specialElement != null && elements.remove(specialElement);
                if (hasAlias) {
                    b.append("(alias |");
                }
                b.append("(");
                // <!ELEMENT transformNames ( alias | (transformName | special)* ) >
                // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) >

                boolean firstChild = true;
                for (Element e : elements) {
                    if (firstChild) {
                        firstChild = false;
                    } else {
                        b.append(", ");
                    }
                    b.append(e.name);
                    if (e.type != ElementType.PCDATA) {
                        b.append("*");
                    }
                }
                if (hasAlias) {
                    b.append(")");
                }
                b.append(")");
            } else {
                b.append(current.type == null ? "???" : current.type.source);
            }
            b.append(">");
        }
        showComments(b, current.commentsPost, false);
        if (isOrdered(current.name)) {
            b.append(COMMENT_PREFIX + "<!--@ORDERED-->");
        }
        if (current.getElementStatus() != ElementStatus.regular) {
            b.append(COMMENT_PREFIX + "<!--@"
                + current.getElementStatus().toString().toUpperCase(Locale.ROOT)
                + "-->");
        }
        if (elementDeprecated) {
            b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
        }

        // Reused for each attribute; holds the individually deprecated values of an enumerated attribute.
        LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>();

        for (Attribute a : current.attributes.keySet()) {
            if (!seen.seenAttributes.add(a)) {
                continue;
            }
            boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*");

            deprecatedValues.clear();

            showComments(b, a.commentsPre, true);
            b.append("\n<!ATTLIST " + current.name + " " + a.name);
            if (a.type == AttributeType.ENUMERATED_TYPE) {
                b.append(" (");
                boolean firstValue = true;
                for (String s : a.values.keySet()) {
                    if (firstValue) {
                        firstValue = false;
                    } else {
                        b.append(" | ");
                    }
                    b.append(s);
                    // Per-value deprecation is only listed when the attribute as a whole is not deprecated.
                    if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) {
                        deprecatedValues.add(s);
                    }
                }
                b.append(")");
            } else {
                b.append(' ').append(a.type);
            }
            if (a.mode != Mode.NULL) {
                b.append(" ").append(a.mode.source);
            }
            if (a.defaultValue != null) {
                b.append(" \"").append(a.defaultValue).append('"');
            }
            b.append(" >");
            showComments(b, a.commentsPost, false);
            // if (attributeDeprecated != deprecatedComment) {
            //     System.out.println("*** BAD DEPRECATION ***" + a);
            // }
            if (a.matchValue != null) {
                b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->");
            }
            if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) {
                b.append(COMMENT_PREFIX + "<!--@METADATA-->");
            } else if (!isDistinguishing(current.name, a.name)) {
                b.append(COMMENT_PREFIX + "<!--@VALUE-->");
            }
            if (attributeDeprecated) {
                b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
            } else if (!deprecatedValues.isEmpty()) {
                b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + Joiner.on(", ")
                    .join(deprecatedValues) + "-->");
            }
        }
        for (Element e : current.children.keySet()) {
            toString(e, b, seen);
        }
    }

    /**
     * Appends each comment in {@code comments} to {@code b} as an XML comment.
     *
     * @param b the buffer to append to
     * @param comments the comments to write; {@code null} means nothing is written
     * @param separate if true, each comment goes on its own line (with an extra line break before the
     *        group when the buffer is non-empty); if false, each comment is preceded by
     *        {@code COMMENT_PREFIX}
     */
    private void showComments(StringBuilder b, Set<String> comments, boolean separate) {
        if (comments == null) {
            return;
        }
        if (separate && b.length() != 0) {
            b.append(System.lineSeparator());
        }
        for (String c : comments) {
            if (separate) {
                // special handling for very first comment
                if (b.length() == 0) {
                    b.append("<!--")
                        .append(System.lineSeparator())
                        .append(c)
                        .append(System.lineSeparator())
                        .append("-->");
                    continue;
                }
                b.append(System.lineSeparator());
            } else {
                b.append(COMMENT_PREFIX);
            }
            b.append("<!-- ").append(c).append(" -->");
        }
    }

    /**
     * Removes and returns the first item, in iteration order, for which {@code matcher} yields true.
     *
     * @param elements the collection to search; the matching item is removed through its iterator
     * @param matcher the predicate; a {@code null} or false result means "no match"
     * @return the removed item, or {@code null} if nothing matched
     */
    public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) {
        for (Iterator<T> it = elements.iterator(); it.hasNext();) {
            T item = it.next();
            // equals() rather than ==, so a true Boolean that is not the Boolean.TRUE instance still matches
            if (Boolean.TRUE.equals(matcher.transform(item))) {
                it.remove();
                return item;
            }
        }
        return null;
    }

    /**
     * Returns a new set containing the values of the name-to-element map.
     */
    public Set<Element> getElements() {
        return new LinkedHashSet<>(nameToElement.values());
    }

    /**
     * Returns a new set containing the values of the name-to-attributes relation.
     */
    public Set<Attribute> getAttributes() {
        return new LinkedHashSet<>(nameToAttributes.values());
    }

    /**
     * Returns true if {@link #getAttributeStatus} reports the attribute as
     * {@link AttributeStatus#distinguished}.
     */
    public boolean isDistinguishing(String elementName, String attribute) {
        return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished;
    }

    // Attribute names that are annotated as @METADATA when rendering, regardless of their attributeStatus.
    static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft"));

    /**
     * Returns a set holding the entries of {@code comment} followed by {@code addition}.
     * The argument is never modified: for {@code null} a singleton set is returned, otherwise an
     * unmodifiable copy.
     */
    static final Set<String> addUnmodifiable(Set<String> comment, String addition) {
        if (comment == null) {
            return Collections.singleton(addition);
        }
        Set<String> extended = new LinkedHashSet<>(comment);
        extended.add(addition);
        return Collections.unmodifiableSet(extended);
    }

    /**
     * Thrown when an element, attribute, or attribute value is looked up that this DTD does not define.
     * The message includes the enclosing object's {@code dtdType}.
     */
    public class IllegalByDtdException extends RuntimeException {
        private static final long serialVersionUID = 1L;
        public final String elementName;
        public final String attributeName;
        public final String attributeValue;

        public IllegalByDtdException(String elementName, String attributeName, String attributeValue) {
            this.elementName = elementName;
            this.attributeName = attributeName;
            this.attributeValue = attributeValue;
        }

        @Override
        public String getMessage() {
            return "Dtd " + dtdType
                + " doesn’t allow "
                + "element=" + elementName
                + (attributeName == null ? "" : ", attribute: " + attributeName)
                + (attributeValue == null ? "" : ", attributeValue: " + attributeValue);
        }
    }

    /**
     * Reports whether the element, the attribute on it, or the attribute value is deprecated.
     * A deprecated element makes everything under it deprecated. The attribute names "*" and "_q"
     * are never looked up and yield false unless the element itself is deprecated.
     *
     * @throws IllegalByDtdException if the element is unknown, or the attribute is not defined on it
     */
    //@SuppressWarnings("unused")
    public boolean isDeprecated(String elementName, String attributeName, String attributeValue) {
        Element element = nameToElement.get(elementName);
        if (element == null) {
            throw new IllegalByDtdException(elementName, attributeName, attributeValue);
        } else if (element.isDeprecatedElement) {
            return true;
        }
        if ("*".equals(attributeName) || "_q".equals(attributeName)) {
            return false;
        }
        Attribute attribute = element.getAttributeNamed(attributeName);
        if (attribute == null) {
            throw new IllegalByDtdException(elementName, attributeName, attributeValue);
        } else if (attribute.isDeprecatedAttribute) {
            return true;
        }
        return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*"
    }

    /**
     * Returns the element's {@code isOrderedElement} flag. Unknown elements whose name starts with
     * "icu:" are reported as not ordered.
     *
     * @throws IllegalByDtdException if the element is unknown and its name does not start with "icu:"
     */
    public boolean isOrdered(String elementName) {
        Element element = nameToElement.get(elementName);
        if (element == null) {
            if (elementName.startsWith("icu:")) {
                return false;
            }
            throw new IllegalByDtdException(elementName, null, null);
        }
        return element.isOrderedElement;
    }

getAttributeStatus(String elementName, String attributeName)1209 public AttributeStatus getAttributeStatus(String elementName, String attributeName) { 1210 if ("_q".equals(attributeName)) { 1211 return AttributeStatus.distinguished; // special case 1212 } 1213 Element element = nameToElement.get(elementName); 1214 if (element == null) { 1215 if (elementName.startsWith("icu:")) { 1216 return AttributeStatus.distinguished; 1217 } 1218 throw new IllegalByDtdException(elementName, attributeName, null); 1219 } 1220 Attribute attribute = element.getAttributeNamed(attributeName); 1221 if (attribute == null) { 1222 if (elementName.startsWith("icu:")) { 1223 return AttributeStatus.distinguished; 1224 } 1225 throw new IllegalByDtdException(elementName, attributeName, null); 1226 } 1227 return attribute.attributeStatus; 1228 } 1229 1230 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1231 private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze(); 1232 1233 static MapComparator<String> dayValueOrder = new MapComparator<String>().add( 1234 "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze(); 1235 static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add( 1236 "midnight", "am", "noon", "pm", 1237 "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2", 1238 // The ones on the following line are no longer used actively. Can be removed later? 
1239 "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze(); 1240 static MapComparator<String> listPatternOrder = new MapComparator<String>().add( 1241 "start", "middle", "end", "2", "3").freeze(); 1242 static MapComparator<String> widthOrder = new MapComparator<String>().add( 1243 "abbreviated", "narrow", "short", "wide", "all").freeze(); 1244 static MapComparator<String> lengthOrder = new MapComparator<String>().add( 1245 "full", "long", "medium", "short").freeze(); 1246 static MapComparator<String> dateFieldOrder = new MapComparator<String>().add( 1247 "era", "era-short", "era-narrow", 1248 "year", "year-short", "year-narrow", 1249 "quarter", "quarter-short", "quarter-narrow", 1250 "month", "month-short", "month-narrow", 1251 "week", "week-short", "week-narrow", 1252 "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow", 1253 "day", "day-short", "day-narrow", 1254 "dayOfYear", "dayOfYear-short", "dayOfYear-narrow", 1255 "weekday", "weekday-short", "weekday-narrow", 1256 "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow", 1257 "sun", "sun-short", "sun-narrow", 1258 "mon", "mon-short", "mon-narrow", 1259 "tue", "tue-short", "tue-narrow", 1260 "wed", "wed-short", "wed-narrow", 1261 "thu", "thu-short", "thu-narrow", 1262 "fri", "fri-short", "fri-narrow", 1263 "sat", "sat-short", "sat-narrow", 1264 "dayperiod-short", "dayperiod", "dayperiod-narrow", 1265 "hour", "hour-short", "hour-narrow", 1266 "minute", "minute-short", "minute-narrow", 1267 "second", "second-short", "second-narrow", 1268 "zone", "zone-short", "zone-narrow").freeze(); 1269 1270 /* TODO: change this to be data-file driven. 
Can do with new Unit preferences info; also put them in a more meaningful order (metric vs other; size) */ 1271 1272 public static final MapComparator<String> unitOrder = new MapComparator<String>().add( 1273 "acceleration-g-force", "acceleration-meter-per-square-second", 1274 "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second", 1275 "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter", 1276 "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch", 1277 "area-dunam", 1278 "concentr-karat", 1279 "concentr-milligram-per-deciliter", "concentr-millimole-per-liter", 1280 "concentr-item", 1281 "concentr-portion", 1282 "concentr-permillion", "concentr-percent", "concentr-permille", "concentr-permyriad", 1283 "concentr-mole", 1284 "consumption-liter-per-kilometer", "consumption-liter-per-100-kilometer", 1285 "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial", 1286 "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit", 1287 "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit", 1288 "digital-byte", "digital-bit", 1289 "duration-century", "duration-decade", 1290 "duration-year", "duration-year-person", 1291 "duration-month", "duration-month-person", 1292 "duration-week", "duration-week-person", 1293 "duration-day", "duration-day-person", 1294 "duration-hour", "duration-minute", "duration-second", 1295 "duration-millisecond", "duration-microsecond", "duration-nanosecond", 1296 "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt", 1297 "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour", 1298 "energy-electronvolt", 1299 "energy-british-thermal-unit", 1300 "energy-therm-us", 1301 "force-pound-force", 1302 "force-newton", 1303 "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", 
"frequency-hertz", 1304 "graphics-em", "graphics-pixel", "graphics-megapixel", 1305 "graphics-pixel-per-centimeter", "graphics-pixel-per-inch", 1306 "graphics-dot-per-centimeter", "graphics-dot-per-inch", 1307 "graphics-dot", 1308 "length-earth-radius", 1309 "length-100-kilometer", 1310 "length-kilometer", "length-meter", "length-decimeter", "length-centimeter", 1311 "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer", 1312 "length-mile", "length-yard", "length-foot", "length-inch", 1313 "length-parsec", "length-light-year", "length-astronomical-unit", 1314 "length-furlong", "length-fathom", 1315 "length-nautical-mile", "length-mile-scandinavian", 1316 "length-point", 1317 "length-solar-radius", 1318 "light-lux", 1319 "light-candela", 1320 "light-lumen", 1321 "light-solar-luminosity", 1322 "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram", 1323 "mass-ton", "mass-stone", "mass-pound", "mass-ounce", 1324 "mass-ounce-troy", "mass-carat", 1325 "mass-dalton", 1326 "mass-earth-mass", 1327 "mass-solar-mass", 1328 1329 "mass-grain", 1330 1331 "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt", 1332 "power-horsepower", 1333 "pressure-millimeter-ofhg", 1334 "pressure-ofhg", 1335 "pressure-pound-force-per-square-inch", "pressure-inch-ofhg", "pressure-bar", "pressure-millibar", "pressure-atmosphere", 1336 "pressure-pascal", 1337 "pressure-hectopascal", 1338 "pressure-kilopascal", 1339 "pressure-megapascal", 1340 "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot", 1341 "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin", 1342 "torque-pound-force-foot", 1343 "torque-newton-meter", 1344 "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter", 1345 "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch", 1346 "volume-megaliter", "volume-hectoliter", 
"volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter", 1347 "volume-pint-metric", "volume-cup-metric", 1348 "volume-acre-foot", 1349 "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup", 1350 "volume-fluid-ounce", "volume-fluid-ounce-imperial", "volume-tablespoon", "volume-teaspoon", 1351 "volume-barrel", 1352 1353 "volume-dessert-spoon", 1354 "volume-dessert-spoon-imperial", 1355 "volume-drop", 1356 "volume-dram", 1357 "volume-jigger", 1358 "volume-pinch", 1359 "volume-quart-imperial" 1360 // "volume-pint-imperial" 1361 ).freeze(); 1362 1363 static MapComparator<String> countValueOrder = new MapComparator<String>().add( 1364 "0", "1", "zero", "one", "two", "few", "many", "other").freeze(); 1365 static MapComparator<String> unitLengthOrder = new MapComparator<String>().add( 1366 "long", "short", "narrow").freeze(); 1367 static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add( 1368 "standard", "accounting").freeze(); 1369 static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator(); 1370 1371 static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator(); 1372 1373 // Hack for US 1374 static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<String>() { 1375 @Override 1376 public int compare(String o1, String o2) { 1377 if (o1.contains("{")) { 1378 o1 = o1.replace("{", ""); 1379 } 1380 if (o2.contains("{")) { 1381 o2 = o2.replace("{", ""); 1382 } 1383 return COMP.compare(o1, o2); 1384 } 1385 1386 }; 1387 getAttributeValueComparator(String element, String attribute)1388 public static Comparator<String> getAttributeValueComparator(String element, String attribute) { 1389 return getAttributeValueComparator(DtdType.ldml, element, attribute); 1390 } 1391 getAttributeValueComparator(DtdType type, String element, String attribute)1392 static Comparator<String> getAttributeValueComparator(DtdType type, String 
element, String attribute) { 1393 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1394 Comparator<String> comp = valueOrdering; 1395 if (type != DtdType.ldml && type != DtdType.ldmlICU) { 1396 return comp; 1397 } 1398 if (attribute.equals("day")) { // && (element.startsWith("weekend") 1399 comp = dayValueOrder; 1400 } else if (attribute.equals("type")) { 1401 if (element.endsWith("FormatLength")) { 1402 comp = lengthOrder; 1403 } else if (element.endsWith("Width")) { 1404 comp = widthOrder; 1405 } else if (element.equals("day")) { 1406 comp = dayValueOrder; 1407 } else if (element.equals("field")) { 1408 comp = dateFieldOrder; 1409 } else if (element.equals("zone")) { 1410 comp = zoneOrder; 1411 } else if (element.equals("listPatternPart")) { 1412 comp = listPatternOrder; 1413 } else if (element.equals("currencyFormat")) { 1414 comp = currencyFormatOrder; 1415 } else if (element.equals("unitLength")) { 1416 comp = unitLengthOrder; 1417 } else if (element.equals("unit")) { 1418 comp = unitOrder; 1419 } else if (element.equals("dayPeriod")) { 1420 comp = dayPeriodOrder; 1421 } 1422 } else if (attribute.equals("count") && !element.equals("minDays")) { 1423 comp = countValueOrder; 1424 } else if (attribute.equals("cp") && element.equals("annotation")) { 1425 comp = UNICODE_SET_COMPARATOR; 1426 } 1427 return comp; 1428 } 1429 1430 /** 1431 * Comparator for attributes in CLDR files 1432 */ 1433 private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() { 1434 @Override 1435 public int compare(String element, String attribute, String value1, String value2) { 1436 Comparator<String> comp = getAttributeValueComparator(element, attribute); 1437 return comp.compare(value1, value2); 1438 } 1439 }; 1440 hasValue(String elementName)1441 public boolean hasValue(String elementName) { 1442 return nameToElement.get(elementName).type == ElementType.PCDATA; 1443 } 1444 isMetadata(XPathParts pathPlain)1445 public boolean 
isMetadata(XPathParts pathPlain) { 1446 for (String s : pathPlain.getElements()) { 1447 Element e = getElementFromName().get(s); 1448 if (e.elementStatus == ElementStatus.metadata) { 1449 return true; 1450 } 1451 } 1452 return false; 1453 } 1454 isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1455 public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) { 1456 // TODO Don't use hard-coded list; instead add to DTD annotations 1457 final String element1 = pathPlain.getElement(1); 1458 final String element2 = pathPlain.getElement(2); 1459 final String elementN = pathPlain.getElement(-1); 1460 switch (dtdType2) { 1461 case ldml: 1462 switch (element1) { 1463 case "generation": 1464 case "metadata": 1465 return true; 1466 } 1467 break; 1468 case ldmlBCP47: 1469 switch (element1) { 1470 case "generation": 1471 case "version": 1472 return true; 1473 } 1474 break; 1475 ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment 1476 case supplementalData: 1477 // these are NOT under /metadata/ but are actually metadata 1478 switch (element1) { 1479 case "generation": 1480 case "version": 1481 case "validity": 1482 case "references": 1483 case "coverageLevels": 1484 return true; 1485 case "transforms": 1486 return elementN.equals("comment"); 1487 case "metadata": 1488 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata. 
1489 switch (element2) { 1490 case "validity": 1491 case "serialElements": 1492 case "suppress": 1493 case "distinguishing": 1494 case "blocking": 1495 case "casingData": 1496 return true; 1497 } 1498 break; 1499 } 1500 break; 1501 default: 1502 } 1503 return false; 1504 } 1505 isDeprecated(XPathParts pathPlain)1506 public boolean isDeprecated(XPathParts pathPlain) { 1507 for (int i = 0; i < pathPlain.size(); ++i) { 1508 String elementName = pathPlain.getElement(i); 1509 if (isDeprecated(elementName, "*", null)) { 1510 return true; 1511 } 1512 for (String attribute : pathPlain.getAttributeKeys(i)) { 1513 String attributeValue = pathPlain.getAttributeValue(i, attribute); 1514 if (isDeprecated(elementName, attribute, attributeValue)) { 1515 return true; 1516 } 1517 } 1518 } 1519 return false; 1520 } 1521 1522 public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings(); 1523 public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings(); 1524 public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings(); 1525 1526 private static class XPathPartsSet { 1527 private final Set<XPathParts> list = new LinkedHashSet<>(); 1528 addElement(String element)1529 private void addElement(String element) { 1530 if (list.isEmpty()) { 1531 list.add(new XPathParts().addElement(element)); 1532 } else { 1533 for (XPathParts item : list) { 1534 item.addElement(element); 1535 } 1536 } 1537 } 1538 addAttribute(String attribute, String attributeValue)1539 private void addAttribute(String attribute, String attributeValue) { 1540 for (XPathParts item : list) { 1541 item.addAttribute(attribute, attributeValue); 1542 } 1543 } 1544 setElement(int i, String string)1545 private void setElement(int i, String string) { 1546 for (XPathParts item : list) { 1547 item.setElement(i, string); 1548 } 1549 } 1550 addAttributes(String attribute, List<String> 
attributeValues)1551 private void addAttributes(String attribute, List<String> attributeValues) { 1552 if (attributeValues.size() == 1) { 1553 addAttribute(attribute, attributeValues.iterator().next()); 1554 } else { 1555 // duplicate all the items in the list with the given values 1556 Set<XPathParts> newList = new LinkedHashSet<>(); 1557 for (XPathParts item : list) { 1558 for (String attributeValue : attributeValues) { 1559 XPathParts newItem = item.cloneAsThawed(); 1560 newItem.addAttribute(attribute, attributeValue); 1561 newList.add(newItem); 1562 } 1563 } 1564 list.clear(); 1565 list.addAll(newList); 1566 } 1567 } 1568 toStrings()1569 private ImmutableSet<String> toStrings() { 1570 Builder<String> result = new ImmutableSet.Builder<>(); 1571 1572 for (XPathParts item : list) { 1573 result.add(item.toString()); 1574 } 1575 return result.build(); 1576 } 1577 1578 @Override toString()1579 public String toString() { 1580 return list.toString(); 1581 } 1582 } 1583 getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)1584 public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) { 1585 extras.clear(); 1586 Map<String, String> valueAttributes = new HashMap<>(); 1587 XPathPartsSet pathResult = new XPathPartsSet(); 1588 String element = null; 1589 for (int i = 0; i < pathPlain.size(); ++i) { 1590 element = pathPlain.getElement(i); 1591 pathResult.addElement(element); 1592 valueAttributes.clear(); 1593 for (String attribute : pathPlain.getAttributeKeys(i)) { 1594 AttributeStatus status = getAttributeStatus(element, attribute); 1595 final String attributeValue = pathPlain.getAttributeValue(i, attribute); 1596 switch (status) { 1597 case distinguished: 1598 AttributeType attrType = getAttributeType(element, attribute); 1599 if (attrType == AttributeType.NMTOKENS) { 1600 pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue)); 1601 } else { 1602 pathResult.addAttribute(attribute, 
attributeValue); 1603 } 1604 break; 1605 case value: 1606 valueAttributes.put(attribute, attributeValue); 1607 break; 1608 case metadata: 1609 break; 1610 } 1611 } 1612 if (!valueAttributes.isEmpty()) { 1613 boolean hasValue = hasValue(element); 1614 // if it doesn't have a value, we construct new child elements, with _ prefix 1615 // if it does have a value, we have to play a further trick, since 1616 // we can't have a value and child elements at the same level. 1617 // So we use a _ suffix on the element. 1618 if (hasValue) { 1619 pathResult.setElement(i, element + "_"); 1620 } else { 1621 int debug = 0; 1622 } 1623 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) { 1624 final String attribute = attributeAndValue.getKey(); 1625 final String attributeValue = attributeAndValue.getValue(); 1626 1627 Set<String> pathsShort = pathResult.toStrings(); 1628 AttributeType attrType = getAttributeType(element, attribute); 1629 for (String pathShort : pathsShort) { 1630 pathShort += "/_" + attribute; 1631 if (attrType == AttributeType.NMTOKENS) { 1632 for (String valuePart : SPACE_SPLITTER.split(attributeValue)) { 1633 extras.put(pathShort, valuePart); 1634 } 1635 } else { 1636 extras.put(pathShort, attributeValue); 1637 } 1638 } 1639 } 1640 if (hasValue) { 1641 pathResult.setElement(i, element); // restore 1642 } 1643 } 1644 } 1645 // Only add the path if it could have a value, looking at the last element 1646 if (!hasValue(element)) { 1647 return null; 1648 } 1649 return pathResult.toStrings(); 1650 } 1651 getAttributeType(String elementName, String attributeName)1652 public AttributeType getAttributeType(String elementName, String attributeName) { 1653 Attribute attr = getAttribute(elementName, attributeName); 1654 return (attr != null) ? 
attr.type : null; 1655 } 1656 getAttribute(String elementName, String attributeName)1657 public Attribute getAttribute(String elementName, String attributeName) { 1658 Element element = nameToElement.get(elementName); 1659 return (element != null) ? element.getAttributeNamed(attributeName) : null; 1660 } 1661 1662 // TODO: add support for following to DTD annotations, and rework API 1663 1664 static final Set<String> SPACED_VALUES = ImmutableSet.of( 1665 "idValidity", 1666 "languageGroup"); 1667 getValueSplitter(XPathParts pathPlain)1668 public static Splitter getValueSplitter(XPathParts pathPlain) { 1669 if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) { 1670 return SPACE_SPLITTER; 1671 } else if (pathPlain.getElement(-1).equals("annotation") 1672 && !pathPlain.getAttributeKeys(-1).contains("tts")) { 1673 return BAR_SPLITTER; 1674 } 1675 return CR_SPLITTER; 1676 } 1677 isComment(XPathParts pathPlain, String line)1678 public static boolean isComment(XPathParts pathPlain, String line) { 1679 if (pathPlain.contains("transform")) { 1680 if (line.startsWith("#")) { 1681 return true; 1682 } 1683 } 1684 return false; 1685 } 1686 isExtraSplit(String extraPath)1687 public static boolean isExtraSplit(String extraPath) { 1688 if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) { 1689 return true; 1690 } 1691 return false; 1692 } 1693 1694 /** 1695 * Return the value status for an EAV 1696 */ getValueStatus(String elementName, String attributeName, String value)1697 public ValueStatus getValueStatus(String elementName, String attributeName, String value) { 1698 Element element = nameToElement.get(elementName); 1699 if (element == null) { 1700 return ValueStatus.invalid; 1701 } 1702 Attribute attr = element.getAttributeNamed(attributeName); 1703 if (attr == null) { 1704 return ValueStatus.invalid; 1705 } 1706 return attr.getValueStatus(value); 1707 } 1708 1709 /** 1710 * Return element-attribute pairs with 
non-enumerated values, for quick checks. 1711 */ getNonEnumerated(Map<String,String> matchValues)1712 public Multimap<String, String> getNonEnumerated(Map<String,String> matchValues) { 1713 Multimap<String,String> nonEnumeratedElementToAttribute = TreeMultimap.create(); // make tree for ease of debugging 1714 for (Entry<String, Element> entry : nameToElement.entrySet()) { 1715 Element element = entry.getValue(); 1716 for (Attribute attribute : element.attributes.keySet()) { 1717 if (attribute.type != AttributeType.ENUMERATED_TYPE) { 1718 String elementName = element.getName(); 1719 String attrName = attribute.getName(); 1720 nonEnumeratedElementToAttribute.put(elementName, attrName); 1721 if (attribute.matchValue != null) { 1722 matchValues.put(elementName + "\t" + attrName, attribute.matchValue.getName()); 1723 } 1724 } 1725 } 1726 } 1727 return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute); 1728 } 1729 } 1730