1 package org.unicode.cldr.util; 2 3 import java.io.File; 4 import java.io.StringReader; 5 import java.util.ArrayList; 6 import java.util.Arrays; 7 import java.util.Collection; 8 import java.util.Collections; 9 import java.util.Comparator; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.Iterator; 13 import java.util.LinkedHashMap; 14 import java.util.LinkedHashSet; 15 import java.util.List; 16 import java.util.Locale; 17 import java.util.Map; 18 import java.util.Map.Entry; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.concurrent.ConcurrentHashMap; 22 import java.util.concurrent.ConcurrentMap; 23 import java.util.regex.Pattern; 24 25 import com.google.common.base.CharMatcher; 26 import com.google.common.base.Splitter; 27 import com.google.common.collect.ImmutableSet; 28 import com.google.common.collect.ImmutableSet.Builder; 29 import com.google.common.collect.ImmutableSetMultimap; 30 import com.google.common.collect.Multimap; 31 import com.google.common.collect.TreeMultimap; 32 import com.ibm.icu.dev.util.CollectionUtilities; 33 import com.ibm.icu.impl.Relation; 34 import com.ibm.icu.text.Transform; 35 36 /** 37 * An immutable object that contains the structure of a DTD. 38 * @author markdavis 39 */ 40 public class DtdData extends XMLFileReader.SimpleHandler { 41 private static final String COMMENT_PREFIX = System.lineSeparator() + " "; 42 private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false); 43 private static final boolean USE_SYNTHESIZED = false; 44 45 private static final boolean DEBUG = false; 46 private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]"); 47 48 private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class); 49 private Map<String, Element> nameToElement = new HashMap<String, Element>(); 50 private MapComparator<String> elementComparator; 51 private MapComparator<String> attributeComparator; 52 53 public final Element ROOT; 54 public final Element PCDATA = elementFrom("#PCDATA"); 55 public final Element ANY = elementFrom("ANY"); 56 public final DtdType dtdType; 57 public final String version; 58 private Element lastElement; 59 private Attribute lastAttribute; 60 private Set<String> preCommentCache; 61 private DtdComparator dtdComparator; 62 63 public enum AttributeStatus { 64 distinguished ("§d"), 65 value ("§v"), 66 metadata ("§m︎"); 67 public final String shortName; AttributeStatus(String shortName)68 AttributeStatus(String shortName) { 69 this.shortName = shortName; 70 } getShortName(AttributeStatus status)71 public static String getShortName(AttributeStatus status) { 72 return status == null ? "" : status.shortName; 73 } 74 } 75 76 public enum Mode { 77 REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null"); 78 79 public final String source; 80 Mode(String s)81 Mode(String s) { 82 source = s; 83 } 84 forString(String mode)85 public static Mode forString(String mode) { 86 for (Mode value : Mode.values()) { 87 if (value.source.equals(mode)) { 88 return value; 89 } 90 } 91 if (mode == null) { 92 return NULL; 93 } 94 throw new IllegalArgumentException(mode); 95 } 96 } 97 98 public enum AttributeType { 99 CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE 100 } 101 102 public static class Attribute implements Named { 103 public final String name; 104 public final Element element; 105 public final Mode mode; 106 public final String defaultValue; 107 public final AttributeType type; 108 public final Map<String, Integer> values; 109 private final Set<String> commentsPre; 110 private Set<String> commentsPost; 111 private boolean isDeprecatedAttribute; 112 public AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations 113 private Set<String> deprecatedValues = Collections.emptySet(); 114 public MatchValue matchValue; 115 private final Comparator<String> attributeValueComparator; 116 Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)117 private Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) { 118 commentsPre = firstComment; 119 element = element2; 120 name = aName.intern(); 121 if (name.equals("draft") // normally never permitted on elements with children, but special cases... 122 && !element.getName().equals("collation") 123 && !element.getName().equals("transform")) { 124 int elementChildrenCount = element.getChildren().size(); 125 if (elementChildrenCount > 1 126 || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) { 127 isDeprecatedAttribute = true; 128 if (DEBUG) { 129 System.out.println(element.getName() + ":" + element.getChildren()); 130 } 131 } 132 } 133 mode = mode2; 134 defaultValue = value2 == null ? null 135 : value2.intern(); 136 AttributeType _type = AttributeType.ENUMERATED_TYPE; 137 Map<String, Integer> _values = Collections.emptyMap(); 138 if (split.length == 1) { 139 try { 140 _type = AttributeType.valueOf(split[0]); 141 } catch (Exception e) { 142 } 143 } 144 type = _type; 145 146 if (_type == AttributeType.ENUMERATED_TYPE) { 147 LinkedHashMap<String, Integer> temp = new LinkedHashMap<String, Integer>(); 148 for (String part : split) { 149 if (part.length() != 0) { 150 temp.put(part.intern(), temp.size()); 151 } 152 } 153 _values = Collections.unmodifiableMap(temp); 154 } 155 values = _values; 156 attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name); 157 } 158 159 @Override toString()160 public String toString() { 161 return element.name + ":" + name; 162 } 163 appendDtdString(StringBuilder b)164 public StringBuilder appendDtdString(StringBuilder b) { 165 Attribute a = this; 166 b.append("<!ATTLIST " + element.name + " " + a.name); 167 boolean first; 168 if (a.type == AttributeType.ENUMERATED_TYPE) { 169 b.append(" ("); 170 first = true; 171 for (String s : a.values.keySet()) { 172 if (deprecatedValues.contains(s)) { 173 continue; 174 } 175 if (first) { 176 first = false; 177 } else { 178 b.append(" | "); 179 } 180 b.append(s); 181 } 182 b.append(")"); 183 } else { 184 b.append(' ').append(a.type); 185 } 186 if (a.mode != Mode.NULL) { 187 b.append(" ").append(a.mode.source); 188 } 189 if (a.defaultValue != null) { 190 b.append(" \"").append(a.defaultValue).append('"'); 191 } 192 b.append(" >"); 193 return b; 194 } 195 features()196 public String features() { 197 return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString()) 198 + (mode == Mode.NULL ? "" : ", mode=" + mode) 199 + (defaultValue == null ? "" : ", default=" + defaultValue); 200 } 201 202 @Override getName()203 public String getName() { 204 return name; 205 } 206 207 private static Splitter COMMA = Splitter.on(',').trimResults(); 208 addComment(String commentIn)209 public void addComment(String commentIn) { 210 if (commentIn.startsWith("@")) { 211 // there are exactly 2 cases: deprecated and ordered 212 switch (commentIn) { 213 case "@METADATA": 214 attributeStatus = AttributeStatus.metadata; 215 break; 216 case "@VALUE": 217 attributeStatus = AttributeStatus.value; 218 break; 219 case "@DEPRECATED": 220 isDeprecatedAttribute = true; 221 break; 222 default: 223 int colonPos = commentIn.indexOf(':'); 224 if (colonPos < 0) { 225 throw new IllegalArgumentException("Unrecognized annotation: " + commentIn); 226 } 227 String command = commentIn.substring(0, colonPos); 228 String argument = commentIn.substring(colonPos + 1); 229 switch(command) { 230 case "@DEPRECATED": 231 deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(argument))); 232 break; 233 case "@MATCH": 234 if (matchValue != null) { 235 throw new IllegalArgumentException("Conflicting @MATCH: " + matchValue.getName() + " & " + argument); 236 } 237 matchValue = MatchValue.of(argument); 238 break; 239 default: 240 throw new IllegalArgumentException("Unrecognized annotation: " + commentIn); 241 } 242 } 243 return; 244 } 245 commentsPost = addUnmodifiable(commentsPost, commentIn.trim()); 246 } 247 248 /** 249 * Special version of identity; only considers name and name of element 250 */ 251 @Override equals(Object obj)252 public boolean equals(Object obj) { 253 if (!(obj instanceof Attribute)) { 254 return false; 255 } 256 Attribute that = (Attribute) obj; 257 return name.equals(that.name) 258 && element.name.equals(that.element.name) // don't use plain element: circularity 259 // not relevant to identity 260 // && Objects.equals(comment, that.comment) 261 // && mode.equals(that.mode) 262 // && Objects.equals(defaultValue, that.defaultValue) 263 // && type.equals(that.type) 264 // && values.equals(that.values) 265 ; 266 } 267 268 /** 269 * Special version of identity; only considers name and name of element 270 */ 271 @Override hashCode()272 public int hashCode() { 273 return name.hashCode() * 37 274 + element.name.hashCode() // don't use plain element: circularity 275 // not relevant to identity 276 // ) * 37 + Objects.hashCode(comment)) * 37 277 // + mode.hashCode()) * 37 278 // + Objects.hashCode(defaultValue)) * 37 279 // + type.hashCode()) * 37 280 // + values.hashCode() 281 ; 282 } 283 isDeprecated()284 public boolean isDeprecated() { 285 return isDeprecatedAttribute; 286 } 287 isDeprecatedValue(String value)288 public boolean isDeprecatedValue(String value) { 289 return deprecatedValues.contains(value); 290 } 291 getStatus()292 public AttributeStatus getStatus() { 293 return attributeStatus; 294 } 295 getValueStatus(String value)296 public ValueStatus getValueStatus(String value) { 297 return deprecatedValues.contains(value) 298 ? ValueStatus.invalid 299 : type == AttributeType.ENUMERATED_TYPE 300 ? (values.containsKey(value) 301 ? ValueStatus.valid 302 : ValueStatus.invalid) 303 : matchValue == null 304 ? ValueStatus.unknown : 305 matchValue.is(value) 306 ? ValueStatus.valid 307 : ValueStatus.invalid; 308 } 309 getMatchString()310 public String getMatchString() { 311 return type == AttributeType.ENUMERATED_TYPE 312 ? "⟨" + CollectionUtilities.join(values.keySet(), ", ") + "⟩" 313 : matchValue != null 314 ? "⟪" + matchValue.toString() + "⟫" 315 : ""; 316 } 317 getMatchingName(Map<Attribute, Integer> attributes)318 public Attribute getMatchingName(Map<Attribute, Integer> attributes) { 319 for (Attribute attribute : attributes.keySet()) { 320 if (name.equals(attribute.getName())) { 321 return attribute; 322 } 323 } 324 return null; 325 } 326 327 } 328 329 public enum ValueStatus {invalid, unknown, valid} 330 DtdData(DtdType type, String version)331 private DtdData(DtdType type, String version) { 332 this.dtdType = type; 333 this.ROOT = elementFrom(type.rootType.toString()); 334 this.version = version; 335 } 336 addAttribute(String eName, String aName, String type, String mode, String value)337 private void addAttribute(String eName, String aName, String type, String mode, String value) { 338 Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache); 339 preCommentCache = null; 340 getAttributesFromName().put(aName, a); 341 CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size()); 342 lastElement = null; 343 lastAttribute = a; 344 } 345 346 public enum ElementType { 347 EMPTY, ANY, PCDATA("(#PCDATA)"), CHILDREN; 348 public final String source; 349 ElementType(String s)350 private ElementType(String s) { 351 source = s; 352 } 353 ElementType()354 private ElementType() { 355 source = name(); 356 } 357 } 358 359 interface Named { getName()360 String getName(); 361 } 362 363 public enum ElementStatus { 364 regular, metadata 365 } 366 367 public static class Element implements Named { 368 public final String name; 369 private String rawModel; 370 private ElementType type; 371 private final Map<Element, Integer> children = new LinkedHashMap<Element, Integer>(); 372 private final Map<Attribute, Integer> attributes = new LinkedHashMap<Attribute, Integer>(); 373 private Set<String> commentsPre; 374 private Set<String> commentsPost; 375 private String model; 376 private boolean isOrderedElement; 377 private boolean isDeprecatedElement; 378 private ElementStatus elementStatus = ElementStatus.regular; 379 Element(String name2)380 private Element(String name2) { 381 name = name2.intern(); 382 } 383 setChildren(DtdData dtdData, String model, Set<String> precomments)384 private void setChildren(DtdData dtdData, String model, Set<String> precomments) { 385 this.commentsPre = precomments; 386 rawModel = model; 387 this.model = clean(model); 388 if (model.equals("EMPTY")) { 389 type = ElementType.EMPTY; 390 return; 391 } 392 type = ElementType.CHILDREN; 393 for (String part : FILLER.split(model)) { 394 if (part.length() != 0) { 395 if (part.equals("#PCDATA")) { 396 type = ElementType.PCDATA; 397 } else if (part.equals("ANY")) { 398 type = ElementType.ANY; 399 } else { 400 CldrUtility.putNew(children, dtdData.elementFrom(part), children.size()); 401 } 402 } 403 } 404 if ((type == ElementType.CHILDREN) == (children.size() == 0) 405 && !model.startsWith("(#PCDATA|cp")) { 406 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model); 407 } 408 } 409 410 static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)"); 411 static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])"); 412 clean(String model2)413 private String clean(String model2) { 414 // (x) -> ( x ); 415 // x,y -> x, y 416 // x|y -> x | y 417 String result = CLEANER1.matcher(model2).replaceAll("$1 "); 418 result = CLEANER2.matcher(result).replaceAll(" $1"); 419 return result.equals(model2) 420 ? model2 421 : result; // for debugging 422 } 423 containsAttribute(String string)424 public boolean containsAttribute(String string) { 425 for (Attribute a : attributes.keySet()) { 426 if (a.name.equals(string)) { 427 return true; 428 } 429 } 430 return false; 431 } 432 433 @Override toString()434 public String toString() { 435 return name; 436 } 437 toDtdString()438 public String toDtdString() { 439 return "<!ELEMENT " + name + " " + getRawModel() + " >"; 440 } 441 getType()442 public ElementType getType() { 443 return type; 444 } 445 getChildren()446 public Map<Element, Integer> getChildren() { 447 return Collections.unmodifiableMap(children); 448 } 449 getAttributes()450 public Map<Attribute, Integer> getAttributes() { 451 return Collections.unmodifiableMap(attributes); 452 } 453 454 @Override getName()455 public String getName() { 456 return name; 457 } 458 getChildNamed(String string)459 public Element getChildNamed(String string) { 460 for (Element e : children.keySet()) { 461 if (e.name.equals(string)) { 462 return e; 463 } 464 } 465 return null; 466 } 467 getAttributeNamed(String string)468 public Attribute getAttributeNamed(String string) { 469 for (Attribute a : attributes.keySet()) { 470 if (a.name.equals(string)) { 471 return a; 472 } 473 } 474 return null; 475 } 476 addComment(String addition)477 public void addComment(String addition) { 478 if (addition.startsWith("@")) { 479 // there are exactly 3 cases: deprecated, ordered, and metadata 480 switch (addition) { 481 case "@ORDERED": 482 isOrderedElement = true; 483 break; 484 case "@DEPRECATED": 485 isDeprecatedElement = true; 486 break; 487 case "@METADATA": 488 elementStatus = ElementStatus.metadata; 489 break; 490 default: 491 throw new IllegalArgumentException("Unrecognized annotation: " + addition); 492 } 493 return; 494 } 495 commentsPost = addUnmodifiable(commentsPost, addition.trim()); 496 } 497 498 /** 499 * Special version of equals. Only the name is considered in the identity. 500 */ 501 @Override equals(Object obj)502 public boolean equals(Object obj) { 503 if (!(obj instanceof Element)) { 504 return false; 505 } 506 Element that = (Element) obj; 507 return name.equals(that.name) 508 // not relevant to the identity of the object 509 // && Objects.equals(comment, that.comment) 510 // && type == that.type 511 // && attributes.equals(that.attributes) 512 // && children.equals(that.children) 513 ; 514 } 515 516 /** 517 * Special version of hashcode. Only the name is considered in the identity. 518 */ 519 @Override hashCode()520 public int hashCode() { 521 return name.hashCode() 522 // not relevant to the identity of the object 523 // * 37 + Objects.hashCode(comment) 524 //) * 37 + Objects.hashCode(type) 525 // ) * 37 + attributes.hashCode() 526 // ) * 37 + children.hashCode() 527 ; 528 } 529 isDeprecated()530 public boolean isDeprecated() { 531 return isDeprecatedElement; 532 } 533 isOrdered()534 public boolean isOrdered() { 535 return isOrderedElement; 536 } 537 getElementStatus()538 public ElementStatus getElementStatus() { 539 return elementStatus; 540 } 541 542 /** 543 * @return the rawModel 544 */ getRawModel()545 public String getRawModel() { 546 return rawModel; 547 } 548 } 549 elementFrom(String name)550 private Element elementFrom(String name) { 551 Element result = nameToElement.get(name); 552 if (result == null) { 553 nameToElement.put(name, result = new Element(name)); 554 } 555 return result; 556 } 557 addElement(String name2, String model)558 private void addElement(String name2, String model) { 559 Element element = elementFrom(name2); 560 element.setChildren(this, model, preCommentCache); 561 preCommentCache = null; 562 lastElement = element; 563 lastAttribute = null; 564 } 565 addComment(String comment)566 private void addComment(String comment) { 567 comment = comment.trim(); 568 if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky" 569 if (comment.startsWith("@")) { 570 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment"); 571 } 572 preCommentCache = addUnmodifiable(preCommentCache, comment); 573 } else if (lastElement != null) { 574 lastElement.addComment(comment); 575 } else if (lastAttribute != null) { 576 lastAttribute.addComment(comment); 577 } else { 578 if (comment.startsWith("@")) { 579 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment"); 580 } 581 preCommentCache = addUnmodifiable(preCommentCache, comment); 582 } 583 } 584 585 // TODO hide this 586 /** 587 * @deprecated 588 */ 589 @Override handleElementDecl(String name, String model)590 public void handleElementDecl(String name, String model) { 591 if (SHOW_ALL) { 592 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) > 593 System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >"); 594 } 595 addElement(name, model); 596 } 597 598 // TODO hide this 599 /** 600 * @deprecated 601 */ 602 @Override handleStartDtd(String name, String publicId, String systemId)603 public void handleStartDtd(String name, String publicId, String systemId) { 604 DtdType explicitDtdType = DtdType.valueOf(name); 605 if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) { 606 throw new IllegalArgumentException("Mismatch in dtdTypes"); 607 } 608 }; 609 610 /** 611 * @deprecated 612 */ 613 @Override handleAttributeDecl(String eName, String aName, String type, String mode, String value)614 public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) { 615 if (SHOW_ALL) { 616 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > 617 // <!ATTLIST version number CDATA #REQUIRED > 618 // <!ATTLIST version cldrVersion CDATA #FIXED "27" > 619 620 System.out.println("<!ATTLIST " + eName 621 + " " + aName 622 + " " + type 623 + " " + mode 624 + (value == null ? "" : " \"" + value + "\"") 625 + " >"); 626 } 627 // HACK for 1.1.1 628 if (eName.equals("draft")) { 629 eName = "week"; 630 } 631 addAttribute(eName, aName, type, mode, value); 632 } 633 634 /** 635 * @deprecated 636 */ 637 @Override handleComment(String path, String comment)638 public void handleComment(String path, String comment) { 639 if (SHOW_ALL) { 640 // <!-- true and false are deprecated. --> 641 System.out.println("<!-- " + comment.trim() + " -->"); 642 } 643 addComment(comment); 644 } 645 646 // TODO hide this 647 /** 648 * @deprecated 649 */ 650 @Override handleEndDtd()651 public void handleEndDtd() { 652 throw new XMLFileReader.AbortException(); 653 } 654 655 /** 656 * Note that it always gets the trunk version 657 * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead 658 */ getInstance(DtdType type)659 public static DtdData getInstance(DtdType type) { 660 return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory()); 661 } 662 663 /** 664 * Special form using version, used only by tests, etc. 665 */ getInstance(DtdType type, String version)666 public static DtdData getInstance(DtdType type, String version) { 667 File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory() 668 : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version); 669 670 return getInstance(type, version, directory); 671 } 672 673 private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE = new ConcurrentHashMap<>(); 674 675 /** 676 * Normal version of DtdData 677 * Get a DtdData, given the CLDR root directory. 678 * @param type which DtdType to return 679 * @param directory the CLDR Root directory, which contains the "common" directory. 680 * @return 681 */ getInstance(DtdType type, File directory)682 public static DtdData getInstance(DtdType type, File directory) { 683 Pair<DtdType, File> key = new Pair<>(type, directory); 684 DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory)); 685 return data; 686 } 687 getInstance(DtdType type, String version, File directory)688 private static DtdData getInstance(DtdType type, String version, File directory) { 689 DtdData simpleHandler = new DtdData(type, version); 690 XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler); 691 if (type != type.rootType) { 692 // read the real first, then add onto it. 693 readFile(type.rootType, xfr, directory); 694 } 695 readFile(type, xfr, directory); 696 // HACK 697 if (type == DtdType.ldmlICU) { 698 Element special = simpleHandler.nameToElement.get("special"); 699 for (String extraElementName : Arrays.asList( 700 "icu:breakIteratorData", 701 "icu:UCARules", 702 "icu:scripts", 703 "icu:transforms", 704 "icu:ruleBasedNumberFormats", 705 "icu:isLeapMonth", 706 "icu:version", 707 "icu:breakDictionaryData", 708 "icu:depends")) { 709 Element extraElement = simpleHandler.nameToElement.get(extraElementName); 710 special.children.put(extraElement, special.children.size()); 711 } 712 } 713 if (simpleHandler.ROOT.children.size() == 0) { 714 throw new IllegalArgumentException(); // should never happen 715 } 716 simpleHandler.finish(); 717 simpleHandler.freeze(); 718 return simpleHandler; 719 } 720 finish()721 private void finish() { 722 dtdComparator = new DtdComparator(); 723 } 724 readFile(DtdType type, XMLFileReader xfr, File directory)725 public static void readFile(DtdType type, XMLFileReader xfr, File directory) { 726 File file = new File(directory, type.dtdPath); 727 StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>" 728 + "<!DOCTYPE " + type 729 + " SYSTEM '" + file.getAbsolutePath() + "'>"); 730 xfr.read(type.toString(), s, -1, true); // DTD_TYPE_TO_FILE.get(type) 731 } 732 freeze()733 private void freeze() { 734 if (version == null) { // only generate for new versions 735 MergeLists<String> elementMergeList = new MergeLists<String>(); 736 elementMergeList.add(dtdType.toString()); 737 MergeLists<String> attributeMergeList = new MergeLists<String>(); 738 attributeMergeList.add("_q"); 739 740 for (Element element : nameToElement.values()) { 741 if (element.children.size() > 0) { 742 Collection<String> names = getNames(element.children.keySet()); 743 elementMergeList.add(names); 744 if (DEBUG) { 745 System.out.println(element.getName() + "\t→\t" + names); 746 } 747 } 748 if (element.attributes.size() > 0) { 749 Collection<String> names = getNames(element.attributes.keySet()); 750 attributeMergeList.add(names); 751 if (DEBUG) { 752 System.out.println(element.getName() + "\t→\t@" + names); 753 } 754 } 755 } 756 List<String> elementList = elementMergeList.merge(); 757 List<String> attributeList = attributeMergeList.merge(); 758 if (DEBUG) { 759 System.out.println("Element Ordering:\t" + elementList); 760 System.out.println("Attribute Ordering:\t" + attributeList); 761 } 762 elementComparator = new MapComparator<String>(elementList).setErrorOnMissing(true).freeze(); 763 attributeComparator = new MapComparator<String>(attributeList).setErrorOnMissing(true).freeze(); 764 } 765 nameToAttributes.freeze(); 766 nameToElement = Collections.unmodifiableMap(nameToElement); 767 } 768 getNames(Collection<? extends Named> keySet)769 private Collection<String> getNames(Collection<? extends Named> keySet) { 770 List<String> result = new ArrayList<String>(); 771 for (Named e : keySet) { 772 result.add(e.getName()); 773 } 774 return result; 775 } 776 777 public enum DtdItem { 778 ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE 779 } 780 781 public interface AttributeValueComparator { compare(String element, String attribute, String value1, String value2)782 public int compare(String element, String attribute, String value1, String value2); 783 } 784 getDtdComparator(AttributeValueComparator avc)785 public Comparator<String> getDtdComparator(AttributeValueComparator avc) { 786 return dtdComparator; 787 } 788 789 private class DtdComparator implements Comparator<String> { 790 @Override compare(String path1, String path2)791 public int compare(String path1, String path2) { 792 XPathParts a = XPathParts.getFrozenInstance(path1); 793 XPathParts b = XPathParts.getFrozenInstance(path2); 794 // there must always be at least one element 795 String baseA = a.getElement(0); 796 String baseB = b.getElement(0); 797 if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) { 798 throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB); 799 } 800 int min = Math.min(a.size(), b.size()); 801 Element parent = ROOT; 802 Element elementA; 803 for (int i = 1; i < min; ++i, parent = elementA) { 804 // add extra test for "fake" elements, used in diffing. they always start with _ 805 String elementRawA = a.getElement(i); 806 String elementRawB = b.getElement(i); 807 if (elementRawA.startsWith("_")) { 808 return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1; 809 } else if (elementRawB.startsWith("_")) { 810 return 1; 811 } 812 // 813 elementA = nameToElement.get(elementRawA); 814 Element elementB = nameToElement.get(elementRawB); 815 if (elementA != elementB) { 816 int aa = parent.children.get(elementA); 817 int bb = parent.children.get(elementB); 818 return aa - bb; 819 } 820 int countA = a.getAttributeCount(i); 821 int countB = b.getAttributeCount(i); 822 if (countA == 0 && countB == 0) { 823 continue; 824 } 825 // we have two ways to compare the attributes. One based on the dtd, 826 // and one based on explicit comparators 827 828 // at this point the elements are the same and correspond to elementA 829 // in the dtd 830 831 // Handle the special added elements 832 String aqValue = a.getAttributeValue(i, "_q"); 833 if (aqValue != null) { 834 String bqValue = b.getAttributeValue(i, "_q"); 835 if (!aqValue.equals(bqValue)) { 836 int aValue = Integer.parseInt(aqValue); 837 int bValue = Integer.parseInt(bqValue); 838 return aValue - bValue; 839 } 840 --countA; 841 --countB; 842 } 843 844 attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) { 845 Attribute main = attr.getKey(); 846 String valueA = a.getAttributeValue(i, main.name); 847 String valueB = b.getAttributeValue(i, main.name); 848 if (valueA == null) { 849 if (valueB != null) { 850 return -1; 851 } 852 } else if (valueB == null) { 853 return 1; 854 } else if (valueA.equals(valueB)) { 855 --countA; 856 --countB; 857 if (countA == 0 && countB == 0) { 858 break attributes; 859 } 860 continue; // TODO 861 } else if (main.attributeValueComparator != null) { 862 return main.attributeValueComparator.compare(valueA, valueB); 863 } else if (main.values.size() != 0) { 864 int aa = main.values.get(valueA); 865 int bb = main.values.get(valueB); 866 return aa - bb; 867 } else { 868 return valueA.compareTo(valueB); 869 } 870 } 871 if (countA != 0 || countB != 0) { 872 throw new IllegalArgumentException(); 873 } 874 } 875 return a.size() - b.size(); 876 } 877 } 878 getAttributeComparator()879 public MapComparator<String> getAttributeComparator() { 880 return attributeComparator; 881 } 882 getElementComparator()883 public MapComparator<String> getElementComparator() { 884 return elementComparator; 885 } 886 getAttributesFromName()887 public Relation<String, Attribute> getAttributesFromName() { 888 return nameToAttributes; 889 } 890 getElementFromName()891 public Map<String, Element> getElementFromName() { 892 return nameToElement; 893 } 894 toString()895 public String toString() { 896 StringBuilder b = new StringBuilder(); 897 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) > 898 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. --> 899 Seen seen = new Seen(dtdType); 900 seen.seenElements.add(ANY); 901 seen.seenElements.add(PCDATA); 902 toString(ROOT, b, seen); 903 904 // Hack for ldmlIcu: catch the items that are not mentioned in the original 905 int currentEnd = b.length(); 906 for (Element e : nameToElement.values()) { 907 toString(e, b, seen); 908 } 909 if (currentEnd != b.length()) { 910 b.insert(currentEnd, 911 System.lineSeparator() + System.lineSeparator() 912 + "<!-- Elements not reachable from root! -->" 913 + System.lineSeparator()); 914 } 915 return b.toString(); 916 } 917 918 static final class Seen { 919 Set<Element> seenElements = new HashSet<Element>(); 920 Set<Attribute> seenAttributes = new HashSet<Attribute>(); 921 Seen(DtdType dtdType)922 public Seen(DtdType dtdType) { 923 if (dtdType.rootType == dtdType) { 924 return; 925 } 926 DtdData otherData = DtdData.getInstance(dtdType.rootType); 927 walk(otherData, otherData.ROOT); 928 seenElements.remove(otherData.nameToElement.get("special")); 929 } 930 walk(DtdData otherData, Element current)931 private void walk(DtdData otherData, Element current) { 932 seenElements.add(current); 933 seenAttributes.addAll(current.attributes.keySet()); 934 for (Element e : current.children.keySet()) { 935 walk(otherData, e); 936 } 937 } 938 } 939 getDescendents(Element start, Set<Element> toAddTo)940 public Set<Element> getDescendents(Element start, Set<Element> toAddTo) { 941 if (!toAddTo.contains(start)) { 942 toAddTo.add(start); 943 for (Element e : start.children.keySet()) { 944 getDescendents(e, toAddTo); 945 } 946 } 947 return toAddTo; 948 } 949 toString(Element current, StringBuilder b, Seen seen)950 private void toString(Element current, StringBuilder b, Seen seen) { 951 boolean first = true; 952 if (seen.seenElements.contains(current)) { 953 return; 954 } 955 seen.seenElements.add(current); 956 boolean elementDeprecated = isDeprecated(current.name, "*", "*"); 957 958 showComments(b, current.commentsPre, true); 959 b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >"); 960 if (USE_SYNTHESIZED) { 961 Element aliasElement = getElementFromName().get("alias"); 962 //b.append(current.rawChildren); 963 if (!current.children.isEmpty()) { 964 LinkedHashSet<Element> elements = new LinkedHashSet<Element>(current.children.keySet()); 965 boolean hasAlias = aliasElement != null && elements.remove(aliasElement); 966 //boolean hasSpecial = specialElement != null && elements.remove(specialElement); 967 if (hasAlias) { 968 b.append("(alias |"); 969 } 970 b.append("("); 971 // <!ELEMENT transformNames ( alias | (transformName | special)* ) > 972 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) > 973 974 for (Element e : elements) { 975 if (first) { 976 first = false; 977 } else { 978 b.append(", "); 979 } 980 b.append(e.name); 981 if (e.type != ElementType.PCDATA) { 982 b.append("*"); 983 } 984 } 985 if (hasAlias) { 986 b.append(")"); 987 } 988 b.append(")"); 989 } else { 990 b.append(current.type == null ? "???" : current.type.source); 991 } 992 b.append(">"); 993 } 994 showComments(b, current.commentsPost, false); 995 if (isOrdered(current.name)) { 996 b.append(COMMENT_PREFIX + "<!--@ORDERED-->"); 997 } 998 if (current.getElementStatus() != ElementStatus.regular) { 999 b.append(COMMENT_PREFIX + "<!--@" 1000 + current.getElementStatus().toString().toUpperCase(Locale.ROOT) 1001 + "-->"); 1002 } 1003 if (elementDeprecated) { 1004 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1005 } 1006 1007 LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>(); 1008 1009 for (Attribute a : current.attributes.keySet()) { 1010 if (seen.seenAttributes.contains(a)) { 1011 continue; 1012 } 1013 seen.seenAttributes.add(a); 1014 boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*"); 1015 1016 deprecatedValues.clear(); 1017 1018 showComments(b, a.commentsPre, true); 1019 b.append("\n<!ATTLIST " + current.name + " " + a.name); 1020 if (a.type == AttributeType.ENUMERATED_TYPE) { 1021 b.append(" ("); 1022 first = true; 1023 for (String s : a.values.keySet()) { 1024 if (first) { 1025 first = false; 1026 } else { 1027 b.append(" | "); 1028 } 1029 b.append(s); 1030 if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) { 1031 deprecatedValues.add(s); 1032 } 1033 } 1034 b.append(")"); 1035 } else { 1036 b.append(' ').append(a.type); 1037 } 1038 if (a.mode != Mode.NULL) { 1039 b.append(" ").append(a.mode.source); 1040 } 1041 if (a.defaultValue != null) { 1042 b.append(" \"").append(a.defaultValue).append('"'); 1043 } 1044 b.append(" >"); 1045 showComments(b, a.commentsPost, false); 1046 // if (attributeDeprecated != deprecatedComment) { 1047 // System.out.println("*** BAD DEPRECATION ***" + a); 1048 // } 1049 if (a.matchValue != null) { 1050 b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->"); 1051 } 1052 if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) { 1053 b.append(COMMENT_PREFIX + "<!--@METADATA-->"); 1054 } else if (!isDistinguishing(current.name, a.name)) { 1055 b.append(COMMENT_PREFIX + "<!--@VALUE-->"); 1056 } 1057 if (attributeDeprecated) { 1058 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1059 } else if (!deprecatedValues.isEmpty()) { 1060 b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + CollectionUtilities.join(deprecatedValues, ", ") + "-->"); 1061 } 1062 } 1063 if (current.children.size() > 0) { 1064 for (Element e : current.children.keySet()) { 1065 toString(e, b, seen); 1066 } 1067 } 1068 } 1069 showComments(StringBuilder b, Set<String> comments, boolean separate)1070 private void showComments(StringBuilder b, Set<String> comments, boolean separate) { 1071 if (comments == null) { 1072 return; 1073 } 1074 if (separate && b.length() != 0) { 1075 b.append(System.lineSeparator()); 1076 } 1077 for (String c : comments) { 1078 boolean deprecatedComment = false; // the following served its purpose... c.toLowerCase(Locale.ENGLISH).contains("deprecat"); 1079 if (!deprecatedComment) { 1080 if (separate) { 1081 // special handling for very first comment 1082 if (b.length() == 0) { 1083 b.append("<!--") 1084 .append(System.lineSeparator()) 1085 .append(c) 1086 .append(System.lineSeparator()) 1087 .append("-->"); 1088 continue; 1089 } 1090 b.append(System.lineSeparator()); 1091 } else { 1092 b.append(COMMENT_PREFIX); 1093 } 1094 b.append("<!-- ").append(c).append(" -->"); 1095 } 1096 } 1097 } 1098 removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1099 public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) { 1100 for (Iterator<T> it = elements.iterator(); it.hasNext();) { 1101 T item = it.next(); 1102 if (matcher.transform(item) == Boolean.TRUE) { 1103 it.remove(); 1104 return item; 1105 } 1106 } 1107 return null; 1108 } 1109 getElements()1110 public Set<Element> getElements() { 1111 return new LinkedHashSet<Element>(nameToElement.values()); 1112 } 1113 getAttributes()1114 public Set<Attribute> getAttributes() { 1115 return new LinkedHashSet<Attribute>(nameToAttributes.values()); 1116 } 1117 isDistinguishing(String elementName, String attribute)1118 public boolean isDistinguishing(String elementName, String attribute) { 1119 return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished; 1120 } 1121 1122 static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft")); 1123 addUnmodifiable(Set<String> comment, String addition)1124 static final Set<String> addUnmodifiable(Set<String> comment, String addition) { 1125 if (comment == null) { 1126 return Collections.singleton(addition); 1127 } else { 1128 comment = new LinkedHashSet<>(comment); 1129 comment.add(addition); 1130 return Collections.unmodifiableSet(comment); 1131 } 1132 } 1133 1134 public class IllegalByDtdException extends RuntimeException { 1135 private static final long serialVersionUID = 1L; 1136 public final String elementName; 1137 public final String attributeName; 1138 public final String attributeValue; 1139 IllegalByDtdException(String elementName, String attributeName, String attributeValue)1140 public IllegalByDtdException(String elementName, String attributeName, String attributeValue) { 1141 this.elementName = elementName; 1142 this.attributeName = attributeName; 1143 this.attributeValue = attributeValue; 1144 } 1145 1146 @Override getMessage()1147 public String getMessage() { 1148 return "Dtd " + dtdType 1149 + " doesn’t allow " 1150 + "element=" + elementName 1151 + (attributeName == null ? "" : ", attribute: " + attributeName) 1152 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue); 1153 } 1154 } 1155 1156 //@SuppressWarnings("unused") isDeprecated(String elementName, String attributeName, String attributeValue)1157 public boolean isDeprecated(String elementName, String attributeName, String attributeValue) { 1158 Element element = nameToElement.get(elementName); 1159 if (element == null) { 1160 throw new IllegalByDtdException(elementName, attributeName, attributeValue); 1161 } else if (element.isDeprecatedElement) { 1162 return true; 1163 } 1164 if ("*".equals(attributeName) || "_q".equals(attributeName)) { 1165 return false; 1166 } 1167 Attribute attribute = element.getAttributeNamed(attributeName); 1168 if (attribute == null) { 1169 throw new IllegalByDtdException(elementName, attributeName, attributeValue); 1170 } else if (attribute.isDeprecatedAttribute) { 1171 return true; 1172 } 1173 return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*" 1174 } 1175 isOrdered(String elementName)1176 public boolean isOrdered(String elementName) { 1177 Element element = nameToElement.get(elementName); 1178 if (element == null) { 1179 if (elementName.startsWith("icu:")) { 1180 return false; 1181 } 1182 throw new IllegalByDtdException(elementName, null, null); 1183 } 1184 return element.isOrderedElement; 1185 } 1186 getAttributeStatus(String elementName, String attributeName)1187 public AttributeStatus getAttributeStatus(String elementName, String attributeName) { 1188 if ("_q".equals(attributeName)) { 1189 return AttributeStatus.distinguished; // special case 1190 } 1191 Element element = nameToElement.get(elementName); 1192 if (element == null) { 1193 if (elementName.startsWith("icu:")) { 1194 return AttributeStatus.distinguished; 1195 } 1196 throw new IllegalByDtdException(elementName, attributeName, null); 1197 } 1198 Attribute attribute = element.getAttributeNamed(attributeName); 1199 if (attribute == null) { 1200 if (elementName.startsWith("icu:")) { 1201 return AttributeStatus.distinguished; 1202 } 1203 throw new IllegalByDtdException(elementName, attributeName, null); 1204 } 1205 return attribute.attributeStatus; 1206 } 1207 1208 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1209 private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze(); 1210 1211 static MapComparator<String> dayValueOrder = new MapComparator<String>().add( 1212 "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze(); 1213 static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add( 1214 "midnight", "am", "noon", "pm", 1215 "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2", 1216 // The ones on the following line are no longer used actively. Can be removed later? 1217 "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze(); 1218 static MapComparator<String> listPatternOrder = new MapComparator<String>().add( 1219 "start", "middle", "end", "2", "3").freeze(); 1220 static MapComparator<String> widthOrder = new MapComparator<String>().add( 1221 "abbreviated", "narrow", "short", "wide", "all").freeze(); 1222 static MapComparator<String> lengthOrder = new MapComparator<String>().add( 1223 "full", "long", "medium", "short").freeze(); 1224 static MapComparator<String> dateFieldOrder = new MapComparator<String>().add( 1225 "era", "era-short", "era-narrow", 1226 "year", "year-short", "year-narrow", 1227 "quarter", "quarter-short", "quarter-narrow", 1228 "month", "month-short", "month-narrow", 1229 "week", "week-short", "week-narrow", 1230 "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow", 1231 "day", "day-short", "day-narrow", 1232 "dayOfYear", "dayOfYear-short", "dayOfYear-narrow", 1233 "weekday", "weekday-short", "weekday-narrow", 1234 "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow", 1235 "sun", "sun-short", "sun-narrow", 1236 "mon", "mon-short", "mon-narrow", 1237 "tue", "tue-short", "tue-narrow", 1238 "wed", "wed-short", "wed-narrow", 1239 "thu", "thu-short", "thu-narrow", 1240 "fri", "fri-short", "fri-narrow", 1241 "sat", "sat-short", "sat-narrow", 1242 "dayperiod-short", "dayperiod", "dayperiod-narrow", 1243 "hour", "hour-short", "hour-narrow", 1244 "minute", "minute-short", "minute-narrow", 1245 "second", "second-short", "second-narrow", 1246 "zone", "zone-short", "zone-narrow").freeze(); 1247 static MapComparator<String> unitOrder = new MapComparator<String>().add( 1248 "acceleration-g-force", "acceleration-meter-per-second-squared", 1249 "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second", 1250 "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter", 1251 "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch", 1252 "area-dunam", 1253 "concentr-karat", 1254 "concentr-milligram-per-deciliter", "concentr-millimole-per-liter", 1255 "concentr-part-per-million", "concentr-percent", "concentr-permille", "concentr-permyriad", 1256 "concentr-mole", 1257 "consumption-liter-per-kilometer", "consumption-liter-per-100kilometers", 1258 "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial", 1259 "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit", 1260 "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit", 1261 "digital-byte", "digital-bit", 1262 "duration-century", "duration-decade", 1263 "duration-year", "duration-year-person", 1264 "duration-month", "duration-month-person", 1265 "duration-week", "duration-week-person", 1266 "duration-day", "duration-day-person", 1267 "duration-hour", "duration-minute", "duration-second", 1268 "duration-millisecond", "duration-microsecond", "duration-nanosecond", 1269 "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt", 1270 "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour", 1271 "energy-electronvolt", 1272 "energy-british-thermal-unit", 1273 "energy-therm-us", 1274 "force-pound-force", 1275 "force-newton", 1276 "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", "frequency-hertz", 1277 "graphics-em", "graphics-pixel", "graphics-megapixel", 1278 "graphics-pixel-per-centimeter", "graphics-pixel-per-inch", 1279 "graphics-dot-per-centimeter", "graphics-dot-per-inch", 1280 "length-kilometer", "length-meter", "length-decimeter", "length-centimeter", 1281 "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer", 1282 "length-mile", "length-yard", "length-foot", "length-inch", 1283 "length-parsec", "length-light-year", "length-astronomical-unit", 1284 "length-furlong", "length-fathom", 1285 "length-nautical-mile", "length-mile-scandinavian", 1286 "length-point", 1287 "length-solar-radius", 1288 "light-lux", 1289 "light-solar-luminosity", 1290 "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram", 1291 "mass-ton", "mass-stone", "mass-pound", "mass-ounce", 1292 "mass-ounce-troy", "mass-carat", 1293 "mass-dalton", 1294 "mass-earth-mass", 1295 "mass-solar-mass", 1296 "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt", 1297 "power-horsepower", 1298 "pressure-millimeter-of-mercury", 1299 "pressure-pound-per-square-inch", "pressure-inch-hg", "pressure-bar", "pressure-millibar", "pressure-atmosphere", 1300 "pressure-pascal", 1301 "pressure-hectopascal", 1302 "pressure-kilopascal", 1303 "pressure-megapascal", 1304 "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot", 1305 "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin", 1306 "torque-pound-foot", 1307 "torque-newton-meter", 1308 "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter", 1309 "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch", 1310 "volume-megaliter", "volume-hectoliter", "volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter", 1311 "volume-pint-metric", "volume-cup-metric", 1312 "volume-acre-foot", 1313 "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup", 1314 "volume-fluid-ounce", "volume-fluid-ounce-imperial", "volume-tablespoon", "volume-teaspoon", 1315 "volume-barrel").freeze(); 1316 1317 static MapComparator<String> countValueOrder = new MapComparator<String>().add( 1318 "0", "1", "zero", "one", "two", "few", "many", "other").freeze(); 1319 static MapComparator<String> unitLengthOrder = new MapComparator<String>().add( 1320 "long", "short", "narrow").freeze(); 1321 static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add( 1322 "standard", "accounting").freeze(); 1323 static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator(); 1324 1325 static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator(); 1326 1327 // Hack for US 1328 static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<String>() { 1329 @Override 1330 public int compare(String o1, String o2) { 1331 if (o1.contains("{")) { 1332 o1 = o1.replace("{", ""); 1333 } 1334 if (o2.contains("{")) { 1335 o2 = o2.replace("{", ""); 1336 } 1337 return COMP.compare(o1, o2); 1338 } 1339 1340 }; 1341 getAttributeValueComparator(String element, String attribute)1342 public static Comparator<String> getAttributeValueComparator(String element, String attribute) { 1343 return getAttributeValueComparator(DtdType.ldml, element, attribute); 1344 } 1345 getAttributeValueComparator(DtdType type, String element, String attribute)1346 static Comparator<String> getAttributeValueComparator(DtdType type, String element, String attribute) { 1347 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1348 Comparator<String> comp = valueOrdering; 1349 if (type != DtdType.ldml && type != DtdType.ldmlICU) { 1350 return comp; 1351 } 1352 if (attribute.equals("day")) { // && (element.startsWith("weekend") 1353 comp = dayValueOrder; 1354 } else if (attribute.equals("type")) { 1355 if (element.endsWith("FormatLength")) { 1356 comp = lengthOrder; 1357 } else if (element.endsWith("Width")) { 1358 comp = widthOrder; 1359 } else if (element.equals("day")) { 1360 comp = dayValueOrder; 1361 } else if (element.equals("field")) { 1362 comp = dateFieldOrder; 1363 } else if (element.equals("zone")) { 1364 comp = zoneOrder; 1365 } else if (element.equals("listPatternPart")) { 1366 comp = listPatternOrder; 1367 } else if (element.equals("currencyFormat")) { 1368 comp = currencyFormatOrder; 1369 } else if (element.equals("unitLength")) { 1370 comp = unitLengthOrder; 1371 } else if (element.equals("unit")) { 1372 comp = unitOrder; 1373 } else if (element.equals("dayPeriod")) { 1374 comp = dayPeriodOrder; 1375 } 1376 } else if (attribute.equals("count") && !element.equals("minDays")) { 1377 comp = countValueOrder; 1378 } else if (attribute.equals("cp") && element.equals("annotation")) { 1379 comp = UNICODE_SET_COMPARATOR; 1380 } 1381 return comp; 1382 } 1383 1384 /** 1385 * Comparator for attributes in CLDR files 1386 */ 1387 private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() { 1388 @Override 1389 public int compare(String element, String attribute, String value1, String value2) { 1390 Comparator<String> comp = getAttributeValueComparator(element, attribute); 1391 return comp.compare(value1, value2); 1392 } 1393 }; 1394 hasValue(String elementName)1395 public boolean hasValue(String elementName) { 1396 return nameToElement.get(elementName).type == ElementType.PCDATA; 1397 } 1398 isMetadata(XPathParts pathPlain)1399 public boolean isMetadata(XPathParts pathPlain) { 1400 for (String s : pathPlain.getElements()) { 1401 Element e = getElementFromName().get(s); 1402 if (e.elementStatus == ElementStatus.metadata) { 1403 return true; 1404 } 1405 } 1406 return false; 1407 } 1408 isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1409 public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) { 1410 // TODO Don't use hard-coded list; instead add to DTD annotations 1411 final String element1 = pathPlain.getElement(1); 1412 final String element2 = pathPlain.getElement(2); 1413 final String elementN = pathPlain.getElement(-1); 1414 switch (dtdType2) { 1415 case ldml: 1416 switch (element1) { 1417 case "generation": 1418 case "metadata": 1419 return true; 1420 } 1421 break; 1422 case ldmlBCP47: 1423 switch (element1) { 1424 case "generation": 1425 case "version": 1426 return true; 1427 } 1428 break; 1429 ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment 1430 case supplementalData: 1431 // these are NOT under /metadata/ but are actually metadata 1432 switch (element1) { 1433 case "generation": 1434 case "version": 1435 case "validity": 1436 case "references": 1437 case "coverageLevels": 1438 return true; 1439 case "transforms": 1440 return elementN.equals("comment"); 1441 case "metadata": 1442 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata. 1443 switch (element2) { 1444 case "validity": 1445 case "serialElements": 1446 case "suppress": 1447 case "distinguishing": 1448 case "blocking": 1449 case "casingData": 1450 return true; 1451 } 1452 break; 1453 } 1454 break; 1455 default: 1456 } 1457 return false; 1458 } 1459 isDeprecated(XPathParts pathPlain)1460 public boolean isDeprecated(XPathParts pathPlain) { 1461 for (int i = 0; i < pathPlain.size(); ++i) { 1462 String elementName = pathPlain.getElement(i); 1463 if (isDeprecated(elementName, "*", null)) { 1464 return true; 1465 } 1466 for (String attribute : pathPlain.getAttributeKeys(i)) { 1467 String attributeValue = pathPlain.getAttributeValue(i, attribute); 1468 if (isDeprecated(elementName, attribute, attributeValue)) { 1469 return true; 1470 } 1471 } 1472 } 1473 return false; 1474 } 1475 1476 public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings(); 1477 public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings(); 1478 public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings(); 1479 1480 private static class XPathPartsSet { 1481 private final Set<XPathParts> list = new LinkedHashSet<>(); 1482 addElement(String element)1483 private void addElement(String element) { 1484 if (list.isEmpty()) { 1485 list.add(new XPathParts().addElement(element)); 1486 } else { 1487 for (XPathParts item : list) { 1488 item.addElement(element); 1489 } 1490 } 1491 } 1492 addAttribute(String attribute, String attributeValue)1493 private void addAttribute(String attribute, String attributeValue) { 1494 for (XPathParts item : list) { 1495 item.addAttribute(attribute, attributeValue); 1496 } 1497 } 1498 setElement(int i, String string)1499 private void setElement(int i, String string) { 1500 for (XPathParts item : list) { 1501 item.setElement(i, string); 1502 } 1503 } 1504 addAttributes(String attribute, List<String> attributeValues)1505 private void addAttributes(String attribute, List<String> attributeValues) { 1506 if (attributeValues.size() == 1) { 1507 addAttribute(attribute, attributeValues.iterator().next()); 1508 } else { 1509 // duplicate all the items in the list with the given values 1510 Set<XPathParts> newList = new LinkedHashSet<>(); 1511 for (XPathParts item : list) { 1512 for (String attributeValue : attributeValues) { 1513 XPathParts newItem = item.cloneAsThawed(); 1514 newItem.addAttribute(attribute, attributeValue); 1515 newList.add(newItem); 1516 } 1517 } 1518 list.clear(); 1519 list.addAll(newList); 1520 } 1521 } 1522 toStrings()1523 private ImmutableSet<String> toStrings() { 1524 Builder<String> result = new ImmutableSet.Builder<>(); 1525 1526 for (XPathParts item : list) { 1527 result.add(item.toString()); 1528 } 1529 return result.build(); 1530 } 1531 1532 @Override toString()1533 public String toString() { 1534 return list.toString(); 1535 } 1536 } 1537 getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)1538 public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) { 1539 extras.clear(); 1540 Map<String, String> valueAttributes = new HashMap<>(); 1541 XPathPartsSet pathResult = new XPathPartsSet(); 1542 String element = null; 1543 for (int i = 0; i < pathPlain.size(); ++i) { 1544 element = pathPlain.getElement(i); 1545 pathResult.addElement(element); 1546 valueAttributes.clear(); 1547 for (String attribute : pathPlain.getAttributeKeys(i)) { 1548 AttributeStatus status = getAttributeStatus(element, attribute); 1549 final String attributeValue = pathPlain.getAttributeValue(i, attribute); 1550 switch (status) { 1551 case distinguished: 1552 AttributeType attrType = getAttributeType(element, attribute); 1553 if (attrType == AttributeType.NMTOKENS) { 1554 pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue)); 1555 } else { 1556 pathResult.addAttribute(attribute, attributeValue); 1557 } 1558 break; 1559 case value: 1560 valueAttributes.put(attribute, attributeValue); 1561 break; 1562 case metadata: 1563 break; 1564 } 1565 } 1566 if (!valueAttributes.isEmpty()) { 1567 boolean hasValue = hasValue(element); 1568 // if it doesn't have a value, we construct new child elements, with _ prefix 1569 // if it does have a value, we have to play a further trick, since 1570 // we can't have a value and child elements at the same level. 1571 // So we use a _ suffix on the element. 1572 if (hasValue) { 1573 pathResult.setElement(i, element + "_"); 1574 } else { 1575 int debug = 0; 1576 } 1577 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) { 1578 final String attribute = attributeAndValue.getKey(); 1579 final String attributeValue = attributeAndValue.getValue(); 1580 1581 Set<String> pathsShort = pathResult.toStrings(); 1582 AttributeType attrType = getAttributeType(element, attribute); 1583 for (String pathShort : pathsShort) { 1584 pathShort += "/_" + attribute; 1585 if (attrType == AttributeType.NMTOKENS) { 1586 for (String valuePart : SPACE_SPLITTER.split(attributeValue)) { 1587 extras.put(pathShort, valuePart); 1588 } 1589 } else { 1590 extras.put(pathShort, attributeValue); 1591 } 1592 } 1593 } 1594 if (hasValue) { 1595 pathResult.setElement(i, element); // restore 1596 } 1597 } 1598 } 1599 // Only add the path if it could have a value, looking at the last element 1600 if (!hasValue(element)) { 1601 return null; 1602 } 1603 return pathResult.toStrings(); 1604 } 1605 getAttributeType(String elementName, String attributeName)1606 public AttributeType getAttributeType(String elementName, String attributeName) { 1607 Attribute attr = getAttribute(elementName, attributeName); 1608 return (attr != null) ? attr.type : null; 1609 } 1610 getAttribute(String elementName, String attributeName)1611 public Attribute getAttribute(String elementName, String attributeName) { 1612 Element element = nameToElement.get(elementName); 1613 return (element != null) ? element.getAttributeNamed(attributeName) : null; 1614 } 1615 1616 // TODO: add support for following to DTD annotations, and rework API 1617 1618 static final Set<String> SPACED_VALUES = ImmutableSet.of( 1619 "idValidity", 1620 "languageGroup"); 1621 getValueSplitter(XPathParts pathPlain)1622 public static Splitter getValueSplitter(XPathParts pathPlain) { 1623 if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) { 1624 return SPACE_SPLITTER; 1625 } else if (pathPlain.getElement(-1).equals("annotation") 1626 && !pathPlain.getAttributeKeys(-1).contains("tts")) { 1627 return BAR_SPLITTER; 1628 } 1629 return CR_SPLITTER; 1630 } 1631 isComment(XPathParts pathPlain, String line)1632 public static boolean isComment(XPathParts pathPlain, String line) { 1633 if (pathPlain.contains("transform")) { 1634 if (line.startsWith("#")) { 1635 return true; 1636 } 1637 } 1638 return false; 1639 } 1640 isExtraSplit(String extraPath)1641 public static boolean isExtraSplit(String extraPath) { 1642 if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) { 1643 return true; 1644 } 1645 return false; 1646 } 1647 1648 /** 1649 * Return the value status for an EAV 1650 */ getValueStatus(String elementName, String attributeName, String value)1651 public ValueStatus getValueStatus(String elementName, String attributeName, String value) { 1652 Element element = nameToElement.get(elementName); 1653 if (element == null) { 1654 return ValueStatus.invalid; 1655 } 1656 Attribute attr = element.getAttributeNamed(attributeName); 1657 if (attr == null) { 1658 return ValueStatus.invalid; 1659 } 1660 return attr.getValueStatus(value); 1661 } 1662 1663 /** 1664 * Return element-attribute pairs with non-enumerated values, for quick checks. 1665 */ getNonEnumerated(Map<String,String> matchValues)1666 public Multimap<String, String> getNonEnumerated(Map<String,String> matchValues) { 1667 Multimap<String,String> nonEnumeratedElementToAttribute = TreeMultimap.create(); // make tree for ease of debugging 1668 for (Entry<String, Element> entry : nameToElement.entrySet()) { 1669 Element element = entry.getValue(); 1670 for (Attribute attribute : element.attributes.keySet()) { 1671 if (attribute.type != AttributeType.ENUMERATED_TYPE) { 1672 String elementName = element.getName(); 1673 String attrName = attribute.getName(); 1674 nonEnumeratedElementToAttribute.put(elementName, attrName); 1675 if (attribute.matchValue != null) { 1676 matchValues.put(elementName + "\t" + attrName, attribute.matchValue.getName()); 1677 } 1678 } 1679 } 1680 } 1681 return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute); 1682 } 1683 } 1684