1 package org.unicode.cldr.util; 2 3 import com.google.common.base.CharMatcher; 4 import com.google.common.base.Joiner; 5 import com.google.common.base.Splitter; 6 import com.google.common.collect.ImmutableMultimap; 7 import com.google.common.collect.ImmutableSet; 8 import com.google.common.collect.ImmutableSet.Builder; 9 import com.google.common.collect.ImmutableSetMultimap; 10 import com.google.common.collect.Multimap; 11 import com.google.common.collect.TreeMultimap; 12 import com.ibm.icu.impl.Relation; 13 import com.ibm.icu.text.Transform; 14 import java.io.File; 15 import java.io.StringReader; 16 import java.util.ArrayList; 17 import java.util.Arrays; 18 import java.util.Collection; 19 import java.util.Collections; 20 import java.util.Comparator; 21 import java.util.HashMap; 22 import java.util.HashSet; 23 import java.util.Iterator; 24 import java.util.LinkedHashMap; 25 import java.util.LinkedHashSet; 26 import java.util.List; 27 import java.util.Locale; 28 import java.util.Map; 29 import java.util.Map.Entry; 30 import java.util.Set; 31 import java.util.Stack; 32 import java.util.TreeMap; 33 import java.util.concurrent.ConcurrentHashMap; 34 import java.util.concurrent.ConcurrentMap; 35 import java.util.regex.Pattern; 36 import org.unicode.cldr.util.DtdData.Element.ValueConstraint; 37 import org.unicode.cldr.util.MatchValue.LiteralMatchValue; 38 import org.unicode.cldr.util.personname.PersonNameFormatter; 39 40 /** 41 * An immutable object that contains the structure of a DTD. 
42 * 43 * @author markdavis 44 */ 45 public class DtdData extends XMLFileReader.SimpleHandler { 46 private static final String COMMENT_PREFIX = System.lineSeparator() + " "; 47 private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false); 48 private static final boolean USE_SYNTHESIZED = false; 49 50 private static final boolean DEBUG = false; 51 private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]"); 52 53 private final Relation<String, Attribute> nameToAttributes = 54 Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class); 55 private Map<String, Element> nameToElement = new HashMap<>(); 56 private MapComparator<String> elementComparator; 57 private MapComparator<String> attributeComparator; 58 59 // TODO Make this data driven. See https://unicode-org.atlassian.net/browse/CLDR-17321 60 public static final Multimap<DtdType, String> HACK_PCDATA_ALLOWS_EMPTY = 61 ImmutableMultimap.<DtdType, String>builder() 62 .putAll( 63 DtdType.ldml, 64 "nameOrderLocales", 65 "foreignSpaceReplacement", 66 "nativeSpaceReplacement", 67 "language", 68 "script", 69 "region", 70 "variant", 71 "territory") 72 .putAll(DtdType.supplementalData, "variable", "attributeValues") 73 .build(); 74 75 public final Element ROOT; 76 public final Element PCDATA = elementFrom("#PCDATA"); 77 public final Element ANY = elementFrom("ANY"); 78 public final DtdType dtdType; 79 public final String version; 80 private Element lastElement; 81 private Attribute lastAttribute; 82 private Set<String> preCommentCache; 83 private DtdComparator dtdComparator; 84 85 public enum AttributeStatus { 86 distinguished("§d"), 87 value("§v"), 88 metadata("§m︎"); 89 public final String shortName; 90 AttributeStatus(String shortName)91 AttributeStatus(String shortName) { 92 this.shortName = shortName; 93 } 94 getShortName(AttributeStatus status)95 public static String getShortName(AttributeStatus status) { 96 return status == null ? 
/** The presence/default mode recorded for an attribute declaration. */
public enum Mode {
    REQUIRED("#REQUIRED"),
    OPTIONAL("#IMPLIED"),
    FIXED("#FIXED"),
    NULL("null");

    /** The text that {@link #forString(String)} matches against for this constant. */
    public final String source;

    Mode(String s) {
        this.source = s;
    }

    /**
     * Finds the constant whose {@link #source} equals the given text.
     *
     * @param mode the mode text, or {@code null}
     * @return the matching constant; {@link #NULL} when {@code mode} is {@code null}
     * @throws IllegalArgumentException if {@code mode} is non-null and matches no constant; the
     *     exception message is the unmatched text
     */
    public static Mode forString(String mode) {
        if (mode == null) {
            // No constant has a null source, so a null input always ends up as NULL.
            return NULL;
        }
        return Arrays.stream(values())
                .filter(candidate -> candidate.source.equals(mode))
                .findFirst()
                .orElseThrow(() -> new IllegalArgumentException(mode));
    }
}
/**
 * Creates the attribute {@code aName} declared on {@code element2}.
 *
 * <p>For the {@code ldml} DTD, a {@code draft} attribute on an element that is not listed in
 * {@code DRAFT_ON_NON_LEAF_ALLOWED} is marked deprecated when, at construction time, the
 * element has more than one child, or exactly one child that is not named {@code cp}.
 * Otherwise, an attribute whose name starts with {@code xml:} gets {@link
 * AttributeStatus#metadata}.
 *
 * @param dtdType the DTD this attribute belongs to
 * @param element2 the element carrying the attribute
 * @param aName the attribute name (interned)
 * @param mode2 the declared mode
 * @param split the declared type, split into tokens. A single token naming an {@link
 *     AttributeType} constant selects that type; anything else is treated as the ordered list
 *     of enumerated values (empty tokens are skipped)
 * @param value2 the default value, or {@code null} (interned when present)
 * @param firstComment comments collected before the declaration; stored as-is
 */
private Attribute(
        DtdType dtdType,
        Element element2,
        String aName,
        Mode mode2,
        String[] split,
        String value2,
        Set<String> firstComment) {
    commentsPre = firstComment;
    element = element2;
    name = aName.intern();
    if (name.equals("draft") // normally never permitted on elements with children, but
            // special cases...
            && dtdType == DtdType.ldml
            && !DRAFT_ON_NON_LEAF_ALLOWED.contains(element.getName())) {
        int elementChildrenCount = element.getChildren().size();
        // Explicit parentheses: && binds tighter than ||, so this is the original grouping.
        if (elementChildrenCount > 1
                || (elementChildrenCount == 1
                        && !element.getChildren()
                                .keySet()
                                .iterator()
                                .next()
                                .getName()
                                .equals("cp"))) {
            isDeprecatedAttribute = true;
            if (DEBUG) {
                System.out.println(element.getName() + ":" + element.getChildren());
            }
        }
    } else if (name.startsWith("xml:")) {
        attributeStatus = AttributeStatus.metadata;
    }
    mode = mode2;
    defaultValue = value2 == null ? null : value2.intern();

    AttributeType _type = AttributeType.ENUMERATED_TYPE;
    if (split.length == 1) {
        try {
            _type = AttributeType.valueOf(split[0]);
        } catch (IllegalArgumentException ignored) {
            // Intentional: a single token that is not an AttributeType name is a
            // one-value enumeration, so the ENUMERATED_TYPE default stays in place.
        }
    }
    type = _type;

    Map<String, Integer> _values = Collections.emptyMap();
    if (_type == AttributeType.ENUMERATED_TYPE) {
        // Map each enumerated value to its position in declaration order.
        LinkedHashMap<String, Integer> temp = new LinkedHashMap<>();
        for (String part : split) {
            if (part.length() != 0) {
                temp.put(part.intern(), temp.size());
            }
        }
        _values = Collections.unmodifiableMap(temp);
    }
    values = _values;
    attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name);
}
values.keySet().toString() 275 : type.toString()) 276 + (mode == Mode.NULL ? "" : ", mode=" + mode) 277 + (defaultValue == null ? "" : ", default=" + defaultValue); 278 } 279 280 @Override getName()281 public String getName() { 282 return name; 283 } 284 285 private static Splitter COMMA = Splitter.on(',').trimResults(); 286 addComment(String commentIn)287 public void addComment(String commentIn) { 288 if (commentIn.startsWith("@")) { 289 switch (commentIn) { 290 case "@METADATA": 291 attributeStatus = AttributeStatus.metadata; 292 break; 293 case "@VALUE": 294 attributeStatus = AttributeStatus.value; 295 break; 296 case "@DEPRECATED": 297 isDeprecatedAttribute = true; 298 break; 299 case "@ALLOWS_UESC": 300 attributeAllowsUEscape = true; 301 break; 302 303 default: 304 int colonPos = commentIn.indexOf(':'); 305 if (colonPos < 0) { 306 throw new IllegalArgumentException( 307 element.name 308 + " " 309 + name 310 + "= : Unrecognized ATTLIST annotation: " 311 + commentIn); 312 } 313 String command = commentIn.substring(0, colonPos); 314 String argument = commentIn.substring(colonPos + 1); 315 switch (command) { 316 case "@DEPRECATED": 317 deprecatedValues = 318 Collections.unmodifiableSet( 319 new HashSet<>(COMMA.splitToList(argument))); 320 break; 321 case "@MATCH": 322 if (matchValue != null) { 323 throw new IllegalArgumentException( 324 element.name 325 + " " 326 + name 327 + "= : Conflicting @MATCH: " 328 + matchValue.getName() 329 + " & " 330 + argument); 331 } 332 matchValue = MatchValue.of(argument); 333 break; 334 default: 335 throw new IllegalArgumentException( 336 element.name 337 + " " 338 + name 339 + "= : Unrecognized ATTLIST annotation: " 340 + commentIn); 341 } 342 } 343 return; 344 } 345 commentsPost = addUnmodifiable(commentsPost, commentIn.trim()); 346 } 347 348 /** Special version of identity; only considers name and name of element */ 349 @Override equals(Object obj)350 public boolean equals(Object obj) { 351 if (!(obj instanceof Attribute)) { 352 
return false; 353 } 354 Attribute that = (Attribute) obj; 355 return name.equals(that.name) 356 && element.name.equals( 357 that.element.name) // don't use plain element: circularity 358 // not relevant to identity 359 // && Objects.equals(comment, that.comment) 360 // && mode.equals(that.mode) 361 // && Objects.equals(defaultValue, that.defaultValue) 362 // && type.equals(that.type) 363 // && values.equals(that.values) 364 ; 365 } 366 367 /** Special version of identity; only considers name and name of element */ 368 @Override hashCode()369 public int hashCode() { 370 return name.hashCode() * 37 371 + element.name.hashCode() // don't use plain element: circularity 372 // not relevant to identity 373 // ) * 37 + Objects.hashCode(comment)) * 37 374 // + mode.hashCode()) * 37 375 // + Objects.hashCode(defaultValue)) * 37 376 // + type.hashCode()) * 37 377 // + values.hashCode() 378 ; 379 } 380 isDeprecated()381 public boolean isDeprecated() { 382 return isDeprecatedAttribute; 383 } 384 allowsUEscape()385 public boolean allowsUEscape() { 386 return attributeAllowsUEscape; 387 } 388 isDeprecatedValue(String value)389 public boolean isDeprecatedValue(String value) { 390 return deprecatedValues.contains(value); 391 } 392 getStatus()393 public AttributeStatus getStatus() { 394 return attributeStatus; 395 } 396 getValueStatus(String value)397 public ValueStatus getValueStatus(String value) { 398 return deprecatedValues.contains(value) 399 ? ValueStatus.invalid 400 : type == AttributeType.ENUMERATED_TYPE 401 ? (values.containsKey(value) ? ValueStatus.valid : ValueStatus.invalid) 402 : matchValue == null 403 ? ValueStatus.unknown 404 : matchValue.is(value) 405 ? ValueStatus.valid 406 : ValueStatus.invalid; 407 } 408 getMatchString()409 public String getMatchString() { 410 return type == AttributeType.ENUMERATED_TYPE 411 ? ENUM_LEAD + JOINER_COMMA_SPACE.join(values.keySet()) + ENUM_TRAIL 412 : matchValue != null ? 
/** The kinds of element content distinguished when an element declaration is recorded. */
public enum ElementType {
    EMPTY,
    ANY,
    PCDATA("(#PCDATA)"),
    CHILDREN;

    /** The explicit text given for the constant, or the constant's own name when none is given. */
    public final String source;

    /** For constants declared without text: {@link #source} is the constant name. */
    ElementType() {
        this.source = name();
    }

    /** For constants declared with explicit text. */
    ElementType(String s) {
        this.source = s;
    }
}
490 empty, 491 nonempty, 492 any 493 } 494 495 public final String name; 496 private String rawModel; 497 private ElementType type; 498 private final Map<Element, Integer> children = new LinkedHashMap<>(); 499 private final Map<Attribute, Integer> attributes = new LinkedHashMap<>(); 500 private Set<String> commentsPre; 501 private Set<String> commentsPost; 502 private String model; 503 private boolean isOrderedElement; 504 private boolean isDeprecatedElement; 505 private boolean isTechPreviewElement; 506 private ElementStatus elementStatus = ElementStatus.regular; 507 private ValueConstraint valueConstraint = ValueConstraint.nonempty; 508 Element(String name2)509 private Element(String name2) { 510 name = name2.intern(); 511 } 512 setChildren(DtdData dtdData, String model, Set<String> precomments)513 private void setChildren(DtdData dtdData, String model, Set<String> precomments) { 514 this.commentsPre = precomments; 515 rawModel = model; 516 this.model = clean(model); 517 valueConstraint = ValueConstraint.empty; 518 if (model.equals("EMPTY")) { 519 type = ElementType.EMPTY; 520 return; 521 } 522 type = ElementType.CHILDREN; 523 for (String part : FILLER.split(model)) { 524 if (part.length() != 0) { 525 if (part.equals("#PCDATA")) { 526 type = ElementType.PCDATA; 527 if (HACK_PCDATA_ALLOWS_EMPTY.get(dtdData.dtdType).contains(name)) { 528 // TODO move to @ annotation in .dtd file 529 valueConstraint = ValueConstraint.any; 530 } else { 531 valueConstraint = ValueConstraint.nonempty; 532 } 533 } else if (part.equals("ANY")) { 534 type = ElementType.ANY; 535 } else { 536 CldrUtility.putNew(children, dtdData.elementFrom(part), children.size()); 537 } 538 } 539 } 540 if ((type == ElementType.CHILDREN) == (children.size() == 0) 541 && !model.startsWith("(#PCDATA|cp")) { 542 throw new IllegalArgumentException( 543 "CLDR does not permit Mixed content. 
" + name + ":" + model); 544 } 545 } 546 547 static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)"); 548 static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])"); 549 clean(String model2)550 private String clean(String model2) { 551 // (x) -> ( x ); 552 // x,y -> x, y 553 // x|y -> x | y 554 String result = CLEANER1.matcher(model2).replaceAll("$1 "); 555 result = CLEANER2.matcher(result).replaceAll(" $1"); 556 return result.equals(model2) ? model2 : result; // for debugging 557 } 558 containsAttribute(String string)559 public boolean containsAttribute(String string) { 560 for (Attribute a : attributes.keySet()) { 561 if (a.name.equals(string)) { 562 return true; 563 } 564 } 565 return false; 566 } 567 568 @Override toString()569 public String toString() { 570 return name; 571 } 572 toDtdString()573 public String toDtdString() { 574 return "<!ELEMENT " + name + " " + getRawModel() + " >"; 575 } 576 getType()577 public ElementType getType() { 578 return type; 579 } 580 getChildren()581 public Map<Element, Integer> getChildren() { 582 return Collections.unmodifiableMap(children); 583 } 584 getAttributes()585 public Map<Attribute, Integer> getAttributes() { 586 return Collections.unmodifiableMap(attributes); 587 } 588 589 @Override getName()590 public String getName() { 591 return name; 592 } 593 getChildNamed(String string)594 public Element getChildNamed(String string) { 595 for (Element e : children.keySet()) { 596 if (e.name.equals(string)) { 597 return e; 598 } 599 } 600 return null; 601 } 602 getAttributeNamed(String string)603 public Attribute getAttributeNamed(String string) { 604 for (Attribute a : attributes.keySet()) { 605 if (a.name.equals(string)) { 606 return a; 607 } 608 } 609 return null; 610 } 611 addComment(String addition)612 public void addComment(String addition) { 613 if (addition.startsWith("@")) { 614 // there are exactly 4 cases: deprecated, ordered, techPreview and metadata 615 switch (addition) { 616 case "@ORDERED": 617 
isOrderedElement = true; 618 break; 619 case "@DEPRECATED": 620 isDeprecatedElement = true; 621 break; 622 case "@METADATA": 623 elementStatus = ElementStatus.metadata; 624 break; 625 case "@TECHPREVIEW": 626 isTechPreviewElement = true; 627 break; 628 default: 629 if (addition.startsWith("@MATCH") || addition.startsWith("@VALUE")) { 630 // Try to catch this case 631 throw new IllegalArgumentException( 632 name 633 + ": Unrecognized ELEMENT annotation (this isn't ATTLIST!): " 634 + addition); 635 } else { 636 throw new IllegalArgumentException( 637 name + ": Unrecognized ELEMENT annotation: " + addition); 638 } 639 } 640 return; 641 } 642 commentsPost = addUnmodifiable(commentsPost, addition.trim()); 643 } 644 645 /** Special version of equals. Only the name is considered in the identity. */ 646 @Override equals(Object obj)647 public boolean equals(Object obj) { 648 if (!(obj instanceof Element)) { 649 return false; 650 } 651 Element that = (Element) obj; 652 return name.equals(that.name) 653 // not relevant to the identity of the object 654 // && Objects.equals(comment, that.comment) 655 // && type == that.type 656 // && attributes.equals(that.attributes) 657 // && children.equals(that.children) 658 ; 659 } 660 661 /** Special version of hashcode. Only the name is considered in the identity. 
*/ 662 @Override hashCode()663 public int hashCode() { 664 return name.hashCode() 665 // not relevant to the identity of the object 666 // * 37 + Objects.hashCode(comment) 667 // ) * 37 + Objects.hashCode(type) 668 // ) * 37 + attributes.hashCode() 669 // ) * 37 + children.hashCode() 670 ; 671 } 672 isDeprecated()673 public boolean isDeprecated() { 674 return isDeprecatedElement; 675 } 676 isOrdered()677 public boolean isOrdered() { 678 return isOrderedElement; 679 } 680 isTechPreview()681 public boolean isTechPreview() { 682 return isTechPreviewElement; 683 } 684 getElementStatus()685 public ElementStatus getElementStatus() { 686 return elementStatus; 687 } 688 getValueConstraint()689 public ValueConstraint getValueConstraint() { 690 return valueConstraint; 691 } 692 693 /** 694 * @return the rawModel 695 */ getRawModel()696 public String getRawModel() { 697 return rawModel; 698 } 699 } 700 elementFrom(String name)701 private Element elementFrom(String name) { 702 Element result = nameToElement.get(name); 703 if (result == null) { 704 nameToElement.put(name, result = new Element(name)); 705 } 706 return result; 707 } 708 addElement(String name2, String model)709 private void addElement(String name2, String model) { 710 Element element = elementFrom(name2); 711 element.setChildren(this, model, preCommentCache); 712 preCommentCache = null; 713 lastElement = element; 714 lastAttribute = null; 715 } 716 addComment(String comment)717 private void addComment(String comment) { 718 comment = comment.trim(); 719 if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky" 720 if (comment.startsWith("@")) { 721 throw new IllegalArgumentException( 722 "@ annotation comment must follow element or attribute, without intervening # comment"); 723 } 724 preCommentCache = addUnmodifiable(preCommentCache, comment); 725 } else if (lastElement != null) { 726 lastElement.addComment(comment); 727 } else if (lastAttribute != null) { 728 
lastAttribute.addComment(comment); 729 } else { 730 if (comment.startsWith("@")) { 731 throw new IllegalArgumentException( 732 "@ annotation comment must follow element or attribute, without intervening # comment"); 733 } 734 preCommentCache = addUnmodifiable(preCommentCache, comment); 735 } 736 } 737 738 // TODO hide this 739 /** 740 * @deprecated 741 */ 742 @Deprecated 743 @Override handleElementDecl(String name, String model)744 public void handleElementDecl(String name, String model) { 745 if (SHOW_ALL) { 746 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, 747 // contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, 748 // listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, 749 // references?, special*))) > 750 System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >"); 751 } 752 addElement(name, model); 753 } 754 755 // TODO hide this 756 /** 757 * @deprecated 758 */ 759 @Deprecated 760 @Override handleStartDtd(String name, String publicId, String systemId)761 public void handleStartDtd(String name, String publicId, String systemId) { 762 DtdType explicitDtdType = DtdType.valueOf(name); 763 if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) { 764 throw new IllegalArgumentException("Mismatch in dtdTypes"); 765 } 766 } 767 768 /** 769 * @deprecated 770 */ 771 @Deprecated 772 @Override handleAttributeDecl( String eName, String aName, String type, String mode, String value)773 public void handleAttributeDecl( 774 String eName, String aName, String type, String mode, String value) { 775 if (SHOW_ALL) { 776 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | 777 // false ) #IMPLIED > 778 // <!ATTLIST version number CDATA #REQUIRED > 779 // <!ATTLIST version cldrVersion CDATA #FIXED "27" > 780 781 System.out.println( 782 "<!ATTLIST " 783 + eName 784 + " " 785 + aName 786 + " " 787 + type 788 + 
" " 789 + mode 790 + (value == null ? "" : " \"" + value + "\"") 791 + " >"); 792 } 793 // HACK for 1.1.1 794 if (eName.equals("draft")) { 795 eName = "week"; 796 } 797 addAttribute(eName, aName, type, mode, value); 798 } 799 800 /** 801 * @deprecated 802 */ 803 @Deprecated 804 @Override handleComment(String path, String comment)805 public void handleComment(String path, String comment) { 806 if (comment.contains("Copyright")) { 807 // Zap the copyright comment, replace it with the current one. 808 comment = CldrUtility.getCopyrightString(); 809 } 810 if (SHOW_ALL) { 811 // <!-- true and false are deprecated. --> 812 System.out.println("<!-- " + comment.trim() + " -->"); 813 } 814 addComment(comment); 815 } 816 817 // TODO hide this 818 /** 819 * @deprecated 820 */ 821 @Deprecated 822 @Override handleEndDtd()823 public void handleEndDtd() { 824 throw new XMLFileReader.AbortException(); 825 } 826 827 /** 828 * Note that it always gets the trunk version 829 * 830 * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead 831 */ 832 @Deprecated getInstance(DtdType type)833 public static DtdData getInstance(DtdType type) { 834 return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory()); 835 } 836 837 /** Special form using version, used only by tests, etc. */ getInstance(DtdType type, String version)838 public static DtdData getInstance(DtdType type, String version) { 839 // Map out versions that had no DTD 840 if (version != null) { 841 switch (version) { 842 case "1.1.1": 843 version = "1.1"; 844 break; 845 case "1.4.1": 846 version = "1.4"; 847 break; 848 case "1.5.1": 849 version = "1.5.0.1"; 850 break; 851 default: 852 } 853 } 854 File directory = 855 version == null 856 ? 
CLDRConfig.getInstance().getCldrBaseDirectory() 857 : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version); 858 859 return getInstance(type, version, directory); 860 } 861 862 private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE = 863 new ConcurrentHashMap<>(); 864 865 /** 866 * Normal version of DtdData Get a DtdData, given the CLDR root directory. 867 * 868 * @param type which DtdType to return 869 * @param directory the CLDR Root directory, which contains the "common" directory. 870 * @return 871 */ getInstance(DtdType type, File directory)872 public static DtdData getInstance(DtdType type, File directory) { 873 Pair<DtdType, File> key = new Pair<>(type, directory); 874 DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory)); 875 return data; 876 } 877 getInstance(DtdType type, String version, File directory)878 private static DtdData getInstance(DtdType type, String version, File directory) { 879 DtdData simpleHandler = new DtdData(type, version); 880 XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler); 881 if (type != type.rootType) { 882 // read the real first, then add onto it. 
883 readFile(type.rootType, xfr, directory); 884 } 885 readFile(type, xfr, directory); 886 // HACK 887 if (type == DtdType.ldmlICU) { 888 Element special = simpleHandler.nameToElement.get("special"); 889 for (String extraElementName : 890 Arrays.asList( 891 "icu:breakIteratorData", 892 "icu:UCARules", 893 "icu:scripts", 894 "icu:transforms", 895 "icu:ruleBasedNumberFormats", 896 "icu:isLeapMonth", 897 "icu:version", 898 "icu:breakDictionaryData", 899 "icu:depends")) { 900 Element extraElement = simpleHandler.nameToElement.get(extraElementName); 901 special.children.put(extraElement, special.children.size()); 902 } 903 } 904 if (simpleHandler.ROOT.children.size() == 0) { 905 throw new IllegalArgumentException( 906 "Internal Error: DtdData.getInstance(" 907 + type 908 + ", ...): readFile() failed to return any children!"); 909 // should never happen 910 } 911 simpleHandler.finish(); 912 simpleHandler.freeze(); 913 return simpleHandler; 914 } 915 finish()916 private void finish() { 917 dtdComparator = new DtdComparator(); 918 } 919 readFile(DtdType type, XMLFileReader xfr, File directory)920 public static void readFile(DtdType type, XMLFileReader xfr, File directory) { 921 File file = new File(directory, type.dtdPath); 922 StringReader s = 923 new StringReader( 924 "<?xml version='1.0' encoding='UTF-8' ?>" 925 + "<!DOCTYPE " 926 + type 927 + " SYSTEM '" 928 + file.getAbsolutePath() 929 + "'>"); 930 try { 931 xfr.read(type.toString(), s, -1, true); // DTD_TYPE_TO_FILE.get(type) 932 } catch (IllegalArgumentException iae) { 933 // rethrow 934 throw new IllegalArgumentException("Error while reading " + type, iae); 935 } 936 } 937 freeze()938 private void freeze() { 939 if (version == null) { // only generate for new versions 940 MergeLists<String> elementMergeList = new MergeLists<>(); 941 elementMergeList.add(dtdType.toString()); 942 MergeLists<String> attributeMergeList = new MergeLists<>(); 943 attributeMergeList.add("_q"); 944 945 for (Element element : 
nameToElement.values()) { 946 if (element.children.size() > 0) { 947 Collection<String> names = getNames(element.children.keySet()); 948 elementMergeList.add(names); 949 if (DEBUG) { 950 System.out.println(element.getName() + "\t→\t" + names); 951 } 952 } 953 if (element.attributes.size() > 0) { 954 Collection<String> names = getNames(element.attributes.keySet()); 955 attributeMergeList.add(names); 956 if (DEBUG) { 957 System.out.println(element.getName() + "\t→\t@" + names); 958 } 959 } 960 } 961 List<String> elementList = elementMergeList.merge(); 962 List<String> attributeList = attributeMergeList.merge(); 963 if (DEBUG) { 964 System.out.println("Element Ordering:\t" + elementList); 965 System.out.println("Attribute Ordering:\t" + attributeList); 966 } 967 elementComparator = new MapComparator<>(elementList).setErrorOnMissing(true).freeze(); 968 attributeComparator = 969 new MapComparator<>(attributeList).setErrorOnMissing(true).freeze(); 970 } 971 nameToAttributes.freeze(); 972 nameToElement = Collections.unmodifiableMap(nameToElement); 973 } 974 getNames(Collection<? extends Named> keySet)975 private Collection<String> getNames(Collection<? 
extends Named> keySet) { 976 List<String> result = new ArrayList<>(); 977 for (Named e : keySet) { 978 result.add(e.getName()); 979 } 980 return result; 981 } 982 983 public enum DtdItem { 984 ELEMENT, 985 ATTRIBUTE, 986 ATTRIBUTE_VALUE 987 } 988 989 public interface AttributeValueComparator { compare(String element, String attribute, String value1, String value2)990 public int compare(String element, String attribute, String value1, String value2); 991 } 992 getDtdComparator(AttributeValueComparator avc)993 public Comparator<String> getDtdComparator(AttributeValueComparator avc) { 994 return dtdComparator; 995 } 996 getDtdComparator()997 public DtdComparator getDtdComparator() { 998 return dtdComparator; 999 } 1000 1001 public class DtdComparator implements Comparator<String> { 1002 @Override compare(String path1, String path2)1003 public int compare(String path1, String path2) { 1004 XPathParts a = XPathParts.getFrozenInstance(path1); 1005 XPathParts b = XPathParts.getFrozenInstance(path2); 1006 return xpathComparator(a, b); 1007 } 1008 xpathComparator(XPathParts a, XPathParts b)1009 public int xpathComparator(XPathParts a, XPathParts b) { 1010 // there must always be at least one element 1011 String baseA = a.getElement(0); 1012 String baseB = b.getElement(0); 1013 if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) { 1014 throw new IllegalArgumentException( 1015 "Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB); 1016 } 1017 int min = Math.min(a.size(), b.size()); 1018 Element parent = ROOT; 1019 Element elementA; 1020 for (int i = 1; i < min; ++i, parent = elementA) { 1021 // add extra test for "fake" elements, used in diffing. they always start with _ 1022 String elementRawA = a.getElement(i); 1023 String elementRawB = b.getElement(i); 1024 if (elementRawA.startsWith("_")) { 1025 return elementRawB.startsWith("_") ? 
elementRawA.compareTo(elementRawB) : -1; 1026 } else if (elementRawB.startsWith("_")) { 1027 return 1; 1028 } 1029 // 1030 elementA = nameToElement.get(elementRawA); 1031 Element elementB = nameToElement.get(elementRawB); 1032 if (elementA != elementB) { 1033 int aa = parent.children.get(elementA); 1034 int bb = parent.children.get(elementB); 1035 return aa - bb; 1036 } 1037 int countA = a.getAttributeCount(i); 1038 int countB = b.getAttributeCount(i); 1039 if (countA == 0 && countB == 0) { 1040 continue; 1041 } 1042 // we have two ways to compare the attributes. One based on the dtd, 1043 // and one based on explicit comparators 1044 1045 // at this point the elements are the same and correspond to elementA 1046 // in the dtd 1047 1048 // Handle the special added elements 1049 String aqValue = a.getAttributeValue(i, "_q"); 1050 if (aqValue != null) { 1051 String bqValue = b.getAttributeValue(i, "_q"); 1052 if (!aqValue.equals(bqValue)) { 1053 int aValue = Integer.parseInt(aqValue); 1054 int bValue = Integer.parseInt(bqValue); 1055 return aValue - bValue; 1056 } 1057 --countA; 1058 --countB; 1059 } 1060 1061 attributes: 1062 for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) { 1063 Attribute main = attr.getKey(); 1064 String valueA = a.getAttributeValue(i, main.name); 1065 String valueB = b.getAttributeValue(i, main.name); 1066 if (valueA == null) { 1067 if (valueB != null) { 1068 return -1; 1069 } 1070 } else if (valueB == null) { 1071 return 1; 1072 } else if (valueA.equals(valueB)) { 1073 --countA; 1074 --countB; 1075 if (countA == 0 && countB == 0) { 1076 break attributes; 1077 } 1078 continue; // TODO 1079 } else if (main.attributeValueComparator != null) { 1080 return main.attributeValueComparator.compare(valueA, valueB); 1081 } else if (main.values.size() != 0) { 1082 int aa = main.values.get(valueA); 1083 int bb = main.values.get(valueB); 1084 return aa - bb; 1085 } else { 1086 return valueA.compareTo(valueB); 1087 } 1088 } 1089 if 
(countA != 0 || countB != 0) { 1090 throw new IllegalArgumentException(); 1091 } 1092 } 1093 return a.size() - b.size(); 1094 } 1095 } 1096 getAttributeComparator()1097 public MapComparator<String> getAttributeComparator() { 1098 return attributeComparator; 1099 } 1100 getElementComparator()1101 public MapComparator<String> getElementComparator() { 1102 return elementComparator; 1103 } 1104 getAttributesFromName()1105 public Relation<String, Attribute> getAttributesFromName() { 1106 return nameToAttributes; 1107 } 1108 getElementFromName()1109 public Map<String, Element> getElementFromName() { 1110 return nameToElement; 1111 } 1112 1113 @Override toString()1114 public String toString() { 1115 StringBuilder b = new StringBuilder(); 1116 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, 1117 // contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, 1118 // listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, 1119 // special*))) > 1120 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false 1121 // ) #IMPLIED > <!-- true and false are deprecated. --> 1122 Seen seen = new Seen(dtdType); 1123 seen.seenElements.add(ANY); 1124 seen.seenElements.add(PCDATA); 1125 toString(ROOT, b, seen); 1126 1127 // Hack for ldmlIcu: catch the items that are not mentioned in the original 1128 int currentEnd = b.length(); 1129 for (Element e : nameToElement.values()) { 1130 toString(e, b, seen); 1131 } 1132 if (currentEnd != b.length()) { 1133 b.insert( 1134 currentEnd, 1135 System.lineSeparator() 1136 + System.lineSeparator() 1137 + "<!-- Elements not reachable from root! 
-->" 1138 + System.lineSeparator()); 1139 } 1140 return b.toString(); 1141 } 1142 1143 static final class Seen { 1144 Set<Element> seenElements = new HashSet<>(); 1145 Set<Attribute> seenAttributes = new HashSet<>(); 1146 Seen(DtdType dtdType)1147 public Seen(DtdType dtdType) { 1148 if (dtdType.rootType == dtdType) { 1149 return; 1150 } 1151 DtdData otherData = DtdData.getInstance(dtdType.rootType); 1152 walk(otherData, otherData.ROOT); 1153 seenElements.remove(otherData.nameToElement.get("special")); 1154 } 1155 walk(DtdData otherData, Element current)1156 private void walk(DtdData otherData, Element current) { 1157 seenElements.add(current); 1158 seenAttributes.addAll(current.attributes.keySet()); 1159 for (Element e : current.children.keySet()) { 1160 walk(otherData, e); 1161 } 1162 } 1163 } 1164 getDescendents(Element start, Set<Element> toAddTo)1165 public Set<Element> getDescendents(Element start, Set<Element> toAddTo) { 1166 if (!toAddTo.contains(start)) { 1167 toAddTo.add(start); 1168 for (Element e : start.children.keySet()) { 1169 getDescendents(e, toAddTo); 1170 } 1171 } 1172 return toAddTo; 1173 } 1174 toString(Element current, StringBuilder b, Seen seen)1175 private void toString(Element current, StringBuilder b, Seen seen) { 1176 boolean first = true; 1177 if (seen.seenElements.contains(current)) { 1178 return; 1179 } 1180 seen.seenElements.add(current); 1181 boolean elementDeprecated = isDeprecated(current.name, "*", "*"); 1182 1183 showComments(b, current.commentsPre, true); 1184 b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >"); 1185 if (USE_SYNTHESIZED) { 1186 Element aliasElement = getElementFromName().get("alias"); 1187 // b.append(current.rawChildren); 1188 if (!current.children.isEmpty()) { 1189 LinkedHashSet<Element> elements = new LinkedHashSet<>(current.children.keySet()); 1190 boolean hasAlias = aliasElement != null && elements.remove(aliasElement); 1191 // boolean hasSpecial = specialElement != null && 
elements.remove(specialElement); 1192 if (hasAlias) { 1193 b.append("(alias |"); 1194 } 1195 b.append("("); 1196 // <!ELEMENT transformNames ( alias | (transformName | special)* ) > 1197 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) > 1198 1199 for (Element e : elements) { 1200 if (first) { 1201 first = false; 1202 } else { 1203 b.append(", "); 1204 } 1205 b.append(e.name); 1206 if (e.type != ElementType.PCDATA) { 1207 b.append("*"); 1208 } 1209 } 1210 if (hasAlias) { 1211 b.append(")"); 1212 } 1213 b.append(")"); 1214 } else { 1215 b.append(current.type == null ? "???" : current.type.source); 1216 } 1217 b.append(">"); 1218 } 1219 showComments(b, current.commentsPost, false); 1220 if (isOrdered(current.name)) { 1221 b.append(COMMENT_PREFIX + "<!--@ORDERED-->"); 1222 } 1223 if (isTechPreview(current.name)) { 1224 b.append(COMMENT_PREFIX + "<!--@TECHPREVIEW-->"); 1225 } 1226 if (current.getElementStatus() != ElementStatus.regular) { 1227 b.append( 1228 COMMENT_PREFIX 1229 + "<!--@" 1230 + current.getElementStatus().toString().toUpperCase(Locale.ROOT) 1231 + "-->"); 1232 } 1233 if (elementDeprecated) { 1234 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1235 } 1236 1237 LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>(); 1238 1239 for (Attribute a : current.attributes.keySet()) { 1240 if (seen.seenAttributes.contains(a)) { 1241 continue; 1242 } 1243 seen.seenAttributes.add(a); 1244 boolean attributeDeprecated = 1245 elementDeprecated || isDeprecated(current.name, a.name, "*"); 1246 boolean attributeUEscaped = allowsUEscape(current.name, a.name, "*"); 1247 deprecatedValues.clear(); 1248 1249 showComments(b, a.commentsPre, true); 1250 b.append("\n<!ATTLIST " + current.name + " " + a.name); 1251 if (a.type == AttributeType.ENUMERATED_TYPE) { 1252 b.append(" ("); 1253 first = true; 1254 for (String s : a.values.keySet()) { 1255 if (first) { 1256 first = false; 1257 } else { 1258 b.append(" | "); 1259 } 1260 b.append(s); 1261 if 
(!attributeDeprecated && isDeprecated(current.name, a.name, s)) { 1262 deprecatedValues.add(s); 1263 } 1264 } 1265 b.append(")"); 1266 } else { 1267 b.append(' ').append(a.type); 1268 } 1269 if (a.mode != Mode.NULL) { 1270 b.append(" ").append(a.mode.source); 1271 } 1272 if (a.defaultValue != null) { 1273 b.append(" \"").append(a.defaultValue).append('"'); 1274 } 1275 b.append(" >"); 1276 showComments(b, a.commentsPost, false); 1277 // if (attributeDeprecated != deprecatedComment) { 1278 // System.out.println("*** BAD DEPRECATION ***" + a); 1279 // } 1280 if (a.matchValue != null) { 1281 b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->"); 1282 } 1283 if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) { 1284 b.append(COMMENT_PREFIX + "<!--@METADATA-->"); 1285 } else if (!isDistinguishing(current.name, a.name)) { 1286 b.append(COMMENT_PREFIX + "<!--@VALUE-->"); 1287 } 1288 if (attributeDeprecated) { 1289 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1290 } else if (!deprecatedValues.isEmpty()) { 1291 b.append( 1292 COMMENT_PREFIX 1293 + "<!--@DEPRECATED:" 1294 + Joiner.on(", ").join(deprecatedValues) 1295 + "-->"); 1296 } 1297 if (attributeUEscaped) { 1298 b.append(COMMENT_PREFIX + "<!--@ALLOWS_UESC-->"); 1299 } 1300 } 1301 if (current.children.size() > 0) { 1302 for (Element e : current.children.keySet()) { 1303 toString(e, b, seen); 1304 } 1305 } 1306 } 1307 showComments(StringBuilder b, Set<String> comments, boolean separate)1308 private void showComments(StringBuilder b, Set<String> comments, boolean separate) { 1309 if (comments == null) { 1310 return; 1311 } 1312 if (separate && b.length() != 0) { 1313 b.append(System.lineSeparator()); 1314 } 1315 for (String c : comments) { 1316 boolean deprecatedComment = false; // the following served its purpose... 
1317 // c.toLowerCase(Locale.ENGLISH).contains("deprecat"); 1318 if (!deprecatedComment) { 1319 if (separate) { 1320 // special handling for very first comment 1321 if (b.length() == 0) { 1322 b.append("<!--") 1323 .append(System.lineSeparator()) 1324 .append(c) 1325 .append(System.lineSeparator()) 1326 .append("-->"); 1327 continue; 1328 } 1329 b.append(System.lineSeparator()); 1330 } else { 1331 b.append(COMMENT_PREFIX); 1332 } 1333 b.append("<!-- ").append(c).append(" -->"); 1334 } 1335 } 1336 } 1337 removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1338 public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) { 1339 for (Iterator<T> it = elements.iterator(); it.hasNext(); ) { 1340 T item = it.next(); 1341 if (matcher.transform(item) == Boolean.TRUE) { 1342 it.remove(); 1343 return item; 1344 } 1345 } 1346 return null; 1347 } 1348 getElements()1349 public Set<Element> getElements() { 1350 return new LinkedHashSet<>(nameToElement.values()); 1351 } 1352 getAttributes()1353 public Set<Attribute> getAttributes() { 1354 return new LinkedHashSet<>(nameToAttributes.values()); 1355 } 1356 isDistinguishing(String elementName, String attribute)1357 public boolean isDistinguishing(String elementName, String attribute) { 1358 return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished; 1359 } 1360 1361 static final Set<String> METADATA = 1362 new HashSet<>(Arrays.asList("references", "standard", "draft")); 1363 addUnmodifiable(Set<String> comment, String addition)1364 static final Set<String> addUnmodifiable(Set<String> comment, String addition) { 1365 if (comment == null) { 1366 return Collections.singleton(addition); 1367 } else { 1368 comment = new LinkedHashSet<>(comment); 1369 comment.add(addition); 1370 return Collections.unmodifiableSet(comment); 1371 } 1372 } 1373 1374 public class IllegalByDtdException extends RuntimeException { 1375 private static final long serialVersionUID = 1L; 1376 public 
final String elementName; 1377 public final String attributeName; 1378 public final String attributeValue; 1379 IllegalByDtdException( String elementName, String attributeName, String attributeValue)1380 public IllegalByDtdException( 1381 String elementName, String attributeName, String attributeValue) { 1382 this.elementName = elementName; 1383 this.attributeName = attributeName; 1384 this.attributeValue = attributeValue; 1385 } 1386 1387 @Override getMessage()1388 public String getMessage() { 1389 return "Dtd " 1390 + dtdType 1391 + " doesn’t allow " 1392 + "element=" 1393 + elementName 1394 + (attributeName == null ? "" : ", attribute: " + attributeName) 1395 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue); 1396 } 1397 } 1398 1399 // @SuppressWarnings("unused") isDeprecated(String elementName, String attributeName, String attributeValue)1400 public boolean isDeprecated(String elementName, String attributeName, String attributeValue) { 1401 Element element = getElementThrowingIfNull(elementName, null, null); 1402 if (element.isDeprecatedElement) { 1403 return true; 1404 } 1405 if ("*".equals(attributeName) || "_q".equals(attributeName)) { 1406 return false; 1407 } 1408 Attribute attribute = element.getAttributeNamed(attributeName); 1409 if (attribute == null) { 1410 throw new IllegalByDtdException(elementName, attributeName, attributeValue); 1411 } else if (attribute.isDeprecatedAttribute) { 1412 return true; 1413 } 1414 return attribute.deprecatedValues.contains( 1415 attributeValue); // don't need special test for "*" 1416 } 1417 allowsUEscape(String elementName, String attributeName, String attributeValue)1418 public boolean allowsUEscape(String elementName, String attributeName, String attributeValue) { 1419 Element element = getElementThrowingIfNull(elementName, null, null); 1420 Attribute attribute = element.getAttributeNamed(attributeName); 1421 if (attribute == null) { 1422 throw new IllegalByDtdException(elementName, 
attributeName, attributeValue); 1423 } else if (attribute.allowsUEscape()) { 1424 return true; 1425 } 1426 return false; 1427 } 1428 1429 /** 1430 * Returns whether an element (specified by its full name) is ordered. This method understands 1431 * all elements in the DTDs used (including the ICU extensions), but will throw 1432 * IllegalByDtdException for unknown elements. See CLDR-8614 for more background. 1433 */ isOrdered(String elementName)1434 public boolean isOrdered(String elementName) { 1435 Element element = getElementThrowingIfNull(elementName, null, null); 1436 return element.isOrdered(); 1437 } 1438 getElementThrowingIfNull( String elementName, String attributeName, String value)1439 public Element getElementThrowingIfNull( 1440 String elementName, String attributeName, String value) { 1441 Element element = nameToElement.get(elementName); 1442 if (element == null) { 1443 throw new IllegalByDtdException(elementName, attributeName, value); 1444 } 1445 return element; 1446 } 1447 1448 /** 1449 * Returns whether an element (specified by its full name) is a tech preview. This method 1450 * understands all elements in the DTDs used (including the ICU extensions), but will throw 1451 * IllegalByDtdException for unknown elements. See CLDR-8614 for more background. 
1452 */ isTechPreview(String elementName)1453 public boolean isTechPreview(String elementName) { 1454 Element element = getElementThrowingIfNull(elementName, null, null); 1455 return element.isTechPreview(); 1456 } 1457 getAttributeStatus(String elementName, String attributeName)1458 public AttributeStatus getAttributeStatus(String elementName, String attributeName) { 1459 if ("_q".equals(attributeName)) { 1460 return AttributeStatus.distinguished; // special case 1461 } 1462 Element element = nameToElement.get(elementName); 1463 if (element == null) { 1464 if (elementName.startsWith("icu:")) { 1465 return AttributeStatus.distinguished; 1466 } 1467 throw new IllegalByDtdException(elementName, attributeName, null); 1468 } 1469 Attribute attribute = element.getAttributeNamed(attributeName); 1470 if (attribute == null) { 1471 if (elementName.startsWith("icu:")) { 1472 return AttributeStatus.distinguished; 1473 } 1474 throw new IllegalByDtdException(elementName, attributeName, null); 1475 } 1476 return attribute.attributeStatus; 1477 } 1478 1479 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1480 private static MapComparator<String> valueOrdering = 1481 new MapComparator<String>().setErrorOnMissing(false).freeze(); 1482 1483 static MapComparator<String> dayValueOrder = 1484 new MapComparator<String>() 1485 .add("sun", "mon", "tue", "wed", "thu", "fri", "sat") 1486 .freeze(); 1487 static MapComparator<String> dayPeriodOrder = 1488 new MapComparator<String>() 1489 .add( 1490 "midnight", 1491 "am", 1492 "noon", 1493 "pm", 1494 "morning1", 1495 "morning2", 1496 "afternoon1", 1497 "afternoon2", 1498 "evening1", 1499 "evening2", 1500 "night1", 1501 "night2", 1502 // The ones on the following line are no longer used actively. Can be 1503 // removed later? 
1504 "earlyMorning", 1505 "morning", 1506 "midDay", 1507 "afternoon", 1508 "evening", 1509 "night", 1510 "weeHours") 1511 .freeze(); 1512 static MapComparator<String> dateTimeFormatOrder = 1513 new MapComparator<String>().add("standard", "atTime").freeze(); 1514 static MapComparator<String> listPatternOrder = 1515 new MapComparator<String>().add("start", "middle", "end", "2", "3").freeze(); 1516 static MapComparator<String> widthOrder = 1517 new MapComparator<String>() 1518 .add("abbreviated", "narrow", "short", "wide", "all") 1519 .freeze(); 1520 static MapComparator<String> lengthOrder = 1521 new MapComparator<String>().add("full", "long", "medium", "short").freeze(); 1522 static MapComparator<String> dateFieldOrder = 1523 new MapComparator<String>() 1524 .add( 1525 "era", 1526 "era-short", 1527 "era-narrow", 1528 "year", 1529 "year-short", 1530 "year-narrow", 1531 "quarter", 1532 "quarter-short", 1533 "quarter-narrow", 1534 "month", 1535 "month-short", 1536 "month-narrow", 1537 "week", 1538 "week-short", 1539 "week-narrow", 1540 "weekOfMonth", 1541 "weekOfMonth-short", 1542 "weekOfMonth-narrow", 1543 "day", 1544 "day-short", 1545 "day-narrow", 1546 "dayOfYear", 1547 "dayOfYear-short", 1548 "dayOfYear-narrow", 1549 "weekday", 1550 "weekday-short", 1551 "weekday-narrow", 1552 "weekdayOfMonth", 1553 "weekdayOfMonth-short", 1554 "weekdayOfMonth-narrow", 1555 "sun", 1556 "sun-short", 1557 "sun-narrow", 1558 "mon", 1559 "mon-short", 1560 "mon-narrow", 1561 "tue", 1562 "tue-short", 1563 "tue-narrow", 1564 "wed", 1565 "wed-short", 1566 "wed-narrow", 1567 "thu", 1568 "thu-short", 1569 "thu-narrow", 1570 "fri", 1571 "fri-short", 1572 "fri-narrow", 1573 "sat", 1574 "sat-short", 1575 "sat-narrow", 1576 "dayperiod-short", 1577 "dayperiod", 1578 "dayperiod-narrow", 1579 "hour", 1580 "hour-short", 1581 "hour-narrow", 1582 "minute", 1583 "minute-short", 1584 "minute-narrow", 1585 "second", 1586 "second-short", 1587 "second-narrow", 1588 "zone", 1589 "zone-short", 1590 
"zone-narrow") 1591 .freeze(); 1592 static MapComparator<String> nameFieldOrder = 1593 new MapComparator<String>().add(PersonNameFormatter.ModifiedField.ALL_SAMPLES).freeze(); 1594 static MapComparator<String> orderValueOrder = 1595 new MapComparator<String>() 1596 .add(PersonNameFormatter.Order.ALL, Object::toString) 1597 .freeze(); 1598 static MapComparator<String> lengthValueOrder = 1599 new MapComparator<String>() 1600 .add(PersonNameFormatter.Length.ALL, Object::toString) 1601 .freeze(); 1602 static MapComparator<String> usageValueOrder = 1603 new MapComparator<String>() 1604 .add(PersonNameFormatter.Usage.ALL, Object::toString) 1605 .freeze(); 1606 static MapComparator<String> formalityValueOrder = 1607 new MapComparator<String>() 1608 .add(PersonNameFormatter.Formality.ALL, Object::toString) 1609 .freeze(); 1610 static MapComparator<String> sampleNameItemOrder = 1611 new MapComparator<String>() 1612 .add(PersonNameFormatter.SampleType.ALL, Object::toString) 1613 .freeze(); 1614 1615 // TODO We could build most of the above from the dtd data for literal values. That way they 1616 // would always be 1617 // in sync. 
1618 getUnitOrder()1619 public static MapComparator<String> getUnitOrder() { 1620 return UnitOrderHolder.INSTANCE; 1621 } 1622 1623 private static final class UnitOrderHolder { 1624 private static final MapComparator<String> INSTANCE = 1625 // new MapComparator<String>() 1626 // 1627 // .add(Validity.getInstance().getCodeToStatus(LstrType.unit).keySet()) 1628 // .freeze(); 1629 // } 1630 new MapComparator<>( 1631 Arrays.asList( 1632 "acceleration-g-force", 1633 "acceleration-meter-per-square-second", 1634 "acceleration-meter-per-second-squared", // deprecated 1635 "angle-revolution", 1636 "angle-radian", 1637 "angle-degree", 1638 "angle-arc-minute", 1639 "angle-arc-second", 1640 "area-square-kilometer", 1641 "area-hectare", 1642 "area-square-meter", 1643 "area-square-centimeter", 1644 "area-square-mile", 1645 "area-acre", 1646 "area-square-yard", 1647 "area-square-foot", 1648 "area-square-inch", 1649 "area-dunam", 1650 "concentr-karat", 1651 "proportion-karat", // deprecated 1652 "concentr-milligram-ofglucose-per-deciliter", 1653 "concentr-milligram-per-deciliter", 1654 "concentr-millimole-per-liter", 1655 "concentr-item", 1656 "concentr-portion", 1657 "concentr-permillion", 1658 "concentr-part-per-million", // deprecated 1659 "concentr-percent", 1660 "concentr-permille", 1661 "concentr-permyriad", 1662 "concentr-mole", 1663 "concentr-ofglucose", 1664 "consumption-liter-per-kilometer", 1665 "consumption-liter-per-100-kilometer", 1666 "consumption-liter-per-100kilometers", // deprecated 1667 "consumption-mile-per-gallon", 1668 "consumption-mile-per-gallon-imperial", 1669 "digital-petabyte", 1670 "digital-terabyte", 1671 "digital-terabit", 1672 "digital-gigabyte", 1673 "digital-gigabit", 1674 "digital-megabyte", 1675 "digital-megabit", 1676 "digital-kilobyte", 1677 "digital-kilobit", 1678 "digital-byte", 1679 "digital-bit", 1680 "duration-century", 1681 "duration-decade", 1682 "duration-year", 1683 "duration-year-person", 1684 "duration-quarter", 1685 
"duration-month", 1686 "duration-month-person", 1687 "duration-week", 1688 "duration-week-person", 1689 "duration-day", 1690 "duration-day-person", 1691 "duration-hour", 1692 "duration-minute", 1693 "duration-second", 1694 "duration-millisecond", 1695 "duration-microsecond", 1696 "duration-nanosecond", 1697 "electric-ampere", 1698 "electric-milliampere", 1699 "electric-ohm", 1700 "electric-volt", 1701 "energy-kilocalorie", 1702 "energy-calorie", 1703 "energy-foodcalorie", 1704 "energy-kilojoule", 1705 "energy-joule", 1706 "energy-kilowatt-hour", 1707 "energy-electronvolt", 1708 "energy-british-thermal-unit", 1709 "energy-therm-us", 1710 "force-pound-force", 1711 "force-newton", 1712 "force-kilowatt-hour-per-100-kilometer", 1713 "frequency-gigahertz", 1714 "frequency-megahertz", 1715 "frequency-kilohertz", 1716 "frequency-hertz", 1717 "graphics-em", 1718 "graphics-pixel", 1719 "graphics-megapixel", 1720 "graphics-pixel-per-centimeter", 1721 "graphics-pixel-per-inch", 1722 "graphics-dot-per-centimeter", 1723 "graphics-dot-per-inch", 1724 "graphics-dot", 1725 "length-earth-radius", 1726 "length-100-kilometer", 1727 "length-kilometer", 1728 "length-meter", 1729 "length-decimeter", 1730 "length-centimeter", 1731 "length-millimeter", 1732 "length-micrometer", 1733 "length-nanometer", 1734 "length-picometer", 1735 "length-mile", 1736 "length-yard", 1737 "length-foot", 1738 "length-inch", 1739 "length-parsec", 1740 "length-light-year", 1741 "length-astronomical-unit", 1742 "length-furlong", 1743 "length-fathom", 1744 "length-nautical-mile", 1745 "length-mile-scandinavian", 1746 "length-point", 1747 "length-solar-radius", 1748 "light-lux", 1749 "light-candela", 1750 "light-lumen", 1751 "light-solar-luminosity", 1752 "mass-tonne", 1753 "mass-metric-ton", 1754 "mass-kilogram", 1755 "mass-gram", 1756 "mass-milligram", 1757 "mass-microgram", 1758 "mass-ton", 1759 "mass-stone", 1760 "mass-pound", 1761 "mass-ounce", 1762 "mass-ounce-troy", 1763 "mass-carat", 1764 "mass-dalton", 
1765 "mass-earth-mass", 1766 "mass-solar-mass", 1767 "mass-grain", 1768 "power-gigawatt", 1769 "power-megawatt", 1770 "power-kilowatt", 1771 "power-watt", 1772 "power-milliwatt", 1773 "power-horsepower", 1774 "pressure-millimeter-ofhg", 1775 "pressure-millimeter-of-mercury", // deprecated 1776 "pressure-ofhg", 1777 "pressure-pound-force-per-square-inch", 1778 "pressure-pound-per-square-inch", // deprecated 1779 "pressure-inch-ofhg", 1780 "pressure-inch-hg", // deprecated 1781 "pressure-bar", 1782 "pressure-millibar", 1783 "pressure-atmosphere", 1784 "pressure-pascal", 1785 "pressure-hectopascal", 1786 "pressure-kilopascal", 1787 "pressure-megapascal", 1788 "speed-kilometer-per-hour", 1789 "speed-meter-per-second", 1790 "speed-mile-per-hour", 1791 "speed-knot", 1792 "speed-beaufort", 1793 "temperature-generic", 1794 "temperature-celsius", 1795 "temperature-fahrenheit", 1796 "temperature-kelvin", 1797 "torque-pound-force-foot", 1798 "torque-pound-foot", // deprecated 1799 "torque-newton-meter", 1800 "volume-cubic-kilometer", 1801 "volume-cubic-meter", 1802 "volume-cubic-centimeter", 1803 "volume-cubic-mile", 1804 "volume-cubic-yard", 1805 "volume-cubic-foot", 1806 "volume-cubic-inch", 1807 "volume-megaliter", 1808 "volume-hectoliter", 1809 "volume-liter", 1810 "volume-deciliter", 1811 "volume-centiliter", 1812 "volume-milliliter", 1813 "volume-pint-metric", 1814 "volume-cup-metric", 1815 "volume-acre-foot", 1816 "volume-bushel", 1817 "volume-gallon", 1818 "volume-gallon-imperial", 1819 "volume-quart", 1820 "volume-pint", 1821 "volume-pint-imperial", 1822 "volume-cup", 1823 "volume-fluid-ounce", 1824 "volume-fluid-ounce-imperial", 1825 "volume-tablespoon", 1826 "volume-teaspoon", 1827 "volume-barrel", 1828 "volume-dessert-spoon", 1829 "volume-dessert-spoon-imperial", 1830 "volume-drop", 1831 "volume-dram", 1832 "volume-jigger", 1833 "volume-pinch", 1834 "volume-quart-imperial", 1835 "angle-steradian", 1836 "concentr-katal", 1837 "electric-coulomb", 1838 
"electric-farad", 1839 "electric-henry", 1840 "electric-siemens", 1841 "energy-calorie-it", 1842 "energy-british-thermal-unit-it", 1843 "energy-becquerel", 1844 "energy-sievert", 1845 "energy-gray", 1846 "force-kilogram-force", 1847 "length-rod", 1848 "length-chain", 1849 "magnetic-tesla", 1850 "magnetic-weber", 1851 "temperature-rankine", 1852 "duration-fortnight", 1853 "mass-slug", 1854 "pressure-gasoline-energy-density", 1855 "length-rin", 1856 "length-sun", 1857 "length-shaku-length", 1858 "length-shaku-cloth", 1859 "length-ken", 1860 "length-jo-jp", 1861 "length-ri-jp", 1862 "area-bu-jp", 1863 "area-se-jp", 1864 "area-cho", 1865 "volume-kosaji", 1866 "volume-osaji", 1867 "volume-cup-jp", 1868 "volume-shaku", 1869 "volume-sai", 1870 "volume-to-jp", 1871 "volume-koku", 1872 "speed-light-speed", 1873 "mass-fun", 1874 "concentr-portion-per-1e9", 1875 "duration-night")) 1876 .freeze(); 1877 } 1878 1879 static MapComparator<String> countValueOrder = 1880 new MapComparator<String>() 1881 .add("0", "1", "zero", "one", "two", "few", "many", "other") 1882 .freeze(); 1883 static MapComparator<String> unitLengthOrder = 1884 new MapComparator<String>().add("long", "short", "narrow").freeze(); 1885 static MapComparator<String> currencyFormatOrder = 1886 new MapComparator<String>().add("standard", "accounting").freeze(); 1887 static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator(); 1888 1889 static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator(); 1890 1891 // Hack for US 1892 static final Comparator<String> UNICODE_SET_COMPARATOR = 1893 new Comparator<>() { 1894 @Override 1895 public int compare(String o1, String o2) { 1896 if (o1.contains("{")) { 1897 o1 = o1.replace("{", ""); 1898 } 1899 if (o2.contains("{")) { 1900 o2 = o2.replace("{", ""); 1901 } 1902 return COMP.compare(o1, o2); 1903 } 1904 }; 1905 getAttributeValueComparator(String element, String attribute)1906 public static Comparator<String> 
getAttributeValueComparator(String element, String attribute) { 1907 return getAttributeValueComparator(DtdType.ldml, element, attribute); 1908 } 1909 getAttributeValueComparator( DtdType type, String element, String attribute)1910 static Comparator<String> getAttributeValueComparator( 1911 DtdType type, String element, String attribute) { 1912 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1913 Comparator<String> comp = valueOrdering; 1914 if (type != DtdType.ldml && type != DtdType.ldmlICU) { 1915 return comp; 1916 } 1917 if (attribute.equals("day")) { // && (element.startsWith("weekend") 1918 comp = dayValueOrder; 1919 } else if (attribute.equals("type")) { 1920 if (element.endsWith("FormatLength")) { 1921 comp = lengthOrder; 1922 } else if (element.endsWith("Width")) { 1923 comp = widthOrder; 1924 } else if (element.equals("day")) { 1925 comp = dayValueOrder; 1926 } else if (element.equals("field")) { 1927 comp = dateFieldOrder; 1928 } else if (element.equals("zone")) { 1929 comp = zoneOrder; 1930 } else if (element.equals("listPatternPart")) { 1931 comp = listPatternOrder; 1932 } else if (element.equals("currencyFormat")) { 1933 comp = currencyFormatOrder; 1934 } else if (element.equals("unitLength")) { 1935 comp = unitLengthOrder; 1936 } else if (element.equals("unit")) { 1937 comp = getUnitOrder(); 1938 } else if (element.equals("dayPeriod")) { 1939 comp = dayPeriodOrder; 1940 } else if (element.equals("dateTimeFormat")) { 1941 comp = dateTimeFormatOrder; 1942 } else if (element.equals("nameField")) { 1943 comp = nameFieldOrder; 1944 } 1945 } else if (attribute.equals("order") && element.equals("personName")) { 1946 comp = orderValueOrder; 1947 } else if (attribute.equals("length") && element.equals("personName")) { 1948 comp = lengthValueOrder; 1949 } else if (attribute.equals("usage") && element.equals("personName")) { 1950 comp = usageValueOrder; 1951 } else if (attribute.equals("formality")) { 1952 comp = 
formalityValueOrder; 1953 } else if (attribute.equals("item") && element.equals("sampleName")) { 1954 comp = sampleNameItemOrder; 1955 } else if (attribute.equals("count") && !element.equals("minDays")) { 1956 comp = countValueOrder; 1957 } else if (attribute.equals("cp") && element.equals("annotation")) { 1958 comp = UNICODE_SET_COMPARATOR; 1959 } 1960 return comp; 1961 } 1962 1963 /** Comparator for attributes in CLDR files */ 1964 private static AttributeValueComparator ldmlAvc = 1965 new AttributeValueComparator() { 1966 @Override 1967 public int compare(String element, String attribute, String value1, String value2) { 1968 Comparator<String> comp = getAttributeValueComparator(element, attribute); 1969 return comp.compare(value1, value2); 1970 } 1971 }; 1972 hasValue(String elementName)1973 public boolean hasValue(String elementName) { 1974 return nameToElement.get(elementName).type == ElementType.PCDATA; 1975 } 1976 isMetadata(XPathParts pathPlain)1977 public boolean isMetadata(XPathParts pathPlain) { 1978 for (String s : pathPlain.getElements()) { 1979 Element e = getElementFromName().get(s); 1980 if (e.elementStatus == ElementStatus.metadata) { 1981 return true; 1982 } 1983 } 1984 return false; 1985 } 1986 isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1987 public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) { 1988 // TODO Don't use hard-coded list; instead add to DTD annotations 1989 final String element1 = pathPlain.getElement(1); 1990 final String element2 = pathPlain.getElement(2); 1991 final String elementN = pathPlain.getElement(-1); 1992 switch (dtdType2) { 1993 case ldml: 1994 switch (element1) { 1995 case "generation": 1996 case "metadata": 1997 return true; 1998 } 1999 break; 2000 case ldmlBCP47: 2001 switch (element1) { 2002 case "generation": 2003 case "version": 2004 return true; 2005 } 2006 break; 2007 //// 2008 // 
supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment 2009 case supplementalData: 2010 // these are NOT under /metadata/ but are actually metadata 2011 switch (element1) { 2012 case "generation": 2013 case "version": 2014 case "validity": 2015 case "references": 2016 case "coverageLevels": 2017 return true; 2018 case "transforms": 2019 return elementN.equals("comment"); 2020 case "metadata": 2021 // these ARE under /metadata/, but many others under /metadata/ are NOT 2022 // actually metadata. 2023 switch (element2) { 2024 case "validity": 2025 case "serialElements": 2026 case "suppress": 2027 case "distinguishing": 2028 case "blocking": 2029 case "casingData": 2030 return true; 2031 } 2032 break; 2033 } 2034 break; 2035 default: 2036 } 2037 return false; 2038 } 2039 isDeprecated(XPathParts pathPlain)2040 public boolean isDeprecated(XPathParts pathPlain) { 2041 for (int i = 0; i < pathPlain.size(); ++i) { 2042 String elementName = pathPlain.getElement(i); 2043 if (isDeprecated(elementName, "*", null)) { 2044 return true; 2045 } 2046 for (String attribute : pathPlain.getAttributeKeys(i)) { 2047 String attributeValue = pathPlain.getAttributeValue(i, attribute); 2048 if (isDeprecated(elementName, attribute, attributeValue)) { 2049 return true; 2050 } 2051 } 2052 } 2053 return false; 2054 } 2055 2056 public static final Splitter SPACE_SPLITTER = 2057 Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings(); 2058 public static final Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings(); 2059 public static final Splitter CR_SPLITTER = 2060 Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings(); 2061 2062 private static class XPathPartsSet { 2063 private final Set<XPathParts> list = new LinkedHashSet<>(); 2064 addElement(String element)2065 private void addElement(String element) { 2066 if (list.isEmpty()) { 2067 list.add(new XPathParts().addElement(element)); 2068 } else { 
2069 for (XPathParts item : list) { 2070 item.addElement(element); 2071 } 2072 } 2073 } 2074 addAttribute(String attribute, String attributeValue)2075 private void addAttribute(String attribute, String attributeValue) { 2076 for (XPathParts item : list) { 2077 item.addAttribute(attribute, attributeValue); 2078 } 2079 } 2080 setElement(int i, String string)2081 private void setElement(int i, String string) { 2082 for (XPathParts item : list) { 2083 item.setElement(i, string); 2084 } 2085 } 2086 addAttributes(String attribute, List<String> attributeValues)2087 private void addAttributes(String attribute, List<String> attributeValues) { 2088 if (attributeValues.size() == 1) { 2089 addAttribute(attribute, attributeValues.iterator().next()); 2090 } else { 2091 // duplicate all the items in the list with the given values 2092 Set<XPathParts> newList = new LinkedHashSet<>(); 2093 for (XPathParts item : list) { 2094 for (String attributeValue : attributeValues) { 2095 XPathParts newItem = item.cloneAsThawed(); 2096 newItem.addAttribute(attribute, attributeValue); 2097 newList.add(newItem); 2098 } 2099 } 2100 list.clear(); 2101 list.addAll(newList); 2102 } 2103 } 2104 toStrings()2105 private ImmutableSet<String> toStrings() { 2106 Builder<String> result = new ImmutableSet.Builder<>(); 2107 2108 for (XPathParts item : list) { 2109 result.add(item.toString()); 2110 } 2111 return result.build(); 2112 } 2113 2114 @Override toString()2115 public String toString() { 2116 return list.toString(); 2117 } 2118 } 2119 getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)2120 public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) { 2121 extras.clear(); 2122 Map<String, String> valueAttributes = new HashMap<>(); 2123 XPathPartsSet pathResult = new XPathPartsSet(); 2124 String element = null; 2125 for (int i = 0; i < pathPlain.size(); ++i) { 2126 element = pathPlain.getElement(i); 2127 pathResult.addElement(element); 2128 
valueAttributes.clear(); 2129 for (String attribute : pathPlain.getAttributeKeys(i)) { 2130 AttributeStatus status = getAttributeStatus(element, attribute); 2131 final String attributeValue = pathPlain.getAttributeValue(i, attribute); 2132 switch (status) { 2133 case distinguished: 2134 AttributeType attrType = getAttributeType(element, attribute); 2135 if (attrType == AttributeType.NMTOKENS) { 2136 pathResult.addAttributes( 2137 attribute, SPACE_SPLITTER.splitToList(attributeValue)); 2138 } else { 2139 pathResult.addAttribute(attribute, attributeValue); 2140 } 2141 break; 2142 case value: 2143 valueAttributes.put(attribute, attributeValue); 2144 break; 2145 case metadata: 2146 break; 2147 } 2148 } 2149 if (!valueAttributes.isEmpty()) { 2150 boolean hasValue = hasValue(element); 2151 // if it doesn't have a value, we construct new child elements, with _ prefix 2152 // if it does have a value, we have to play a further trick, since 2153 // we can't have a value and child elements at the same level. 2154 // So we use a _ suffix on the element. 
2155 if (hasValue) { 2156 pathResult.setElement(i, element + "_"); 2157 } else { 2158 int debug = 0; 2159 } 2160 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) { 2161 final String attribute = attributeAndValue.getKey(); 2162 final String attributeValue = attributeAndValue.getValue(); 2163 2164 Set<String> pathsShort = pathResult.toStrings(); 2165 AttributeType attrType = getAttributeType(element, attribute); 2166 for (String pathShort : pathsShort) { 2167 pathShort += "/_" + attribute; 2168 if (attrType == AttributeType.NMTOKENS) { 2169 for (String valuePart : SPACE_SPLITTER.split(attributeValue)) { 2170 extras.put(pathShort, valuePart); 2171 } 2172 } else { 2173 extras.put(pathShort, attributeValue); 2174 } 2175 } 2176 } 2177 if (hasValue) { 2178 pathResult.setElement(i, element); // restore 2179 } 2180 } 2181 } 2182 // Only add the path if it could have a value, looking at the last element 2183 if (!hasValue(element)) { 2184 return null; 2185 } 2186 return pathResult.toStrings(); 2187 } 2188 getAttributeType(String elementName, String attributeName)2189 public AttributeType getAttributeType(String elementName, String attributeName) { 2190 Attribute attr = getAttribute(elementName, attributeName); 2191 return (attr != null) ? attr.type : null; 2192 } 2193 getAttribute(String elementName, String attributeName)2194 public Attribute getAttribute(String elementName, String attributeName) { 2195 Element element = nameToElement.get(elementName); 2196 return (element != null) ? 
element.getAttributeNamed(attributeName) : null; 2197 } 2198 2199 // TODO: add support for following to DTD annotations, and rework API 2200 2201 static final Set<String> SPACED_VALUES = ImmutableSet.of("idValidity", "languageGroup"); 2202 getValueSplitter(XPathParts pathPlain)2203 public static Splitter getValueSplitter(XPathParts pathPlain) { 2204 if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) { 2205 return SPACE_SPLITTER; 2206 } else if (pathPlain.getElement(-1).equals("annotation") 2207 && !pathPlain.getAttributeKeys(-1).contains("tts")) { 2208 return BAR_SPLITTER; 2209 } 2210 return CR_SPLITTER; 2211 } 2212 isComment(XPathParts pathPlain, String line)2213 public static boolean isComment(XPathParts pathPlain, String line) { 2214 if (pathPlain.contains("transform")) { 2215 if (line.startsWith("#")) { 2216 return true; 2217 } 2218 } 2219 return false; 2220 } 2221 isExtraSplit(String extraPath)2222 public static boolean isExtraSplit(String extraPath) { 2223 if (extraPath.endsWith("/_type") 2224 && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) { 2225 return true; 2226 } 2227 return false; 2228 } 2229 2230 /** Return the value status for an EAV */ getValueStatus(String elementName, String attributeName, String value)2231 public ValueStatus getValueStatus(String elementName, String attributeName, String value) { 2232 Element element = nameToElement.get(elementName); 2233 if (element == null) { 2234 return ValueStatus.invalid; 2235 } 2236 Attribute attr = element.getAttributeNamed(attributeName); 2237 if (attr == null) { 2238 return ValueStatus.invalid; 2239 } 2240 return attr.getValueStatus(value); 2241 } 2242 2243 /** Return element-attribute pairs with non-enumerated values, for quick checks. 
*/ getNonEnumerated(Map<String, String> matchValues)2244 public Multimap<String, String> getNonEnumerated(Map<String, String> matchValues) { 2245 Multimap<String, String> nonEnumeratedElementToAttribute = 2246 TreeMultimap.create(); // make tree for ease of debugging 2247 for (Entry<String, Element> entry : nameToElement.entrySet()) { 2248 Element element = entry.getValue(); 2249 for (Attribute attribute : element.attributes.keySet()) { 2250 if (attribute.type != AttributeType.ENUMERATED_TYPE) { 2251 String elementName = element.getName(); 2252 String attrName = attribute.getName(); 2253 nonEnumeratedElementToAttribute.put(elementName, attrName); 2254 if (attribute.matchValue != null) { 2255 matchValues.put( 2256 elementName + "\t" + attrName, attribute.matchValue.getName()); 2257 } 2258 } 2259 } 2260 } 2261 return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute); 2262 } 2263 2264 /** Get the value constraint on the last element in a path */ getValueConstraint(String xpath)2265 public static ValueConstraint getValueConstraint(String xpath) { 2266 return getElement(xpath, -1).getValueConstraint(); 2267 } 2268 2269 /** Get an element from a path and element index. 
*/ getElement(String xpath, int elementIndex)2270 public static Element getElement(String xpath, int elementIndex) { 2271 XPathParts parts = XPathParts.getFrozenInstance(xpath); 2272 return DtdData.getInstance(DtdType.valueOf(parts.getElement(0))) 2273 .getElementFromName() 2274 .get(parts.getElement(elementIndex)); 2275 } 2276 2277 public static class DtdGuide { 2278 public interface DtdVisitor { 2279 /** Return false if all children should be skipped */ visit( DtdType dtdType, Stack<Element> ancestors, Element child, Attribute attribute)2280 public boolean visit( 2281 DtdType dtdType, Stack<Element> ancestors, Element child, Attribute attribute); 2282 } 2283 2284 private Set<Element> seenElements = new HashSet<>(); 2285 private DtdVisitor dtdVisitor; 2286 private DtdType dtdType; 2287 private Stack<Element> ancestors = new Stack<>(); 2288 private boolean skipDeprecated; 2289 DtdGuide(boolean skipDeprecated, DtdVisitor dtdVisitor)2290 public DtdGuide(boolean skipDeprecated, DtdVisitor dtdVisitor) { 2291 this.dtdVisitor = dtdVisitor; 2292 this.skipDeprecated = skipDeprecated; 2293 process(DtdType.values()); 2294 } 2295 process(DtdType... dtdTypes)2296 public void process(DtdType... dtdTypes) { 2297 for (DtdType dt : dtdTypes.length != 0 ? dtdTypes : DtdType.values()) { 2298 dtdType = dt; 2299 process(getInstance(dtdType).ROOT); 2300 } 2301 } 2302 process(Element element)2303 private void process(Element element) { 2304 if (seenElements.contains(element) || !skipDeprecated && element.isDeprecated()) { 2305 return; 2306 } 2307 seenElements.add(element); 2308 for (Attribute attribute : element.getAttributes().keySet()) { 2309 if (!skipDeprecated && attribute.isDeprecated()) { 2310 continue; 2311 } 2312 if (!dtdVisitor.visit(dtdType, ancestors, element, attribute)) { 2313 return; 2314 } 2315 } 2316 ancestors.push(element); 2317 for (Element child : element.getChildren().keySet()) { 2318 process(child); 2319 } 2320 ancestors.pop(); 2321 } 2322 } 2323 } 2324