1 package org.unicode.cldr.util; 2 3 import java.io.File; 4 import java.io.StringReader; 5 import java.util.ArrayList; 6 import java.util.Arrays; 7 import java.util.Collection; 8 import java.util.Collections; 9 import java.util.Comparator; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.Iterator; 13 import java.util.LinkedHashMap; 14 import java.util.LinkedHashSet; 15 import java.util.List; 16 import java.util.Locale; 17 import java.util.Map; 18 import java.util.Map.Entry; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.concurrent.ConcurrentHashMap; 22 import java.util.concurrent.ConcurrentMap; 23 import java.util.regex.Pattern; 24 25 import com.google.common.base.CharMatcher; 26 import com.google.common.base.Joiner; 27 import com.google.common.base.Splitter; 28 import com.google.common.collect.ImmutableSet; 29 import com.google.common.collect.ImmutableSet.Builder; 30 import com.google.common.collect.ImmutableSetMultimap; 31 import com.google.common.collect.Multimap; 32 import com.google.common.collect.TreeMultimap; 33 import com.ibm.icu.impl.Relation; 34 import com.ibm.icu.text.Transform; 35 36 /** 37 * An immutable object that contains the structure of a DTD. 38 * @author markdavis 39 */ 40 public class DtdData extends XMLFileReader.SimpleHandler { 41 private static final String COMMENT_PREFIX = System.lineSeparator() + " "; 42 private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false); 43 private static final boolean USE_SYNTHESIZED = false; 44 45 private static final boolean DEBUG = false; 46 private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]"); 47 48 private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class); 49 private Map<String, Element> nameToElement = new HashMap<>(); 50 private MapComparator<String> elementComparator; 51 private MapComparator<String> attributeComparator; 52 53 public final Element ROOT; 54 public final Element PCDATA = elementFrom("#PCDATA"); 55 public final Element ANY = elementFrom("ANY"); 56 public final DtdType dtdType; 57 public final String version; 58 private Element lastElement; 59 private Attribute lastAttribute; 60 private Set<String> preCommentCache; 61 private DtdComparator dtdComparator; 62 63 public enum AttributeStatus { 64 distinguished ("§d"), 65 value ("§v"), 66 metadata ("§m︎"); 67 public final String shortName; AttributeStatus(String shortName)68 AttributeStatus(String shortName) { 69 this.shortName = shortName; 70 } getShortName(AttributeStatus status)71 public static String getShortName(AttributeStatus status) { 72 return status == null ? "" : status.shortName; 73 } 74 } 75 76 public enum Mode { 77 REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null"); 78 79 public final String source; 80 Mode(String s)81 Mode(String s) { 82 source = s; 83 } 84 forString(String mode)85 public static Mode forString(String mode) { 86 for (Mode value : Mode.values()) { 87 if (value.source.equals(mode)) { 88 return value; 89 } 90 } 91 if (mode == null) { 92 return NULL; 93 } 94 throw new IllegalArgumentException(mode); 95 } 96 } 97 98 public enum AttributeType { 99 CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE 100 } 101 102 static final Set<String> DRAFT_ON_NON_LEAF_ALLOWED = ImmutableSet.of("collation", "transform", "unitPreferenceData", "rulesetGrouping"); 103 104 public static class Attribute implements Named { 105 private static final Joiner JOINER_COMMA_SPACE = Joiner.on(", "); 106 public static final String AUG_TRAIL = "⟫"; 107 public static final String AUG_LEAD = "⟪"; 108 public static final String ENUM_TRAIL = "⟩"; 109 public static final String ENUM_LEAD = "⟨"; 110 public static final Pattern LEAD_TRAIL = Pattern.compile("(.*[" + AUG_LEAD + ENUM_LEAD + "])(.*)([" + AUG_TRAIL + ENUM_TRAIL + "].*)"); 111 public final String name; 112 public final Element element; 113 public final Mode mode; 114 public final String defaultValue; 115 public final AttributeType type; 116 public final Map<String, Integer> values; 117 private final Set<String> commentsPre; 118 private Set<String> commentsPost; 119 private boolean isDeprecatedAttribute; 120 public AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations 121 private Set<String> deprecatedValues = Collections.emptySet(); 122 public MatchValue matchValue; 123 private final Comparator<String> attributeValueComparator; 124 Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)125 private Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) { 126 commentsPre = firstComment; 127 element = element2; 128 name = aName.intern(); 129 if (name.equals("draft") // normally never permitted on elements with children, but special cases... 130 && !DRAFT_ON_NON_LEAF_ALLOWED.contains(element.getName())) { 131 int elementChildrenCount = element.getChildren().size(); 132 if (elementChildrenCount > 1 133 || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) { 134 isDeprecatedAttribute = true; 135 if (DEBUG) { 136 System.out.println(element.getName() + ":" + element.getChildren()); 137 } 138 } 139 } 140 mode = mode2; 141 defaultValue = value2 == null ? null 142 : value2.intern(); 143 AttributeType _type = AttributeType.ENUMERATED_TYPE; 144 Map<String, Integer> _values = Collections.emptyMap(); 145 if (split.length == 1) { 146 try { 147 _type = AttributeType.valueOf(split[0]); 148 } catch (Exception e) { 149 } 150 } 151 type = _type; 152 153 if (_type == AttributeType.ENUMERATED_TYPE) { 154 LinkedHashMap<String, Integer> temp = new LinkedHashMap<>(); 155 for (String part : split) { 156 if (part.length() != 0) { 157 temp.put(part.intern(), temp.size()); 158 } 159 } 160 _values = Collections.unmodifiableMap(temp); 161 } 162 values = _values; 163 attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name); 164 } 165 166 @Override toString()167 public String toString() { 168 return element.name + ":" + name; 169 } 170 getSampleValue()171 public String getSampleValue() { 172 return type == AttributeType.ENUMERATED_TYPE ? (values.containsKey("year") ? "year" : values.keySet().iterator().next()) 173 : matchValue != null ? matchValue.getSample() 174 : MatchValue.DEFAULT_SAMPLE; 175 } 176 appendDtdString(StringBuilder b)177 public StringBuilder appendDtdString(StringBuilder b) { 178 Attribute a = this; 179 b.append("<!ATTLIST " + element.name + " " + a.name); 180 boolean first; 181 if (a.type == AttributeType.ENUMERATED_TYPE) { 182 b.append(" ("); 183 first = true; 184 for (String s : a.values.keySet()) { 185 if (deprecatedValues.contains(s)) { 186 continue; 187 } 188 if (first) { 189 first = false; 190 } else { 191 b.append(" | "); 192 } 193 b.append(s); 194 } 195 b.append(")"); 196 } else { 197 b.append(' ').append(a.type); 198 } 199 if (a.mode != Mode.NULL) { 200 b.append(" ").append(a.mode.source); 201 } 202 if (a.defaultValue != null) { 203 b.append(" \"").append(a.defaultValue).append('"'); 204 } 205 b.append(" >"); 206 return b; 207 } 208 features()209 public String features() { 210 return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString()) 211 + (mode == Mode.NULL ? "" : ", mode=" + mode) 212 + (defaultValue == null ? "" : ", default=" + defaultValue); 213 } 214 215 @Override getName()216 public String getName() { 217 return name; 218 } 219 220 private static Splitter COMMA = Splitter.on(',').trimResults(); 221 addComment(String commentIn)222 public void addComment(String commentIn) { 223 if (commentIn.startsWith("@")) { 224 // there are exactly 2 cases: deprecated and ordered 225 switch (commentIn) { 226 case "@METADATA": 227 attributeStatus = AttributeStatus.metadata; 228 break; 229 case "@VALUE": 230 attributeStatus = AttributeStatus.value; 231 break; 232 case "@DEPRECATED": 233 isDeprecatedAttribute = true; 234 break; 235 default: 236 int colonPos = commentIn.indexOf(':'); 237 if (colonPos < 0) { 238 throw new IllegalArgumentException("Unrecognized annotation: " + commentIn); 239 } 240 String command = commentIn.substring(0, colonPos); 241 String argument = commentIn.substring(colonPos + 1); 242 switch(command) { 243 case "@DEPRECATED": 244 deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(argument))); 245 break; 246 case "@MATCH": 247 if (matchValue != null) { 248 throw new IllegalArgumentException("Conflicting @MATCH: " + matchValue.getName() + " & " + argument); 249 } 250 matchValue = MatchValue.of(argument); 251 break; 252 default: 253 throw new IllegalArgumentException("Unrecognized annotation: " + commentIn); 254 } 255 } 256 return; 257 } 258 commentsPost = addUnmodifiable(commentsPost, commentIn.trim()); 259 } 260 261 /** 262 * Special version of identity; only considers name and name of element 263 */ 264 @Override equals(Object obj)265 public boolean equals(Object obj) { 266 if (!(obj instanceof Attribute)) { 267 return false; 268 } 269 Attribute that = (Attribute) obj; 270 return name.equals(that.name) 271 && element.name.equals(that.element.name) // don't use plain element: circularity 272 // not relevant to identity 273 // && Objects.equals(comment, that.comment) 274 // && mode.equals(that.mode) 275 // && Objects.equals(defaultValue, that.defaultValue) 276 // && type.equals(that.type) 277 // && values.equals(that.values) 278 ; 279 } 280 281 /** 282 * Special version of identity; only considers name and name of element 283 */ 284 @Override hashCode()285 public int hashCode() { 286 return name.hashCode() * 37 287 + element.name.hashCode() // don't use plain element: circularity 288 // not relevant to identity 289 // ) * 37 + Objects.hashCode(comment)) * 37 290 // + mode.hashCode()) * 37 291 // + Objects.hashCode(defaultValue)) * 37 292 // + type.hashCode()) * 37 293 // + values.hashCode() 294 ; 295 } 296 isDeprecated()297 public boolean isDeprecated() { 298 return isDeprecatedAttribute; 299 } 300 isDeprecatedValue(String value)301 public boolean isDeprecatedValue(String value) { 302 return deprecatedValues.contains(value); 303 } 304 getStatus()305 public AttributeStatus getStatus() { 306 return attributeStatus; 307 } 308 getValueStatus(String value)309 public ValueStatus getValueStatus(String value) { 310 return deprecatedValues.contains(value) ? ValueStatus.invalid 311 : type == AttributeType.ENUMERATED_TYPE ? (values.containsKey(value) ? ValueStatus.valid : ValueStatus.invalid) 312 : matchValue == null ? ValueStatus.unknown 313 : matchValue.is(value) ? ValueStatus.valid 314 : ValueStatus.invalid; 315 } 316 getMatchString()317 public String getMatchString() { 318 return type == AttributeType.ENUMERATED_TYPE ? ENUM_LEAD + JOINER_COMMA_SPACE.join(values.keySet()) + ENUM_TRAIL 319 : matchValue != null ? AUG_LEAD + matchValue.toString() + AUG_TRAIL 320 : ""; 321 } 322 getMatchingName(Map<Attribute, Integer> attributes)323 public Attribute getMatchingName(Map<Attribute, Integer> attributes) { 324 for (Attribute attribute : attributes.keySet()) { 325 if (name.equals(attribute.getName())) { 326 return attribute; 327 } 328 } 329 return null; 330 } 331 332 } 333 334 public enum ValueStatus {invalid, unknown, valid} 335 DtdData(DtdType type, String version)336 private DtdData(DtdType type, String version) { 337 this.dtdType = type; 338 this.ROOT = elementFrom(type.rootType.toString()); 339 this.version = version; 340 } 341 addAttribute(String eName, String aName, String type, String mode, String value)342 private void addAttribute(String eName, String aName, String type, String mode, String value) { 343 Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache); 344 preCommentCache = null; 345 getAttributesFromName().put(aName, a); 346 CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size()); 347 lastElement = null; 348 lastAttribute = a; 349 } 350 351 public enum ElementType { 352 EMPTY, ANY, PCDATA("(#PCDATA)"), CHILDREN; 353 public final String source; 354 ElementType(String s)355 private ElementType(String s) { 356 source = s; 357 } 358 ElementType()359 private ElementType() { 360 source = name(); 361 } 362 } 363 364 interface Named { getName()365 String getName(); 366 } 367 368 public enum ElementStatus { 369 regular, metadata 370 } 371 372 public static class Element implements Named { 373 public final String name; 374 private String rawModel; 375 private ElementType type; 376 private final Map<Element, Integer> children = new LinkedHashMap<>(); 377 private final Map<Attribute, Integer> attributes = new LinkedHashMap<>(); 378 private Set<String> commentsPre; 379 private Set<String> commentsPost; 380 private String model; 381 private boolean isOrderedElement; 382 private boolean isDeprecatedElement; 383 private ElementStatus elementStatus = ElementStatus.regular; 384 Element(String name2)385 private Element(String name2) { 386 name = name2.intern(); 387 } 388 setChildren(DtdData dtdData, String model, Set<String> precomments)389 private void setChildren(DtdData dtdData, String model, Set<String> precomments) { 390 this.commentsPre = precomments; 391 rawModel = model; 392 this.model = clean(model); 393 if (model.equals("EMPTY")) { 394 type = ElementType.EMPTY; 395 return; 396 } 397 type = ElementType.CHILDREN; 398 for (String part : FILLER.split(model)) { 399 if (part.length() != 0) { 400 if (part.equals("#PCDATA")) { 401 type = ElementType.PCDATA; 402 } else if (part.equals("ANY")) { 403 type = ElementType.ANY; 404 } else { 405 CldrUtility.putNew(children, dtdData.elementFrom(part), children.size()); 406 } 407 } 408 } 409 if ((type == ElementType.CHILDREN) == (children.size() == 0) 410 && !model.startsWith("(#PCDATA|cp")) { 411 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model); 412 } 413 } 414 415 static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)"); 416 static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])"); 417 clean(String model2)418 private String clean(String model2) { 419 // (x) -> ( x ); 420 // x,y -> x, y 421 // x|y -> x | y 422 String result = CLEANER1.matcher(model2).replaceAll("$1 "); 423 result = CLEANER2.matcher(result).replaceAll(" $1"); 424 return result.equals(model2) 425 ? model2 426 : result; // for debugging 427 } 428 containsAttribute(String string)429 public boolean containsAttribute(String string) { 430 for (Attribute a : attributes.keySet()) { 431 if (a.name.equals(string)) { 432 return true; 433 } 434 } 435 return false; 436 } 437 438 @Override toString()439 public String toString() { 440 return name; 441 } 442 toDtdString()443 public String toDtdString() { 444 return "<!ELEMENT " + name + " " + getRawModel() + " >"; 445 } 446 getType()447 public ElementType getType() { 448 return type; 449 } 450 getChildren()451 public Map<Element, Integer> getChildren() { 452 return Collections.unmodifiableMap(children); 453 } 454 getAttributes()455 public Map<Attribute, Integer> getAttributes() { 456 return Collections.unmodifiableMap(attributes); 457 } 458 459 @Override getName()460 public String getName() { 461 return name; 462 } 463 getChildNamed(String string)464 public Element getChildNamed(String string) { 465 for (Element e : children.keySet()) { 466 if (e.name.equals(string)) { 467 return e; 468 } 469 } 470 return null; 471 } 472 getAttributeNamed(String string)473 public Attribute getAttributeNamed(String string) { 474 for (Attribute a : attributes.keySet()) { 475 if (a.name.equals(string)) { 476 return a; 477 } 478 } 479 return null; 480 } 481 addComment(String addition)482 public void addComment(String addition) { 483 if (addition.startsWith("@")) { 484 // there are exactly 3 cases: deprecated, ordered, and metadata 485 switch (addition) { 486 case "@ORDERED": 487 isOrderedElement = true; 488 break; 489 case "@DEPRECATED": 490 isDeprecatedElement = true; 491 break; 492 case "@METADATA": 493 elementStatus = ElementStatus.metadata; 494 break; 495 default: 496 throw new IllegalArgumentException("Unrecognized annotation: " + addition); 497 } 498 return; 499 } 500 commentsPost = addUnmodifiable(commentsPost, addition.trim()); 501 } 502 503 /** 504 * Special version of equals. Only the name is considered in the identity. 505 */ 506 @Override equals(Object obj)507 public boolean equals(Object obj) { 508 if (!(obj instanceof Element)) { 509 return false; 510 } 511 Element that = (Element) obj; 512 return name.equals(that.name) 513 // not relevant to the identity of the object 514 // && Objects.equals(comment, that.comment) 515 // && type == that.type 516 // && attributes.equals(that.attributes) 517 // && children.equals(that.children) 518 ; 519 } 520 521 /** 522 * Special version of hashcode. Only the name is considered in the identity. 523 */ 524 @Override hashCode()525 public int hashCode() { 526 return name.hashCode() 527 // not relevant to the identity of the object 528 // * 37 + Objects.hashCode(comment) 529 //) * 37 + Objects.hashCode(type) 530 // ) * 37 + attributes.hashCode() 531 // ) * 37 + children.hashCode() 532 ; 533 } 534 isDeprecated()535 public boolean isDeprecated() { 536 return isDeprecatedElement; 537 } 538 isOrdered()539 public boolean isOrdered() { 540 return isOrderedElement; 541 } 542 getElementStatus()543 public ElementStatus getElementStatus() { 544 return elementStatus; 545 } 546 547 /** 548 * @return the rawModel 549 */ getRawModel()550 public String getRawModel() { 551 return rawModel; 552 } 553 } 554 elementFrom(String name)555 private Element elementFrom(String name) { 556 Element result = nameToElement.get(name); 557 if (result == null) { 558 nameToElement.put(name, result = new Element(name)); 559 } 560 return result; 561 } 562 addElement(String name2, String model)563 private void addElement(String name2, String model) { 564 Element element = elementFrom(name2); 565 element.setChildren(this, model, preCommentCache); 566 preCommentCache = null; 567 lastElement = element; 568 lastAttribute = null; 569 } 570 addComment(String comment)571 private void addComment(String comment) { 572 comment = comment.trim(); 573 if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky" 574 if (comment.startsWith("@")) { 575 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment"); 576 } 577 preCommentCache = addUnmodifiable(preCommentCache, comment); 578 } else if (lastElement != null) { 579 lastElement.addComment(comment); 580 } else if (lastAttribute != null) { 581 lastAttribute.addComment(comment); 582 } else { 583 if (comment.startsWith("@")) { 584 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment"); 585 } 586 preCommentCache = addUnmodifiable(preCommentCache, comment); 587 } 588 } 589 590 // TODO hide this 591 /** 592 * @deprecated 593 */ 594 @Deprecated 595 @Override handleElementDecl(String name, String model)596 public void handleElementDecl(String name, String model) { 597 if (SHOW_ALL) { 598 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) > 599 System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >"); 600 } 601 addElement(name, model); 602 } 603 604 // TODO hide this 605 /** 606 * @deprecated 607 */ 608 @Deprecated 609 @Override handleStartDtd(String name, String publicId, String systemId)610 public void handleStartDtd(String name, String publicId, String systemId) { 611 DtdType explicitDtdType = DtdType.valueOf(name); 612 if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) { 613 throw new IllegalArgumentException("Mismatch in dtdTypes"); 614 } 615 } 616 617 /** 618 * @deprecated 619 */ 620 @Deprecated 621 @Override handleAttributeDecl(String eName, String aName, String type, String mode, String value)622 public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) { 623 if (SHOW_ALL) { 624 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > 625 // <!ATTLIST version number CDATA #REQUIRED > 626 // <!ATTLIST version cldrVersion CDATA #FIXED "27" > 627 628 System.out.println("<!ATTLIST " + eName 629 + " " + aName 630 + " " + type 631 + " " + mode 632 + (value == null ? "" : " \"" + value + "\"") 633 + " >"); 634 } 635 // HACK for 1.1.1 636 if (eName.equals("draft")) { 637 eName = "week"; 638 } 639 addAttribute(eName, aName, type, mode, value); 640 } 641 642 /** 643 * @deprecated 644 */ 645 @Deprecated 646 @Override handleComment(String path, String comment)647 public void handleComment(String path, String comment) { 648 if (comment.contains("Copyright")) { 649 // Zap the copyright comment, replace it with the current one. 650 comment = CldrUtility.getCopyrightString(); 651 } 652 if (SHOW_ALL) { 653 // <!-- true and false are deprecated. --> 654 System.out.println("<!-- " + comment.trim() + " -->"); 655 } 656 addComment(comment); 657 } 658 659 // TODO hide this 660 /** 661 * @deprecated 662 */ 663 @Deprecated 664 @Override handleEndDtd()665 public void handleEndDtd() { 666 throw new XMLFileReader.AbortException(); 667 } 668 669 /** 670 * Note that it always gets the trunk version 671 * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead 672 */ 673 @Deprecated getInstance(DtdType type)674 public static DtdData getInstance(DtdType type) { 675 return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory()); 676 } 677 678 /** 679 * Special form using version, used only by tests, etc. 680 */ getInstance(DtdType type, String version)681 public static DtdData getInstance(DtdType type, String version) { 682 File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory() 683 : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version); 684 685 return getInstance(type, version, directory); 686 } 687 688 private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE = new ConcurrentHashMap<>(); 689 690 /** 691 * Normal version of DtdData 692 * Get a DtdData, given the CLDR root directory. 693 * @param type which DtdType to return 694 * @param directory the CLDR Root directory, which contains the "common" directory. 695 * @return 696 */ getInstance(DtdType type, File directory)697 public static DtdData getInstance(DtdType type, File directory) { 698 Pair<DtdType, File> key = new Pair<>(type, directory); 699 DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory)); 700 return data; 701 } 702 getInstance(DtdType type, String version, File directory)703 private static DtdData getInstance(DtdType type, String version, File directory) { 704 DtdData simpleHandler = new DtdData(type, version); 705 XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler); 706 if (type != type.rootType) { 707 // read the real first, then add onto it. 708 readFile(type.rootType, xfr, directory); 709 } 710 readFile(type, xfr, directory); 711 // HACK 712 if (type == DtdType.ldmlICU) { 713 Element special = simpleHandler.nameToElement.get("special"); 714 for (String extraElementName : Arrays.asList( 715 "icu:breakIteratorData", 716 "icu:UCARules", 717 "icu:scripts", 718 "icu:transforms", 719 "icu:ruleBasedNumberFormats", 720 "icu:isLeapMonth", 721 "icu:version", 722 "icu:breakDictionaryData", 723 "icu:depends")) { 724 Element extraElement = simpleHandler.nameToElement.get(extraElementName); 725 special.children.put(extraElement, special.children.size()); 726 } 727 } 728 if (simpleHandler.ROOT.children.size() == 0) { 729 throw new IllegalArgumentException(); // should never happen 730 } 731 simpleHandler.finish(); 732 simpleHandler.freeze(); 733 return simpleHandler; 734 } 735 finish()736 private void finish() { 737 dtdComparator = new DtdComparator(); 738 } 739 readFile(DtdType type, XMLFileReader xfr, File directory)740 public static void readFile(DtdType type, XMLFileReader xfr, File directory) { 741 File file = new File(directory, type.dtdPath); 742 StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>" 743 + "<!DOCTYPE " + type 744 + " SYSTEM '" + file.getAbsolutePath() + "'>"); 745 xfr.read(type.toString(), s, -1, true); // DTD_TYPE_TO_FILE.get(type) 746 } 747 freeze()748 private void freeze() { 749 if (version == null) { // only generate for new versions 750 MergeLists<String> elementMergeList = new MergeLists<>(); 751 elementMergeList.add(dtdType.toString()); 752 MergeLists<String> attributeMergeList = new MergeLists<>(); 753 attributeMergeList.add("_q"); 754 755 for (Element element : nameToElement.values()) { 756 if (element.children.size() > 0) { 757 Collection<String> names = getNames(element.children.keySet()); 758 elementMergeList.add(names); 759 if (DEBUG) { 760 System.out.println(element.getName() + "\t→\t" + names); 761 } 762 } 763 if (element.attributes.size() > 0) { 764 Collection<String> names = getNames(element.attributes.keySet()); 765 attributeMergeList.add(names); 766 if (DEBUG) { 767 System.out.println(element.getName() + "\t→\t@" + names); 768 } 769 } 770 } 771 List<String> elementList = elementMergeList.merge(); 772 List<String> attributeList = attributeMergeList.merge(); 773 if (DEBUG) { 774 System.out.println("Element Ordering:\t" + elementList); 775 System.out.println("Attribute Ordering:\t" + attributeList); 776 } 777 elementComparator = new MapComparator<>(elementList).setErrorOnMissing(true).freeze(); 778 attributeComparator = new MapComparator<>(attributeList).setErrorOnMissing(true).freeze(); 779 } 780 nameToAttributes.freeze(); 781 nameToElement = Collections.unmodifiableMap(nameToElement); 782 } 783 getNames(Collection<? extends Named> keySet)784 private Collection<String> getNames(Collection<? extends Named> keySet) { 785 List<String> result = new ArrayList<>(); 786 for (Named e : keySet) { 787 result.add(e.getName()); 788 } 789 return result; 790 } 791 792 public enum DtdItem { 793 ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE 794 } 795 796 public interface AttributeValueComparator { compare(String element, String attribute, String value1, String value2)797 public int compare(String element, String attribute, String value1, String value2); 798 } 799 getDtdComparator(AttributeValueComparator avc)800 public Comparator<String> getDtdComparator(AttributeValueComparator avc) { 801 return dtdComparator; 802 } 803 getDtdComparator()804 public DtdComparator getDtdComparator() { 805 return dtdComparator; 806 } 807 808 public class DtdComparator implements Comparator<String> { 809 @Override compare(String path1, String path2)810 public int compare(String path1, String path2) { 811 XPathParts a = XPathParts.getFrozenInstance(path1); 812 XPathParts b = XPathParts.getFrozenInstance(path2); 813 return xpathComparator(a, b); 814 } 815 xpathComparator(XPathParts a, XPathParts b)816 public int xpathComparator(XPathParts a, XPathParts b) { 817 // there must always be at least one element 818 String baseA = a.getElement(0); 819 String baseB = b.getElement(0); 820 if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) { 821 throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB); 822 } 823 int min = Math.min(a.size(), b.size()); 824 Element parent = ROOT; 825 Element elementA; 826 for (int i = 1; i < min; ++i, parent = elementA) { 827 // add extra test for "fake" elements, used in diffing. they always start with _ 828 String elementRawA = a.getElement(i); 829 String elementRawB = b.getElement(i); 830 if (elementRawA.startsWith("_")) { 831 return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1; 832 } else if (elementRawB.startsWith("_")) { 833 return 1; 834 } 835 // 836 elementA = nameToElement.get(elementRawA); 837 Element elementB = nameToElement.get(elementRawB); 838 if (elementA != elementB) { 839 int aa = parent.children.get(elementA); 840 int bb = parent.children.get(elementB); 841 return aa - bb; 842 } 843 int countA = a.getAttributeCount(i); 844 int countB = b.getAttributeCount(i); 845 if (countA == 0 && countB == 0) { 846 continue; 847 } 848 // we have two ways to compare the attributes. One based on the dtd, 849 // and one based on explicit comparators 850 851 // at this point the elements are the same and correspond to elementA 852 // in the dtd 853 854 // Handle the special added elements 855 String aqValue = a.getAttributeValue(i, "_q"); 856 if (aqValue != null) { 857 String bqValue = b.getAttributeValue(i, "_q"); 858 if (!aqValue.equals(bqValue)) { 859 int aValue = Integer.parseInt(aqValue); 860 int bValue = Integer.parseInt(bqValue); 861 return aValue - bValue; 862 } 863 --countA; 864 --countB; 865 } 866 867 attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) { 868 Attribute main = attr.getKey(); 869 String valueA = a.getAttributeValue(i, main.name); 870 String valueB = b.getAttributeValue(i, main.name); 871 if (valueA == null) { 872 if (valueB != null) { 873 return -1; 874 } 875 } else if (valueB == null) { 876 return 1; 877 } else if (valueA.equals(valueB)) { 878 --countA; 879 --countB; 880 if (countA == 0 && countB == 0) { 881 break attributes; 882 } 883 continue; // TODO 884 } else if (main.attributeValueComparator != null) { 885 return main.attributeValueComparator.compare(valueA, valueB); 886 } else if (main.values.size() != 0) { 887 int aa = main.values.get(valueA); 888 int bb = main.values.get(valueB); 889 return aa - bb; 890 } else { 891 return valueA.compareTo(valueB); 892 } 893 } 894 if (countA != 0 || countB != 0) { 895 throw new IllegalArgumentException(); 896 } 897 } 898 return a.size() - b.size(); 899 } 900 } 901 getAttributeComparator()902 public MapComparator<String> getAttributeComparator() { 903 return attributeComparator; 904 } 905 906 getElementComparator()907 public MapComparator<String> getElementComparator() { 908 return elementComparator; 909 } 910 getAttributesFromName()911 public Relation<String, Attribute> getAttributesFromName() { 912 return nameToAttributes; 913 } 914 getElementFromName()915 public Map<String, Element> getElementFromName() { 916 return nameToElement; 917 } 918 919 @Override toString()920 public String toString() { 921 StringBuilder b = new StringBuilder(); 922 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) > 923 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. --> 924 Seen seen = new Seen(dtdType); 925 seen.seenElements.add(ANY); 926 seen.seenElements.add(PCDATA); 927 toString(ROOT, b, seen); 928 929 // Hack for ldmlIcu: catch the items that are not mentioned in the original 930 int currentEnd = b.length(); 931 for (Element e : nameToElement.values()) { 932 toString(e, b, seen); 933 } 934 if (currentEnd != b.length()) { 935 b.insert(currentEnd, 936 System.lineSeparator() + System.lineSeparator() 937 + "<!-- Elements not reachable from root! -->" 938 + System.lineSeparator()); 939 } 940 return b.toString(); 941 } 942 943 static final class Seen { 944 Set<Element> seenElements = new HashSet<>(); 945 Set<Attribute> seenAttributes = new HashSet<>(); 946 Seen(DtdType dtdType)947 public Seen(DtdType dtdType) { 948 if (dtdType.rootType == dtdType) { 949 return; 950 } 951 DtdData otherData = DtdData.getInstance(dtdType.rootType); 952 walk(otherData, otherData.ROOT); 953 seenElements.remove(otherData.nameToElement.get("special")); 954 } 955 walk(DtdData otherData, Element current)956 private void walk(DtdData otherData, Element current) { 957 seenElements.add(current); 958 seenAttributes.addAll(current.attributes.keySet()); 959 for (Element e : current.children.keySet()) { 960 walk(otherData, e); 961 } 962 } 963 } 964 getDescendents(Element start, Set<Element> toAddTo)965 public Set<Element> getDescendents(Element start, Set<Element> toAddTo) { 966 if (!toAddTo.contains(start)) { 967 toAddTo.add(start); 968 for (Element e : start.children.keySet()) { 969 getDescendents(e, toAddTo); 970 } 971 } 972 return toAddTo; 973 } 974 toString(Element current, StringBuilder b, Seen seen)975 private void toString(Element current, StringBuilder b, Seen seen) { 976 boolean first = true; 977 if (seen.seenElements.contains(current)) { 978 return; 979 } 980 seen.seenElements.add(current); 981 boolean elementDeprecated = isDeprecated(current.name, "*", "*"); 982 983 showComments(b, current.commentsPre, true); 984 b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >"); 985 if (USE_SYNTHESIZED) { 986 Element aliasElement = getElementFromName().get("alias"); 987 //b.append(current.rawChildren); 988 if (!current.children.isEmpty()) { 989 LinkedHashSet<Element> elements = new LinkedHashSet<>(current.children.keySet()); 990 boolean hasAlias = aliasElement != null && elements.remove(aliasElement); 991 //boolean hasSpecial = specialElement != null && elements.remove(specialElement); 992 if (hasAlias) { 993 b.append("(alias |"); 994 } 995 b.append("("); 996 // <!ELEMENT transformNames ( alias | (transformName | special)* ) > 997 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) > 998 999 for (Element e : elements) { 1000 if (first) { 1001 first = false; 1002 } else { 1003 b.append(", "); 1004 } 1005 b.append(e.name); 1006 if (e.type != ElementType.PCDATA) { 1007 b.append("*"); 1008 } 1009 } 1010 if (hasAlias) { 1011 b.append(")"); 1012 } 1013 b.append(")"); 1014 } else { 1015 b.append(current.type == null ? "???" : current.type.source); 1016 } 1017 b.append(">"); 1018 } 1019 showComments(b, current.commentsPost, false); 1020 if (isOrdered(current.name)) { 1021 b.append(COMMENT_PREFIX + "<!--@ORDERED-->"); 1022 } 1023 if (current.getElementStatus() != ElementStatus.regular) { 1024 b.append(COMMENT_PREFIX + "<!--@" 1025 + current.getElementStatus().toString().toUpperCase(Locale.ROOT) 1026 + "-->"); 1027 } 1028 if (elementDeprecated) { 1029 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1030 } 1031 1032 LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>(); 1033 1034 for (Attribute a : current.attributes.keySet()) { 1035 if (seen.seenAttributes.contains(a)) { 1036 continue; 1037 } 1038 seen.seenAttributes.add(a); 1039 boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*"); 1040 1041 deprecatedValues.clear(); 1042 1043 showComments(b, a.commentsPre, true); 1044 b.append("\n<!ATTLIST " + current.name + " " + a.name); 1045 if (a.type == AttributeType.ENUMERATED_TYPE) { 1046 b.append(" ("); 1047 first = true; 1048 for (String s : a.values.keySet()) { 1049 if (first) { 1050 first = false; 1051 } else { 1052 b.append(" | "); 1053 } 1054 b.append(s); 1055 if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) { 1056 deprecatedValues.add(s); 1057 } 1058 } 1059 b.append(")"); 1060 } else { 1061 b.append(' ').append(a.type); 1062 } 1063 if (a.mode != Mode.NULL) { 1064 b.append(" ").append(a.mode.source); 1065 } 1066 if (a.defaultValue != null) { 1067 b.append(" \"").append(a.defaultValue).append('"'); 1068 } 1069 b.append(" >"); 1070 showComments(b, a.commentsPost, false); 1071 // if (attributeDeprecated != deprecatedComment) { 1072 // System.out.println("*** BAD DEPRECATION ***" + a); 1073 // } 1074 if (a.matchValue != null) { 1075 b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->"); 1076 } 1077 if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) { 1078 b.append(COMMENT_PREFIX + "<!--@METADATA-->"); 1079 } else if (!isDistinguishing(current.name, a.name)) { 1080 b.append(COMMENT_PREFIX + "<!--@VALUE-->"); 1081 } 1082 if (attributeDeprecated) { 1083 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1084 } else if (!deprecatedValues.isEmpty()) { 1085 b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + Joiner.on(", ") 1086 .join(deprecatedValues) + "-->"); 1087 } 1088 } 1089 if (current.children.size() > 0) { 1090 for (Element e : current.children.keySet()) { 1091 toString(e, b, seen); 1092 } 1093 } 1094 } 1095 showComments(StringBuilder b, Set<String> comments, boolean separate)1096 private void showComments(StringBuilder b, Set<String> comments, boolean separate) { 1097 if (comments == null) { 1098 return; 1099 } 1100 if (separate && b.length() != 0) { 1101 b.append(System.lineSeparator()); 1102 } 1103 for (String c : comments) { 1104 boolean deprecatedComment = false; // the following served its purpose... c.toLowerCase(Locale.ENGLISH).contains("deprecat"); 1105 if (!deprecatedComment) { 1106 if (separate) { 1107 // special handling for very first comment 1108 if (b.length() == 0) { 1109 b.append("<!--") 1110 .append(System.lineSeparator()) 1111 .append(c) 1112 .append(System.lineSeparator()) 1113 .append("-->"); 1114 continue; 1115 } 1116 b.append(System.lineSeparator()); 1117 } else { 1118 b.append(COMMENT_PREFIX); 1119 } 1120 b.append("<!-- ").append(c).append(" -->"); 1121 } 1122 } 1123 } 1124 removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1125 public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) { 1126 for (Iterator<T> it = elements.iterator(); it.hasNext();) { 1127 T item = it.next(); 1128 if (matcher.transform(item) == Boolean.TRUE) { 1129 it.remove(); 1130 return item; 1131 } 1132 } 1133 return null; 1134 } 1135 getElements()1136 public Set<Element> getElements() { 1137 return new LinkedHashSet<>(nameToElement.values()); 1138 } 1139 getAttributes()1140 public Set<Attribute> getAttributes() { 1141 return new LinkedHashSet<>(nameToAttributes.values()); 1142 } 1143 isDistinguishing(String elementName, String attribute)1144 public boolean isDistinguishing(String elementName, String attribute) { 1145 return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished; 1146 } 1147 1148 static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft")); 1149 addUnmodifiable(Set<String> comment, String addition)1150 static final Set<String> addUnmodifiable(Set<String> comment, String addition) { 1151 if (comment == null) { 1152 return Collections.singleton(addition); 1153 } else { 1154 comment = new LinkedHashSet<>(comment); 1155 comment.add(addition); 1156 return Collections.unmodifiableSet(comment); 1157 } 1158 } 1159 1160 public class IllegalByDtdException extends RuntimeException { 1161 private static final long serialVersionUID = 1L; 1162 public final String elementName; 1163 public final String attributeName; 1164 public final String attributeValue; 1165 IllegalByDtdException(String elementName, String attributeName, String attributeValue)1166 public IllegalByDtdException(String elementName, String attributeName, String attributeValue) { 1167 this.elementName = elementName; 1168 this.attributeName = attributeName; 1169 this.attributeValue = attributeValue; 1170 } 1171 1172 @Override getMessage()1173 public String getMessage() { 1174 return "Dtd " + dtdType 1175 + " doesn’t allow " 1176 + "element=" + elementName 1177 + (attributeName == null ? "" : ", attribute: " + attributeName) 1178 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue); 1179 } 1180 } 1181 1182 //@SuppressWarnings("unused") isDeprecated(String elementName, String attributeName, String attributeValue)1183 public boolean isDeprecated(String elementName, String attributeName, String attributeValue) { 1184 Element element = nameToElement.get(elementName); 1185 if (element == null) { 1186 throw new IllegalByDtdException(elementName, attributeName, attributeValue); 1187 } else if (element.isDeprecatedElement) { 1188 return true; 1189 } 1190 if ("*".equals(attributeName) || "_q".equals(attributeName)) { 1191 return false; 1192 } 1193 Attribute attribute = element.getAttributeNamed(attributeName); 1194 if (attribute == null) { 1195 throw new IllegalByDtdException(elementName, attributeName, attributeValue); 1196 } else if (attribute.isDeprecatedAttribute) { 1197 return true; 1198 } 1199 return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*" 1200 } 1201 isOrdered(String elementName)1202 public boolean isOrdered(String elementName) { 1203 Element element = nameToElement.get(elementName); 1204 if (element == null) { 1205 if (elementName.startsWith("icu:")) { 1206 return false; 1207 } 1208 throw new IllegalByDtdException(elementName, null, null); 1209 } 1210 return element.isOrderedElement; 1211 } 1212 getAttributeStatus(String elementName, String attributeName)1213 public AttributeStatus getAttributeStatus(String elementName, String attributeName) { 1214 if ("_q".equals(attributeName)) { 1215 return AttributeStatus.distinguished; // special case 1216 } 1217 Element element = nameToElement.get(elementName); 1218 if (element == null) { 1219 if (elementName.startsWith("icu:")) { 1220 return AttributeStatus.distinguished; 1221 } 1222 throw new IllegalByDtdException(elementName, attributeName, null); 1223 } 1224 Attribute attribute = element.getAttributeNamed(attributeName); 1225 if (attribute == null) { 1226 if (elementName.startsWith("icu:")) { 1227 return AttributeStatus.distinguished; 1228 } 1229 throw new IllegalByDtdException(elementName, attributeName, null); 1230 } 1231 return attribute.attributeStatus; 1232 } 1233 1234 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1235 private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze(); 1236 1237 static MapComparator<String> dayValueOrder = new MapComparator<String>().add( 1238 "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze(); 1239 static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add( 1240 "midnight", "am", "noon", "pm", 1241 "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2", 1242 // The ones on the following line are no longer used actively. Can be removed later? 1243 "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze(); 1244 static MapComparator<String> listPatternOrder = new MapComparator<String>().add( 1245 "start", "middle", "end", "2", "3").freeze(); 1246 static MapComparator<String> widthOrder = new MapComparator<String>().add( 1247 "abbreviated", "narrow", "short", "wide", "all").freeze(); 1248 static MapComparator<String> lengthOrder = new MapComparator<String>().add( 1249 "full", "long", "medium", "short").freeze(); 1250 static MapComparator<String> dateFieldOrder = new MapComparator<String>().add( 1251 "era", "era-short", "era-narrow", 1252 "year", "year-short", "year-narrow", 1253 "quarter", "quarter-short", "quarter-narrow", 1254 "month", "month-short", "month-narrow", 1255 "week", "week-short", "week-narrow", 1256 "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow", 1257 "day", "day-short", "day-narrow", 1258 "dayOfYear", "dayOfYear-short", "dayOfYear-narrow", 1259 "weekday", "weekday-short", "weekday-narrow", 1260 "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow", 1261 "sun", "sun-short", "sun-narrow", 1262 "mon", "mon-short", "mon-narrow", 1263 "tue", "tue-short", "tue-narrow", 1264 "wed", "wed-short", "wed-narrow", 1265 "thu", "thu-short", "thu-narrow", 1266 "fri", "fri-short", "fri-narrow", 1267 "sat", "sat-short", "sat-narrow", 1268 "dayperiod-short", "dayperiod", "dayperiod-narrow", 1269 "hour", "hour-short", "hour-narrow", 1270 "minute", "minute-short", "minute-narrow", 1271 "second", "second-short", "second-narrow", 1272 "zone", "zone-short", "zone-narrow").freeze(); 1273 1274 /* TODO: change this to be data-file driven. Can do with new Unit preferences info; also put them in a more meaningful order (metric vs other; size) */ 1275 1276 public static final MapComparator<String> unitOrder = new MapComparator<String>().add( 1277 "acceleration-g-force", "acceleration-meter-per-square-second", 1278 "acceleration-meter-per-second-squared", // deprecated 1279 "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second", 1280 "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter", 1281 "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch", 1282 "area-dunam", 1283 "concentr-karat", 1284 "proportion-karat", // deprecated 1285 "concentr-milligram-ofglucose-per-deciliter", 1286 "concentr-milligram-per-deciliter", 1287 "concentr-millimole-per-liter", 1288 "concentr-item", 1289 "concentr-portion", 1290 "concentr-permillion", 1291 "concentr-part-per-million", // deprecated 1292 "concentr-percent", "concentr-permille", "concentr-permyriad", 1293 "concentr-mole", 1294 "concentr-ofglucose", 1295 "consumption-liter-per-kilometer", "consumption-liter-per-100-kilometer", 1296 "consumption-liter-per-100kilometers", // deprecated 1297 "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial", 1298 "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit", 1299 "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit", 1300 "digital-byte", "digital-bit", 1301 "duration-century", "duration-decade", 1302 "duration-year", "duration-year-person", 1303 "duration-month", "duration-month-person", 1304 "duration-week", "duration-week-person", 1305 "duration-day", "duration-day-person", 1306 "duration-hour", "duration-minute", "duration-second", 1307 "duration-millisecond", "duration-microsecond", "duration-nanosecond", 1308 "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt", 1309 "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour", 1310 "energy-electronvolt", 1311 "energy-british-thermal-unit", 1312 "energy-therm-us", 1313 "force-pound-force", 1314 "force-newton", 1315 "force-kilowatt-hour-per-100-kilometer", 1316 "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", "frequency-hertz", 1317 "graphics-em", "graphics-pixel", "graphics-megapixel", 1318 "graphics-pixel-per-centimeter", "graphics-pixel-per-inch", 1319 "graphics-dot-per-centimeter", "graphics-dot-per-inch", 1320 "graphics-dot", 1321 "length-earth-radius", 1322 "length-100-kilometer", 1323 "length-kilometer", "length-meter", "length-decimeter", "length-centimeter", 1324 "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer", 1325 "length-mile", "length-yard", "length-foot", "length-inch", 1326 "length-parsec", "length-light-year", "length-astronomical-unit", 1327 "length-furlong", "length-fathom", 1328 "length-nautical-mile", "length-mile-scandinavian", 1329 "length-point", 1330 "length-solar-radius", 1331 "light-lux", 1332 "light-candela", 1333 "light-lumen", 1334 "light-solar-luminosity", 1335 "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram", 1336 "mass-ton", "mass-stone", "mass-pound", "mass-ounce", 1337 "mass-ounce-troy", "mass-carat", 1338 "mass-dalton", 1339 "mass-earth-mass", 1340 "mass-solar-mass", 1341 1342 "mass-grain", 1343 1344 "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt", 1345 "power-horsepower", 1346 "pressure-millimeter-ofhg", 1347 "pressure-millimeter-of-mercury", // deprecated 1348 "pressure-ofhg", 1349 "pressure-pound-force-per-square-inch", 1350 "pressure-pound-per-square-inch", // deprecated 1351 "pressure-inch-ofhg", 1352 "pressure-inch-hg", // deprecated 1353 "pressure-bar", "pressure-millibar", "pressure-atmosphere", 1354 "pressure-pascal", 1355 "pressure-hectopascal", 1356 "pressure-kilopascal", 1357 "pressure-megapascal", 1358 "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot", 1359 "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin", 1360 "torque-pound-force-foot", 1361 "torque-pound-foot", // deprecated 1362 "torque-newton-meter", 1363 "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter", 1364 "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch", 1365 "volume-megaliter", "volume-hectoliter", "volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter", 1366 "volume-pint-metric", "volume-cup-metric", 1367 "volume-acre-foot", 1368 "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup", 1369 "volume-fluid-ounce", "volume-fluid-ounce-imperial", "volume-tablespoon", "volume-teaspoon", 1370 "volume-barrel", 1371 1372 "volume-dessert-spoon", 1373 "volume-dessert-spoon-imperial", 1374 "volume-drop", 1375 "volume-dram", 1376 "volume-jigger", 1377 "volume-pinch", 1378 "volume-quart-imperial" 1379 // "volume-pint-imperial" 1380 ).freeze(); 1381 1382 static MapComparator<String> countValueOrder = new MapComparator<String>().add( 1383 "0", "1", "zero", "one", "two", "few", "many", "other").freeze(); 1384 static MapComparator<String> unitLengthOrder = new MapComparator<String>().add( 1385 "long", "short", "narrow").freeze(); 1386 static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add( 1387 "standard", "accounting").freeze(); 1388 static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator(); 1389 1390 static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator(); 1391 1392 // Hack for US 1393 static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<String>() { 1394 @Override 1395 public int compare(String o1, String o2) { 1396 if (o1.contains("{")) { 1397 o1 = o1.replace("{", ""); 1398 } 1399 if (o2.contains("{")) { 1400 o2 = o2.replace("{", ""); 1401 } 1402 return COMP.compare(o1, o2); 1403 } 1404 1405 }; 1406 getAttributeValueComparator(String element, String attribute)1407 public static Comparator<String> getAttributeValueComparator(String element, String attribute) { 1408 return getAttributeValueComparator(DtdType.ldml, element, attribute); 1409 } 1410 getAttributeValueComparator(DtdType type, String element, String attribute)1411 static Comparator<String> getAttributeValueComparator(DtdType type, String element, String attribute) { 1412 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1413 Comparator<String> comp = valueOrdering; 1414 if (type != DtdType.ldml && type != DtdType.ldmlICU) { 1415 return comp; 1416 } 1417 if (attribute.equals("day")) { // && (element.startsWith("weekend") 1418 comp = dayValueOrder; 1419 } else if (attribute.equals("type")) { 1420 if (element.endsWith("FormatLength")) { 1421 comp = lengthOrder; 1422 } else if (element.endsWith("Width")) { 1423 comp = widthOrder; 1424 } else if (element.equals("day")) { 1425 comp = dayValueOrder; 1426 } else if (element.equals("field")) { 1427 comp = dateFieldOrder; 1428 } else if (element.equals("zone")) { 1429 comp = zoneOrder; 1430 } else if (element.equals("listPatternPart")) { 1431 comp = listPatternOrder; 1432 } else if (element.equals("currencyFormat")) { 1433 comp = currencyFormatOrder; 1434 } else if (element.equals("unitLength")) { 1435 comp = unitLengthOrder; 1436 } else if (element.equals("unit")) { 1437 comp = unitOrder; 1438 } else if (element.equals("dayPeriod")) { 1439 comp = dayPeriodOrder; 1440 } 1441 } else if (attribute.equals("count") && !element.equals("minDays")) { 1442 comp = countValueOrder; 1443 } else if (attribute.equals("cp") && element.equals("annotation")) { 1444 comp = UNICODE_SET_COMPARATOR; 1445 } 1446 return comp; 1447 } 1448 1449 /** 1450 * Comparator for attributes in CLDR files 1451 */ 1452 private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() { 1453 @Override 1454 public int compare(String element, String attribute, String value1, String value2) { 1455 Comparator<String> comp = getAttributeValueComparator(element, attribute); 1456 return comp.compare(value1, value2); 1457 } 1458 }; 1459 hasValue(String elementName)1460 public boolean hasValue(String elementName) { 1461 return nameToElement.get(elementName).type == ElementType.PCDATA; 1462 } 1463 isMetadata(XPathParts pathPlain)1464 public boolean isMetadata(XPathParts pathPlain) { 1465 for (String s : pathPlain.getElements()) { 1466 Element e = getElementFromName().get(s); 1467 if (e.elementStatus == ElementStatus.metadata) { 1468 return true; 1469 } 1470 } 1471 return false; 1472 } 1473 isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1474 public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) { 1475 // TODO Don't use hard-coded list; instead add to DTD annotations 1476 final String element1 = pathPlain.getElement(1); 1477 final String element2 = pathPlain.getElement(2); 1478 final String elementN = pathPlain.getElement(-1); 1479 switch (dtdType2) { 1480 case ldml: 1481 switch (element1) { 1482 case "generation": 1483 case "metadata": 1484 return true; 1485 } 1486 break; 1487 case ldmlBCP47: 1488 switch (element1) { 1489 case "generation": 1490 case "version": 1491 return true; 1492 } 1493 break; 1494 ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment 1495 case supplementalData: 1496 // these are NOT under /metadata/ but are actually metadata 1497 switch (element1) { 1498 case "generation": 1499 case "version": 1500 case "validity": 1501 case "references": 1502 case "coverageLevels": 1503 return true; 1504 case "transforms": 1505 return elementN.equals("comment"); 1506 case "metadata": 1507 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata. 1508 switch (element2) { 1509 case "validity": 1510 case "serialElements": 1511 case "suppress": 1512 case "distinguishing": 1513 case "blocking": 1514 case "casingData": 1515 return true; 1516 } 1517 break; 1518 } 1519 break; 1520 default: 1521 } 1522 return false; 1523 } 1524 isDeprecated(XPathParts pathPlain)1525 public boolean isDeprecated(XPathParts pathPlain) { 1526 for (int i = 0; i < pathPlain.size(); ++i) { 1527 String elementName = pathPlain.getElement(i); 1528 if (isDeprecated(elementName, "*", null)) { 1529 return true; 1530 } 1531 for (String attribute : pathPlain.getAttributeKeys(i)) { 1532 String attributeValue = pathPlain.getAttributeValue(i, attribute); 1533 if (isDeprecated(elementName, attribute, attributeValue)) { 1534 return true; 1535 } 1536 } 1537 } 1538 return false; 1539 } 1540 1541 public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings(); 1542 public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings(); 1543 public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings(); 1544 1545 private static class XPathPartsSet { 1546 private final Set<XPathParts> list = new LinkedHashSet<>(); 1547 addElement(String element)1548 private void addElement(String element) { 1549 if (list.isEmpty()) { 1550 list.add(new XPathParts().addElement(element)); 1551 } else { 1552 for (XPathParts item : list) { 1553 item.addElement(element); 1554 } 1555 } 1556 } 1557 addAttribute(String attribute, String attributeValue)1558 private void addAttribute(String attribute, String attributeValue) { 1559 for (XPathParts item : list) { 1560 item.addAttribute(attribute, attributeValue); 1561 } 1562 } 1563 setElement(int i, String string)1564 private void setElement(int i, String string) { 1565 for (XPathParts item : list) { 1566 item.setElement(i, string); 1567 } 1568 } 1569 addAttributes(String attribute, List<String> attributeValues)1570 private void addAttributes(String attribute, List<String> attributeValues) { 1571 if (attributeValues.size() == 1) { 1572 addAttribute(attribute, attributeValues.iterator().next()); 1573 } else { 1574 // duplicate all the items in the list with the given values 1575 Set<XPathParts> newList = new LinkedHashSet<>(); 1576 for (XPathParts item : list) { 1577 for (String attributeValue : attributeValues) { 1578 XPathParts newItem = item.cloneAsThawed(); 1579 newItem.addAttribute(attribute, attributeValue); 1580 newList.add(newItem); 1581 } 1582 } 1583 list.clear(); 1584 list.addAll(newList); 1585 } 1586 } 1587 toStrings()1588 private ImmutableSet<String> toStrings() { 1589 Builder<String> result = new ImmutableSet.Builder<>(); 1590 1591 for (XPathParts item : list) { 1592 result.add(item.toString()); 1593 } 1594 return result.build(); 1595 } 1596 1597 @Override toString()1598 public String toString() { 1599 return list.toString(); 1600 } 1601 } 1602 getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)1603 public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) { 1604 extras.clear(); 1605 Map<String, String> valueAttributes = new HashMap<>(); 1606 XPathPartsSet pathResult = new XPathPartsSet(); 1607 String element = null; 1608 for (int i = 0; i < pathPlain.size(); ++i) { 1609 element = pathPlain.getElement(i); 1610 pathResult.addElement(element); 1611 valueAttributes.clear(); 1612 for (String attribute : pathPlain.getAttributeKeys(i)) { 1613 AttributeStatus status = getAttributeStatus(element, attribute); 1614 final String attributeValue = pathPlain.getAttributeValue(i, attribute); 1615 switch (status) { 1616 case distinguished: 1617 AttributeType attrType = getAttributeType(element, attribute); 1618 if (attrType == AttributeType.NMTOKENS) { 1619 pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue)); 1620 } else { 1621 pathResult.addAttribute(attribute, attributeValue); 1622 } 1623 break; 1624 case value: 1625 valueAttributes.put(attribute, attributeValue); 1626 break; 1627 case metadata: 1628 break; 1629 } 1630 } 1631 if (!valueAttributes.isEmpty()) { 1632 boolean hasValue = hasValue(element); 1633 // if it doesn't have a value, we construct new child elements, with _ prefix 1634 // if it does have a value, we have to play a further trick, since 1635 // we can't have a value and child elements at the same level. 1636 // So we use a _ suffix on the element. 1637 if (hasValue) { 1638 pathResult.setElement(i, element + "_"); 1639 } else { 1640 int debug = 0; 1641 } 1642 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) { 1643 final String attribute = attributeAndValue.getKey(); 1644 final String attributeValue = attributeAndValue.getValue(); 1645 1646 Set<String> pathsShort = pathResult.toStrings(); 1647 AttributeType attrType = getAttributeType(element, attribute); 1648 for (String pathShort : pathsShort) { 1649 pathShort += "/_" + attribute; 1650 if (attrType == AttributeType.NMTOKENS) { 1651 for (String valuePart : SPACE_SPLITTER.split(attributeValue)) { 1652 extras.put(pathShort, valuePart); 1653 } 1654 } else { 1655 extras.put(pathShort, attributeValue); 1656 } 1657 } 1658 } 1659 if (hasValue) { 1660 pathResult.setElement(i, element); // restore 1661 } 1662 } 1663 } 1664 // Only add the path if it could have a value, looking at the last element 1665 if (!hasValue(element)) { 1666 return null; 1667 } 1668 return pathResult.toStrings(); 1669 } 1670 getAttributeType(String elementName, String attributeName)1671 public AttributeType getAttributeType(String elementName, String attributeName) { 1672 Attribute attr = getAttribute(elementName, attributeName); 1673 return (attr != null) ? attr.type : null; 1674 } 1675 getAttribute(String elementName, String attributeName)1676 public Attribute getAttribute(String elementName, String attributeName) { 1677 Element element = nameToElement.get(elementName); 1678 return (element != null) ? element.getAttributeNamed(attributeName) : null; 1679 } 1680 1681 // TODO: add support for following to DTD annotations, and rework API 1682 1683 static final Set<String> SPACED_VALUES = ImmutableSet.of( 1684 "idValidity", 1685 "languageGroup"); 1686 getValueSplitter(XPathParts pathPlain)1687 public static Splitter getValueSplitter(XPathParts pathPlain) { 1688 if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) { 1689 return SPACE_SPLITTER; 1690 } else if (pathPlain.getElement(-1).equals("annotation") 1691 && !pathPlain.getAttributeKeys(-1).contains("tts")) { 1692 return BAR_SPLITTER; 1693 } 1694 return CR_SPLITTER; 1695 } 1696 isComment(XPathParts pathPlain, String line)1697 public static boolean isComment(XPathParts pathPlain, String line) { 1698 if (pathPlain.contains("transform")) { 1699 if (line.startsWith("#")) { 1700 return true; 1701 } 1702 } 1703 return false; 1704 } 1705 isExtraSplit(String extraPath)1706 public static boolean isExtraSplit(String extraPath) { 1707 if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) { 1708 return true; 1709 } 1710 return false; 1711 } 1712 1713 /** 1714 * Return the value status for an EAV 1715 */ getValueStatus(String elementName, String attributeName, String value)1716 public ValueStatus getValueStatus(String elementName, String attributeName, String value) { 1717 Element element = nameToElement.get(elementName); 1718 if (element == null) { 1719 return ValueStatus.invalid; 1720 } 1721 Attribute attr = element.getAttributeNamed(attributeName); 1722 if (attr == null) { 1723 return ValueStatus.invalid; 1724 } 1725 return attr.getValueStatus(value); 1726 } 1727 1728 /** 1729 * Return element-attribute pairs with non-enumerated values, for quick checks. 1730 */ getNonEnumerated(Map<String,String> matchValues)1731 public Multimap<String, String> getNonEnumerated(Map<String,String> matchValues) { 1732 Multimap<String,String> nonEnumeratedElementToAttribute = TreeMultimap.create(); // make tree for ease of debugging 1733 for (Entry<String, Element> entry : nameToElement.entrySet()) { 1734 Element element = entry.getValue(); 1735 for (Attribute attribute : element.attributes.keySet()) { 1736 if (attribute.type != AttributeType.ENUMERATED_TYPE) { 1737 String elementName = element.getName(); 1738 String attrName = attribute.getName(); 1739 nonEnumeratedElementToAttribute.put(elementName, attrName); 1740 if (attribute.matchValue != null) { 1741 matchValues.put(elementName + "\t" + attrName, attribute.matchValue.getName()); 1742 } 1743 } 1744 } 1745 } 1746 return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute); 1747 } 1748 } 1749