1 package org.unicode.cldr.util; 2 3 import java.io.File; 4 import java.io.StringReader; 5 import java.util.ArrayList; 6 import java.util.Arrays; 7 import java.util.Collection; 8 import java.util.Collections; 9 import java.util.Comparator; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.Iterator; 13 import java.util.LinkedHashMap; 14 import java.util.LinkedHashSet; 15 import java.util.List; 16 import java.util.Locale; 17 import java.util.Map; 18 import java.util.Map.Entry; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.concurrent.ConcurrentHashMap; 22 import java.util.concurrent.ConcurrentMap; 23 import java.util.regex.Pattern; 24 25 import com.google.common.base.CharMatcher; 26 import com.google.common.base.Joiner; 27 import com.google.common.base.Splitter; 28 import com.google.common.collect.ImmutableSet; 29 import com.google.common.collect.ImmutableSet.Builder; 30 import com.google.common.collect.ImmutableSetMultimap; 31 import com.google.common.collect.Multimap; 32 import com.google.common.collect.TreeMultimap; 33 import com.ibm.icu.impl.Relation; 34 import com.ibm.icu.text.Transform; 35 36 /** 37 * An immutable object that contains the structure of a DTD. 38 * @author markdavis 39 */ 40 public class DtdData extends XMLFileReader.SimpleHandler { 41 private static final String COMMENT_PREFIX = System.lineSeparator() + " "; 42 private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false); 43 private static final boolean USE_SYNTHESIZED = false; 44 45 private static final boolean DEBUG = false; 46 private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]"); 47 48 private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class); 49 private Map<String, Element> nameToElement = new HashMap<>(); 50 private MapComparator<String> elementComparator; 51 private MapComparator<String> attributeComparator; 52 53 public final Element ROOT; 54 public final Element PCDATA = elementFrom("#PCDATA"); 55 public final Element ANY = elementFrom("ANY"); 56 public final DtdType dtdType; 57 public final String version; 58 private Element lastElement; 59 private Attribute lastAttribute; 60 private Set<String> preCommentCache; 61 private DtdComparator dtdComparator; 62 63 public enum AttributeStatus { 64 distinguished ("§d"), 65 value ("§v"), 66 metadata ("§m︎"); 67 public final String shortName; AttributeStatus(String shortName)68 AttributeStatus(String shortName) { 69 this.shortName = shortName; 70 } getShortName(AttributeStatus status)71 public static String getShortName(AttributeStatus status) { 72 return status == null ? "" : status.shortName; 73 } 74 } 75 76 public enum Mode { 77 REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null"); 78 79 public final String source; 80 Mode(String s)81 Mode(String s) { 82 source = s; 83 } 84 forString(String mode)85 public static Mode forString(String mode) { 86 for (Mode value : Mode.values()) { 87 if (value.source.equals(mode)) { 88 return value; 89 } 90 } 91 if (mode == null) { 92 return NULL; 93 } 94 throw new IllegalArgumentException(mode); 95 } 96 } 97 98 public enum AttributeType { 99 CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE 100 } 101 102 static final Set<String> DRAFT_ON_NON_LEAF_ALLOWED = ImmutableSet.of("collation", "transform", "unitPreferenceData", "rulesetGrouping"); 103 104 public static class Attribute implements Named { 105 private static final Joiner JOINER_COMMA_SPACE = Joiner.on(", "); 106 public static final String AUG_TRAIL = "⟫"; 107 public static final String AUG_LEAD = "⟪"; 108 public static final String ENUM_TRAIL = "⟩"; 109 public static final String ENUM_LEAD = "⟨"; 110 public static final Pattern LEAD_TRAIL = Pattern.compile("(.*[" + AUG_LEAD + ENUM_LEAD + "])(.*)([" + AUG_TRAIL + ENUM_TRAIL + "].*)"); 111 public final String name; 112 public final Element element; 113 public final Mode mode; 114 public final String defaultValue; 115 public final AttributeType type; 116 public final Map<String, Integer> values; 117 private final Set<String> commentsPre; 118 private Set<String> commentsPost; 119 private boolean isDeprecatedAttribute; 120 public AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations, or for xml: attributes 121 private Set<String> deprecatedValues = Collections.emptySet(); 122 public MatchValue matchValue; 123 private final Comparator<String> attributeValueComparator; 124 Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)125 private Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) { 126 commentsPre = firstComment; 127 element = element2; 128 name = aName.intern(); 129 if (name.equals("draft") // normally never permitted on elements with children, but special cases... 130 && !DRAFT_ON_NON_LEAF_ALLOWED.contains(element.getName())) { 131 int elementChildrenCount = element.getChildren().size(); 132 if (elementChildrenCount > 1 133 || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) { 134 isDeprecatedAttribute = true; 135 if (DEBUG) { 136 System.out.println(element.getName() + ":" + element.getChildren()); 137 } 138 } 139 } else if (name.startsWith("xml:")) { 140 attributeStatus = AttributeStatus.metadata; 141 } 142 mode = mode2; 143 defaultValue = value2 == null ? null 144 : value2.intern(); 145 AttributeType _type = AttributeType.ENUMERATED_TYPE; 146 Map<String, Integer> _values = Collections.emptyMap(); 147 if (split.length == 1) { 148 try { 149 _type = AttributeType.valueOf(split[0]); 150 } catch (Exception e) { 151 } 152 } 153 type = _type; 154 155 if (_type == AttributeType.ENUMERATED_TYPE) { 156 LinkedHashMap<String, Integer> temp = new LinkedHashMap<>(); 157 for (String part : split) { 158 if (part.length() != 0) { 159 temp.put(part.intern(), temp.size()); 160 } 161 } 162 _values = Collections.unmodifiableMap(temp); 163 } 164 values = _values; 165 attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name); 166 } 167 168 @Override toString()169 public String toString() { 170 return element.name + ":" + name; 171 } 172 getSampleValue()173 public String getSampleValue() { 174 return type == AttributeType.ENUMERATED_TYPE ? (values.containsKey("year") ? "year" : values.keySet().iterator().next()) 175 : matchValue != null ? matchValue.getSample() 176 : MatchValue.DEFAULT_SAMPLE; 177 } 178 appendDtdString(StringBuilder b)179 public StringBuilder appendDtdString(StringBuilder b) { 180 Attribute a = this; 181 b.append("<!ATTLIST " + element.name + " " + a.name); 182 boolean first; 183 if (a.type == AttributeType.ENUMERATED_TYPE) { 184 b.append(" ("); 185 first = true; 186 for (String s : a.values.keySet()) { 187 if (deprecatedValues.contains(s)) { 188 continue; 189 } 190 if (first) { 191 first = false; 192 } else { 193 b.append(" | "); 194 } 195 b.append(s); 196 } 197 b.append(")"); 198 } else { 199 b.append(' ').append(a.type); 200 } 201 if (a.mode != Mode.NULL) { 202 b.append(" ").append(a.mode.source); 203 } 204 if (a.defaultValue != null) { 205 b.append(" \"").append(a.defaultValue).append('"'); 206 } 207 b.append(" >"); 208 return b; 209 } 210 features()211 public String features() { 212 return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString()) 213 + (mode == Mode.NULL ? "" : ", mode=" + mode) 214 + (defaultValue == null ? "" : ", default=" + defaultValue); 215 } 216 217 @Override getName()218 public String getName() { 219 return name; 220 } 221 222 private static Splitter COMMA = Splitter.on(',').trimResults(); 223 addComment(String commentIn)224 public void addComment(String commentIn) { 225 if (commentIn.startsWith("@")) { 226 switch (commentIn) { 227 case "@METADATA": 228 attributeStatus = AttributeStatus.metadata; 229 break; 230 case "@VALUE": 231 attributeStatus = AttributeStatus.value; 232 break; 233 case "@DEPRECATED": 234 isDeprecatedAttribute = true; 235 break; 236 default: 237 int colonPos = commentIn.indexOf(':'); 238 if (colonPos < 0) { 239 throw new IllegalArgumentException(element.name + " " + name + 240 "= : Unrecognized ATTLIST annotation: " + commentIn); 241 } 242 String command = commentIn.substring(0, colonPos); 243 String argument = commentIn.substring(colonPos + 1); 244 switch(command) { 245 case "@DEPRECATED": 246 deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(argument))); 247 break; 248 case "@MATCH": 249 if (matchValue != null) { 250 throw new IllegalArgumentException(element.name + " " + name + 251 "= : Conflicting @MATCH: " + matchValue.getName() + " & " + argument); 252 } 253 matchValue = MatchValue.of(argument); 254 break; 255 default: 256 throw new IllegalArgumentException(element.name + " " + name + 257 "= : Unrecognized ATTLIST annotation: " + commentIn); 258 } 259 } 260 return; 261 } 262 commentsPost = addUnmodifiable(commentsPost, commentIn.trim()); 263 } 264 265 /** 266 * Special version of identity; only considers name and name of element 267 */ 268 @Override equals(Object obj)269 public boolean equals(Object obj) { 270 if (!(obj instanceof Attribute)) { 271 return false; 272 } 273 Attribute that = (Attribute) obj; 274 return name.equals(that.name) 275 && element.name.equals(that.element.name) // don't use plain element: circularity 276 // not relevant to identity 277 // && Objects.equals(comment, that.comment) 278 // && mode.equals(that.mode) 279 // && Objects.equals(defaultValue, that.defaultValue) 280 // && type.equals(that.type) 281 // && values.equals(that.values) 282 ; 283 } 284 285 /** 286 * Special version of identity; only considers name and name of element 287 */ 288 @Override hashCode()289 public int hashCode() { 290 return name.hashCode() * 37 291 + element.name.hashCode() // don't use plain element: circularity 292 // not relevant to identity 293 // ) * 37 + Objects.hashCode(comment)) * 37 294 // + mode.hashCode()) * 37 295 // + Objects.hashCode(defaultValue)) * 37 296 // + type.hashCode()) * 37 297 // + values.hashCode() 298 ; 299 } 300 isDeprecated()301 public boolean isDeprecated() { 302 return isDeprecatedAttribute; 303 } 304 isDeprecatedValue(String value)305 public boolean isDeprecatedValue(String value) { 306 return deprecatedValues.contains(value); 307 } 308 getStatus()309 public AttributeStatus getStatus() { 310 return attributeStatus; 311 } 312 getValueStatus(String value)313 public ValueStatus getValueStatus(String value) { 314 return deprecatedValues.contains(value) ? ValueStatus.invalid 315 : type == AttributeType.ENUMERATED_TYPE ? (values.containsKey(value) ? ValueStatus.valid : ValueStatus.invalid) 316 : matchValue == null ? ValueStatus.unknown 317 : matchValue.is(value) ? ValueStatus.valid 318 : ValueStatus.invalid; 319 } 320 getMatchString()321 public String getMatchString() { 322 return type == AttributeType.ENUMERATED_TYPE ? ENUM_LEAD + JOINER_COMMA_SPACE.join(values.keySet()) + ENUM_TRAIL 323 : matchValue != null ? AUG_LEAD + matchValue.toString() + AUG_TRAIL 324 : ""; 325 } 326 getMatchingName(Map<Attribute, Integer> attributes)327 public Attribute getMatchingName(Map<Attribute, Integer> attributes) { 328 for (Attribute attribute : attributes.keySet()) { 329 if (name.equals(attribute.getName())) { 330 return attribute; 331 } 332 } 333 return null; 334 } 335 336 } 337 338 public enum ValueStatus {invalid, unknown, valid} 339 DtdData(DtdType type, String version)340 private DtdData(DtdType type, String version) { 341 this.dtdType = type; 342 this.ROOT = elementFrom(type.rootType.toString()); 343 this.version = version; 344 } 345 addAttribute(String eName, String aName, String type, String mode, String value)346 private void addAttribute(String eName, String aName, String type, String mode, String value) { 347 Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache); 348 preCommentCache = null; 349 getAttributesFromName().put(aName, a); 350 CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size()); 351 lastElement = null; 352 lastAttribute = a; 353 } 354 355 public enum ElementType { 356 EMPTY, ANY, PCDATA("(#PCDATA)"), CHILDREN; 357 public final String source; 358 ElementType(String s)359 private ElementType(String s) { 360 source = s; 361 } 362 ElementType()363 private ElementType() { 364 source = name(); 365 } 366 } 367 368 interface Named { getName()369 String getName(); 370 } 371 372 public enum ElementStatus { 373 regular, metadata 374 } 375 376 public static class Element implements Named { 377 public final String name; 378 private String rawModel; 379 private ElementType type; 380 private final Map<Element, Integer> children = new LinkedHashMap<>(); 381 private final Map<Attribute, Integer> attributes = new LinkedHashMap<>(); 382 private Set<String> commentsPre; 383 private Set<String> commentsPost; 384 private String model; 385 private boolean isOrderedElement; 386 private boolean isDeprecatedElement; 387 private boolean isTechPreviewElement; 388 private ElementStatus elementStatus = ElementStatus.regular; 389 Element(String name2)390 private Element(String name2) { 391 name = name2.intern(); 392 } 393 setChildren(DtdData dtdData, String model, Set<String> precomments)394 private void setChildren(DtdData dtdData, String model, Set<String> precomments) { 395 this.commentsPre = precomments; 396 rawModel = model; 397 this.model = clean(model); 398 if (model.equals("EMPTY")) { 399 type = ElementType.EMPTY; 400 return; 401 } 402 type = ElementType.CHILDREN; 403 for (String part : FILLER.split(model)) { 404 if (part.length() != 0) { 405 if (part.equals("#PCDATA")) { 406 type = ElementType.PCDATA; 407 } else if (part.equals("ANY")) { 408 type = ElementType.ANY; 409 } else { 410 CldrUtility.putNew(children, dtdData.elementFrom(part), children.size()); 411 } 412 } 413 } 414 if ((type == ElementType.CHILDREN) == (children.size() == 0) 415 && !model.startsWith("(#PCDATA|cp")) { 416 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model); 417 } 418 } 419 420 static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)"); 421 static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])"); 422 clean(String model2)423 private String clean(String model2) { 424 // (x) -> ( x ); 425 // x,y -> x, y 426 // x|y -> x | y 427 String result = CLEANER1.matcher(model2).replaceAll("$1 "); 428 result = CLEANER2.matcher(result).replaceAll(" $1"); 429 return result.equals(model2) 430 ? model2 431 : result; // for debugging 432 } 433 containsAttribute(String string)434 public boolean containsAttribute(String string) { 435 for (Attribute a : attributes.keySet()) { 436 if (a.name.equals(string)) { 437 return true; 438 } 439 } 440 return false; 441 } 442 443 @Override toString()444 public String toString() { 445 return name; 446 } 447 toDtdString()448 public String toDtdString() { 449 return "<!ELEMENT " + name + " " + getRawModel() + " >"; 450 } 451 getType()452 public ElementType getType() { 453 return type; 454 } 455 getChildren()456 public Map<Element, Integer> getChildren() { 457 return Collections.unmodifiableMap(children); 458 } 459 getAttributes()460 public Map<Attribute, Integer> getAttributes() { 461 return Collections.unmodifiableMap(attributes); 462 } 463 464 @Override getName()465 public String getName() { 466 return name; 467 } 468 getChildNamed(String string)469 public Element getChildNamed(String string) { 470 for (Element e : children.keySet()) { 471 if (e.name.equals(string)) { 472 return e; 473 } 474 } 475 return null; 476 } 477 getAttributeNamed(String string)478 public Attribute getAttributeNamed(String string) { 479 for (Attribute a : attributes.keySet()) { 480 if (a.name.equals(string)) { 481 return a; 482 } 483 } 484 return null; 485 } 486 addComment(String addition)487 public void addComment(String addition) { 488 if (addition.startsWith("@")) { 489 // there are exactly 4 cases: deprecated, ordered, techPreview and metadata 490 switch (addition) { 491 case "@ORDERED": 492 isOrderedElement = true; 493 break; 494 case "@DEPRECATED": 495 isDeprecatedElement = true; 496 break; 497 case "@METADATA": 498 elementStatus = ElementStatus.metadata; 499 break; 500 case "@TECHPREVIEW": 501 isTechPreviewElement = true; 502 break; 503 default: 504 if (addition.startsWith("@MATCH") || 505 addition.startsWith("@VALUE")) { 506 // Try to catch this case 507 throw new IllegalArgumentException(name + 508 ": Unrecognized ELEMENT annotation (this isn't ATTLIST!): " + 509 addition); 510 } else { 511 throw new IllegalArgumentException(name + 512 ": Unrecognized ELEMENT annotation: " + 513 addition); 514 } 515 } 516 return; 517 } 518 commentsPost = addUnmodifiable(commentsPost, addition.trim()); 519 } 520 521 /** 522 * Special version of equals. Only the name is considered in the identity. 523 */ 524 @Override equals(Object obj)525 public boolean equals(Object obj) { 526 if (!(obj instanceof Element)) { 527 return false; 528 } 529 Element that = (Element) obj; 530 return name.equals(that.name) 531 // not relevant to the identity of the object 532 // && Objects.equals(comment, that.comment) 533 // && type == that.type 534 // && attributes.equals(that.attributes) 535 // && children.equals(that.children) 536 ; 537 } 538 539 /** 540 * Special version of hashcode. Only the name is considered in the identity. 541 */ 542 @Override hashCode()543 public int hashCode() { 544 return name.hashCode() 545 // not relevant to the identity of the object 546 // * 37 + Objects.hashCode(comment) 547 //) * 37 + Objects.hashCode(type) 548 // ) * 37 + attributes.hashCode() 549 // ) * 37 + children.hashCode() 550 ; 551 } 552 isDeprecated()553 public boolean isDeprecated() { 554 return isDeprecatedElement; 555 } 556 isOrdered()557 public boolean isOrdered() { 558 return isOrderedElement; 559 } 560 isTechPreview()561 public boolean isTechPreview() { 562 return isTechPreviewElement; 563 } 564 getElementStatus()565 public ElementStatus getElementStatus() { 566 return elementStatus; 567 } 568 569 /** 570 * @return the rawModel 571 */ getRawModel()572 public String getRawModel() { 573 return rawModel; 574 } 575 } 576 elementFrom(String name)577 private Element elementFrom(String name) { 578 Element result = nameToElement.get(name); 579 if (result == null) { 580 nameToElement.put(name, result = new Element(name)); 581 } 582 return result; 583 } 584 addElement(String name2, String model)585 private void addElement(String name2, String model) { 586 Element element = elementFrom(name2); 587 element.setChildren(this, model, preCommentCache); 588 preCommentCache = null; 589 lastElement = element; 590 lastAttribute = null; 591 } 592 addComment(String comment)593 private void addComment(String comment) { 594 comment = comment.trim(); 595 if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky" 596 if (comment.startsWith("@")) { 597 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment"); 598 } 599 preCommentCache = addUnmodifiable(preCommentCache, comment); 600 } else if (lastElement != null) { 601 lastElement.addComment(comment); 602 } else if (lastAttribute != null) { 603 lastAttribute.addComment(comment); 604 } else { 605 if (comment.startsWith("@")) { 606 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment"); 607 } 608 preCommentCache = addUnmodifiable(preCommentCache, comment); 609 } 610 } 611 612 // TODO hide this 613 /** 614 * @deprecated 615 */ 616 @Deprecated 617 @Override handleElementDecl(String name, String model)618 public void handleElementDecl(String name, String model) { 619 if (SHOW_ALL) { 620 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) > 621 System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >"); 622 } 623 addElement(name, model); 624 } 625 626 // TODO hide this 627 /** 628 * @deprecated 629 */ 630 @Deprecated 631 @Override handleStartDtd(String name, String publicId, String systemId)632 public void handleStartDtd(String name, String publicId, String systemId) { 633 DtdType explicitDtdType = DtdType.valueOf(name); 634 if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) { 635 throw new IllegalArgumentException("Mismatch in dtdTypes"); 636 } 637 } 638 639 /** 640 * @deprecated 641 */ 642 @Deprecated 643 @Override handleAttributeDecl(String eName, String aName, String type, String mode, String value)644 public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) { 645 if (SHOW_ALL) { 646 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > 647 // <!ATTLIST version number CDATA #REQUIRED > 648 // <!ATTLIST version cldrVersion CDATA #FIXED "27" > 649 650 System.out.println("<!ATTLIST " + eName 651 + " " + aName 652 + " " + type 653 + " " + mode 654 + (value == null ? "" : " \"" + value + "\"") 655 + " >"); 656 } 657 // HACK for 1.1.1 658 if (eName.equals("draft")) { 659 eName = "week"; 660 } 661 addAttribute(eName, aName, type, mode, value); 662 } 663 664 /** 665 * @deprecated 666 */ 667 @Deprecated 668 @Override handleComment(String path, String comment)669 public void handleComment(String path, String comment) { 670 if (comment.contains("Copyright")) { 671 // Zap the copyright comment, replace it with the current one. 672 comment = CldrUtility.getCopyrightString(); 673 } 674 if (SHOW_ALL) { 675 // <!-- true and false are deprecated. --> 676 System.out.println("<!-- " + comment.trim() + " -->"); 677 } 678 addComment(comment); 679 } 680 681 // TODO hide this 682 /** 683 * @deprecated 684 */ 685 @Deprecated 686 @Override handleEndDtd()687 public void handleEndDtd() { 688 throw new XMLFileReader.AbortException(); 689 } 690 691 /** 692 * Note that it always gets the trunk version 693 * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead 694 */ 695 @Deprecated getInstance(DtdType type)696 public static DtdData getInstance(DtdType type) { 697 return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory()); 698 } 699 700 /** 701 * Special form using version, used only by tests, etc. 702 */ getInstance(DtdType type, String version)703 public static DtdData getInstance(DtdType type, String version) { 704 // Map out versions that had no DTD 705 if (version != null) { 706 switch (version) { 707 case "1.1.1": 708 version="1.1"; 709 break; 710 case "1.4.1": 711 version="1.4"; 712 break; 713 case "1.5.1": 714 version="1.5.0.1"; 715 break; 716 default: 717 } 718 } 719 File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory() 720 : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version); 721 722 return getInstance(type, version, directory); 723 } 724 725 private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE = new ConcurrentHashMap<>(); 726 727 /** 728 * Normal version of DtdData 729 * Get a DtdData, given the CLDR root directory. 730 * @param type which DtdType to return 731 * @param directory the CLDR Root directory, which contains the "common" directory. 732 * @return 733 */ getInstance(DtdType type, File directory)734 public static DtdData getInstance(DtdType type, File directory) { 735 Pair<DtdType, File> key = new Pair<>(type, directory); 736 DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory)); 737 return data; 738 } 739 getInstance(DtdType type, String version, File directory)740 private static DtdData getInstance(DtdType type, String version, File directory) { 741 DtdData simpleHandler = new DtdData(type, version); 742 XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler); 743 if (type != type.rootType) { 744 // read the real first, then add onto it. 745 readFile(type.rootType, xfr, directory); 746 } 747 readFile(type, xfr, directory); 748 // HACK 749 if (type == DtdType.ldmlICU) { 750 Element special = simpleHandler.nameToElement.get("special"); 751 for (String extraElementName : Arrays.asList( 752 "icu:breakIteratorData", 753 "icu:UCARules", 754 "icu:scripts", 755 "icu:transforms", 756 "icu:ruleBasedNumberFormats", 757 "icu:isLeapMonth", 758 "icu:version", 759 "icu:breakDictionaryData", 760 "icu:depends")) { 761 Element extraElement = simpleHandler.nameToElement.get(extraElementName); 762 special.children.put(extraElement, special.children.size()); 763 } 764 } 765 if (simpleHandler.ROOT.children.size() == 0) { 766 throw new IllegalArgumentException("Internal Error: DtdData.getInstance(" + 767 type + ", ...): readFile() failed to return any children!"); 768 // should never happen 769 } 770 simpleHandler.finish(); 771 simpleHandler.freeze(); 772 return simpleHandler; 773 } 774 finish()775 private void finish() { 776 dtdComparator = new DtdComparator(); 777 } 778 readFile(DtdType type, XMLFileReader xfr, File directory)779 public static void readFile(DtdType type, XMLFileReader xfr, File directory) { 780 File file = new File(directory, type.dtdPath); 781 StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>" 782 + "<!DOCTYPE " + type 783 + " SYSTEM '" + file.getAbsolutePath() + "'>"); 784 try { 785 xfr.read(type.toString(), s, -1, true); // DTD_TYPE_TO_FILE.get(type) 786 } catch (IllegalArgumentException iae) { 787 // rethrow 788 throw new IllegalArgumentException("Error while reading " + type, iae); 789 } 790 } 791 freeze()792 private void freeze() { 793 if (version == null) { // only generate for new versions 794 MergeLists<String> elementMergeList = new MergeLists<>(); 795 elementMergeList.add(dtdType.toString()); 796 MergeLists<String> attributeMergeList = new MergeLists<>(); 797 attributeMergeList.add("_q"); 798 799 for (Element element : nameToElement.values()) { 800 if (element.children.size() > 0) { 801 Collection<String> names = getNames(element.children.keySet()); 802 elementMergeList.add(names); 803 if (DEBUG) { 804 System.out.println(element.getName() + "\t→\t" + names); 805 } 806 } 807 if (element.attributes.size() > 0) { 808 Collection<String> names = getNames(element.attributes.keySet()); 809 attributeMergeList.add(names); 810 if (DEBUG) { 811 System.out.println(element.getName() + "\t→\t@" + names); 812 } 813 } 814 } 815 List<String> elementList = elementMergeList.merge(); 816 List<String> attributeList = attributeMergeList.merge(); 817 if (DEBUG) { 818 System.out.println("Element Ordering:\t" + elementList); 819 System.out.println("Attribute Ordering:\t" + attributeList); 820 } 821 elementComparator = new MapComparator<>(elementList).setErrorOnMissing(true).freeze(); 822 attributeComparator = new MapComparator<>(attributeList).setErrorOnMissing(true).freeze(); 823 } 824 nameToAttributes.freeze(); 825 nameToElement = Collections.unmodifiableMap(nameToElement); 826 } 827 getNames(Collection<? extends Named> keySet)828 private Collection<String> getNames(Collection<? extends Named> keySet) { 829 List<String> result = new ArrayList<>(); 830 for (Named e : keySet) { 831 result.add(e.getName()); 832 } 833 return result; 834 } 835 836 public enum DtdItem { 837 ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE 838 } 839 840 public interface AttributeValueComparator { compare(String element, String attribute, String value1, String value2)841 public int compare(String element, String attribute, String value1, String value2); 842 } 843 getDtdComparator(AttributeValueComparator avc)844 public Comparator<String> getDtdComparator(AttributeValueComparator avc) { 845 return dtdComparator; 846 } 847 getDtdComparator()848 public DtdComparator getDtdComparator() { 849 return dtdComparator; 850 } 851 852 public class DtdComparator implements Comparator<String> { 853 @Override compare(String path1, String path2)854 public int compare(String path1, String path2) { 855 XPathParts a = XPathParts.getFrozenInstance(path1); 856 XPathParts b = XPathParts.getFrozenInstance(path2); 857 return xpathComparator(a, b); 858 } 859 xpathComparator(XPathParts a, XPathParts b)860 public int xpathComparator(XPathParts a, XPathParts b) { 861 // there must always be at least one element 862 String baseA = a.getElement(0); 863 String baseB = b.getElement(0); 864 if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) { 865 throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB); 866 } 867 int min = Math.min(a.size(), b.size()); 868 Element parent = ROOT; 869 Element elementA; 870 for (int i = 1; i < min; ++i, parent = elementA) { 871 // add extra test for "fake" elements, used in diffing. they always start with _ 872 String elementRawA = a.getElement(i); 873 String elementRawB = b.getElement(i); 874 if (elementRawA.startsWith("_")) { 875 return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1; 876 } else if (elementRawB.startsWith("_")) { 877 return 1; 878 } 879 // 880 elementA = nameToElement.get(elementRawA); 881 Element elementB = nameToElement.get(elementRawB); 882 if (elementA != elementB) { 883 int aa = parent.children.get(elementA); 884 int bb = parent.children.get(elementB); 885 return aa - bb; 886 } 887 int countA = a.getAttributeCount(i); 888 int countB = b.getAttributeCount(i); 889 if (countA == 0 && countB == 0) { 890 continue; 891 } 892 // we have two ways to compare the attributes. One based on the dtd, 893 // and one based on explicit comparators 894 895 // at this point the elements are the same and correspond to elementA 896 // in the dtd 897 898 // Handle the special added elements 899 String aqValue = a.getAttributeValue(i, "_q"); 900 if (aqValue != null) { 901 String bqValue = b.getAttributeValue(i, "_q"); 902 if (!aqValue.equals(bqValue)) { 903 int aValue = Integer.parseInt(aqValue); 904 int bValue = Integer.parseInt(bqValue); 905 return aValue - bValue; 906 } 907 --countA; 908 --countB; 909 } 910 911 attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) { 912 Attribute main = attr.getKey(); 913 String valueA = a.getAttributeValue(i, main.name); 914 String valueB = b.getAttributeValue(i, main.name); 915 if (valueA == null) { 916 if (valueB != null) { 917 return -1; 918 } 919 } else if (valueB == null) { 920 return 1; 921 } else if (valueA.equals(valueB)) { 922 --countA; 923 --countB; 924 if (countA == 0 && countB == 0) { 925 break attributes; 926 } 927 continue; // TODO 928 } else if (main.attributeValueComparator != null) { 929 return main.attributeValueComparator.compare(valueA, valueB); 930 } else if (main.values.size() != 0) { 931 int aa = main.values.get(valueA); 932 int bb = main.values.get(valueB); 933 return aa - bb; 934 } else { 935 return valueA.compareTo(valueB); 936 } 937 } 938 if (countA != 0 || countB != 0) { 939 throw new IllegalArgumentException(); 940 } 941 } 942 return a.size() - b.size(); 943 } 944 } 945 getAttributeComparator()946 public MapComparator<String> getAttributeComparator() { 947 return attributeComparator; 948 } 949 950 getElementComparator()951 public MapComparator<String> getElementComparator() { 952 return elementComparator; 953 } 954 getAttributesFromName()955 public Relation<String, Attribute> getAttributesFromName() { 956 return nameToAttributes; 957 } 958 getElementFromName()959 public Map<String, Element> getElementFromName() { 960 return nameToElement; 961 } 962 963 @Override toString()964 public String toString() { 965 StringBuilder b = new StringBuilder(); 966 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) > 967 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. --> 968 Seen seen = new Seen(dtdType); 969 seen.seenElements.add(ANY); 970 seen.seenElements.add(PCDATA); 971 toString(ROOT, b, seen); 972 973 // Hack for ldmlIcu: catch the items that are not mentioned in the original 974 int currentEnd = b.length(); 975 for (Element e : nameToElement.values()) { 976 toString(e, b, seen); 977 } 978 if (currentEnd != b.length()) { 979 b.insert(currentEnd, 980 System.lineSeparator() + System.lineSeparator() 981 + "<!-- Elements not reachable from root! -->" 982 + System.lineSeparator()); 983 } 984 return b.toString(); 985 } 986 987 static final class Seen { 988 Set<Element> seenElements = new HashSet<>(); 989 Set<Attribute> seenAttributes = new HashSet<>(); 990 Seen(DtdType dtdType)991 public Seen(DtdType dtdType) { 992 if (dtdType.rootType == dtdType) { 993 return; 994 } 995 DtdData otherData = DtdData.getInstance(dtdType.rootType); 996 walk(otherData, otherData.ROOT); 997 seenElements.remove(otherData.nameToElement.get("special")); 998 } 999 walk(DtdData otherData, Element current)1000 private void walk(DtdData otherData, Element current) { 1001 seenElements.add(current); 1002 seenAttributes.addAll(current.attributes.keySet()); 1003 for (Element e : current.children.keySet()) { 1004 walk(otherData, e); 1005 } 1006 } 1007 } 1008 getDescendents(Element start, Set<Element> toAddTo)1009 public Set<Element> getDescendents(Element start, Set<Element> toAddTo) { 1010 if (!toAddTo.contains(start)) { 1011 toAddTo.add(start); 1012 for (Element e : start.children.keySet()) { 1013 getDescendents(e, toAddTo); 1014 } 1015 } 1016 return toAddTo; 1017 } 1018 toString(Element current, StringBuilder b, Seen seen)1019 private void toString(Element current, StringBuilder b, Seen seen) { 1020 boolean first = true; 1021 if (seen.seenElements.contains(current)) { 1022 return; 1023 } 1024 seen.seenElements.add(current); 1025 boolean elementDeprecated = isDeprecated(current.name, "*", "*"); 1026 1027 showComments(b, current.commentsPre, true); 1028 b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >"); 1029 if (USE_SYNTHESIZED) { 1030 Element aliasElement = getElementFromName().get("alias"); 1031 //b.append(current.rawChildren); 1032 if (!current.children.isEmpty()) { 1033 LinkedHashSet<Element> elements = new LinkedHashSet<>(current.children.keySet()); 1034 boolean hasAlias = aliasElement != null && elements.remove(aliasElement); 1035 //boolean hasSpecial = specialElement != null && elements.remove(specialElement); 1036 if (hasAlias) { 1037 b.append("(alias |"); 1038 } 1039 b.append("("); 1040 // <!ELEMENT transformNames ( alias | (transformName | special)* ) > 1041 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) > 1042 1043 for (Element e : elements) { 1044 if (first) { 1045 first = false; 1046 } else { 1047 b.append(", "); 1048 } 1049 b.append(e.name); 1050 if (e.type != ElementType.PCDATA) { 1051 b.append("*"); 1052 } 1053 } 1054 if (hasAlias) { 1055 b.append(")"); 1056 } 1057 b.append(")"); 1058 } else { 1059 b.append(current.type == null ? "???" : current.type.source); 1060 } 1061 b.append(">"); 1062 } 1063 showComments(b, current.commentsPost, false); 1064 if (isOrdered(current.name)) { 1065 b.append(COMMENT_PREFIX + "<!--@ORDERED-->"); 1066 } 1067 if (isTechPreview(current.name)) { 1068 b.append(COMMENT_PREFIX + "<!--@TECHPREVIEW-->"); 1069 } 1070 if (current.getElementStatus() != ElementStatus.regular) { 1071 b.append(COMMENT_PREFIX + "<!--@" 1072 + current.getElementStatus().toString().toUpperCase(Locale.ROOT) 1073 + "-->"); 1074 } 1075 if (elementDeprecated) { 1076 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1077 } 1078 1079 LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>(); 1080 1081 for (Attribute a : current.attributes.keySet()) { 1082 if (seen.seenAttributes.contains(a)) { 1083 continue; 1084 } 1085 seen.seenAttributes.add(a); 1086 boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*"); 1087 1088 deprecatedValues.clear(); 1089 1090 showComments(b, a.commentsPre, true); 1091 b.append("\n<!ATTLIST " + current.name + " " + a.name); 1092 if (a.type == AttributeType.ENUMERATED_TYPE) { 1093 b.append(" ("); 1094 first = true; 1095 for (String s : a.values.keySet()) { 1096 if (first) { 1097 first = false; 1098 } else { 1099 b.append(" | "); 1100 } 1101 b.append(s); 1102 if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) { 1103 deprecatedValues.add(s); 1104 } 1105 } 1106 b.append(")"); 1107 } else { 1108 b.append(' ').append(a.type); 1109 } 1110 if (a.mode != Mode.NULL) { 1111 b.append(" ").append(a.mode.source); 1112 } 1113 if (a.defaultValue != null) { 1114 b.append(" \"").append(a.defaultValue).append('"'); 1115 } 1116 b.append(" >"); 1117 showComments(b, a.commentsPost, false); 1118 // if (attributeDeprecated != deprecatedComment) { 1119 // System.out.println("*** BAD DEPRECATION ***" + a); 1120 // } 1121 if (a.matchValue != null) { 1122 b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->"); 1123 } 1124 if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) { 1125 b.append(COMMENT_PREFIX + "<!--@METADATA-->"); 1126 } else if (!isDistinguishing(current.name, a.name)) { 1127 b.append(COMMENT_PREFIX + "<!--@VALUE-->"); 1128 } 1129 if (attributeDeprecated) { 1130 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1131 } else if (!deprecatedValues.isEmpty()) { 1132 b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + Joiner.on(", ") 1133 .join(deprecatedValues) + "-->"); 1134 } 1135 } 1136 if (current.children.size() > 0) { 1137 for (Element e : current.children.keySet()) { 1138 toString(e, b, seen); 1139 } 1140 } 1141 } 1142 showComments(StringBuilder b, Set<String> comments, boolean separate)1143 private void showComments(StringBuilder b, Set<String> comments, boolean separate) { 1144 if (comments == null) { 1145 return; 1146 } 1147 if (separate && b.length() != 0) { 1148 b.append(System.lineSeparator()); 1149 } 1150 for (String c : comments) { 1151 boolean deprecatedComment = false; // the following served its purpose... c.toLowerCase(Locale.ENGLISH).contains("deprecat"); 1152 if (!deprecatedComment) { 1153 if (separate) { 1154 // special handling for very first comment 1155 if (b.length() == 0) { 1156 b.append("<!--") 1157 .append(System.lineSeparator()) 1158 .append(c) 1159 .append(System.lineSeparator()) 1160 .append("-->"); 1161 continue; 1162 } 1163 b.append(System.lineSeparator()); 1164 } else { 1165 b.append(COMMENT_PREFIX); 1166 } 1167 b.append("<!-- ").append(c).append(" -->"); 1168 } 1169 } 1170 } 1171 removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1172 public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) { 1173 for (Iterator<T> it = elements.iterator(); it.hasNext();) { 1174 T item = it.next(); 1175 if (matcher.transform(item) == Boolean.TRUE) { 1176 it.remove(); 1177 return item; 1178 } 1179 } 1180 return null; 1181 } 1182 getElements()1183 public Set<Element> getElements() { 1184 return new LinkedHashSet<>(nameToElement.values()); 1185 } 1186 getAttributes()1187 public Set<Attribute> getAttributes() { 1188 return new LinkedHashSet<>(nameToAttributes.values()); 1189 } 1190 isDistinguishing(String elementName, String attribute)1191 public boolean isDistinguishing(String elementName, String attribute) { 1192 return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished; 1193 } 1194 1195 static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft")); 1196 addUnmodifiable(Set<String> comment, String addition)1197 static final Set<String> addUnmodifiable(Set<String> comment, String addition) { 1198 if (comment == null) { 1199 return Collections.singleton(addition); 1200 } else { 1201 comment = new LinkedHashSet<>(comment); 1202 comment.add(addition); 1203 return Collections.unmodifiableSet(comment); 1204 } 1205 } 1206 1207 public class IllegalByDtdException extends RuntimeException { 1208 private static final long serialVersionUID = 1L; 1209 public final String elementName; 1210 public final String attributeName; 1211 public final String attributeValue; 1212 IllegalByDtdException(String elementName, String attributeName, String attributeValue)1213 public IllegalByDtdException(String elementName, String attributeName, String attributeValue) { 1214 this.elementName = elementName; 1215 this.attributeName = attributeName; 1216 this.attributeValue = attributeValue; 1217 } 1218 1219 @Override getMessage()1220 public String getMessage() { 1221 return "Dtd " + dtdType 1222 + " doesn’t allow " 1223 + "element=" + elementName 1224 + (attributeName == null ? "" : ", attribute: " + attributeName) 1225 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue); 1226 } 1227 } 1228 1229 //@SuppressWarnings("unused") isDeprecated(String elementName, String attributeName, String attributeValue)1230 public boolean isDeprecated(String elementName, String attributeName, String attributeValue) { 1231 Element element = getElementThrowingIfNull(elementName, null, null); 1232 if (element.isDeprecatedElement) { 1233 return true; 1234 } 1235 if ("*".equals(attributeName) || "_q".equals(attributeName)) { 1236 return false; 1237 } 1238 Attribute attribute = element.getAttributeNamed(attributeName); 1239 if (attribute == null) { 1240 throw new IllegalByDtdException(elementName, attributeName, attributeValue); 1241 } else if (attribute.isDeprecatedAttribute) { 1242 return true; 1243 } 1244 return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*" 1245 } 1246 1247 /** 1248 * Returns whether an element (specified by its full name) is ordered. This method 1249 * understands all elements in the DTDs used (including the ICU extensions), but will 1250 * throw IllegalByDtdException for unknown elements. See CLDR-8614 for more background. 1251 */ isOrdered(String elementName)1252 public boolean isOrdered(String elementName) { 1253 Element element = getElementThrowingIfNull(elementName, null, null); 1254 return element.isOrdered(); 1255 } 1256 getElementThrowingIfNull(String elementName, String attributeName, String value)1257 public Element getElementThrowingIfNull(String elementName, String attributeName, String value) { 1258 Element element = nameToElement.get(elementName); 1259 if (element == null) { 1260 throw new IllegalByDtdException(elementName, attributeName, value); 1261 } 1262 return element; 1263 } 1264 1265 /** 1266 * Returns whether an element (specified by its full name) is a tech preview. This method 1267 * understands all elements in the DTDs used (including the ICU extensions), but will 1268 * throw IllegalByDtdException for unknown elements. See CLDR-8614 for more background. 1269 */ isTechPreview(String elementName)1270 public boolean isTechPreview(String elementName) { 1271 Element element = getElementThrowingIfNull(elementName, null, null); 1272 return element.isTechPreview(); 1273 } 1274 1275 getAttributeStatus(String elementName, String attributeName)1276 public AttributeStatus getAttributeStatus(String elementName, String attributeName) { 1277 if ("_q".equals(attributeName)) { 1278 return AttributeStatus.distinguished; // special case 1279 } 1280 Element element = nameToElement.get(elementName); 1281 if (element == null) { 1282 if (elementName.startsWith("icu:")) { 1283 return AttributeStatus.distinguished; 1284 } 1285 throw new IllegalByDtdException(elementName, attributeName, null); 1286 } 1287 Attribute attribute = element.getAttributeNamed(attributeName); 1288 if (attribute == null) { 1289 if (elementName.startsWith("icu:")) { 1290 return AttributeStatus.distinguished; 1291 } 1292 throw new IllegalByDtdException(elementName, attributeName, null); 1293 } 1294 return attribute.attributeStatus; 1295 } 1296 1297 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1298 private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze(); 1299 1300 static MapComparator<String> dayValueOrder = new MapComparator<String>().add( 1301 "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze(); 1302 static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add( 1303 "midnight", "am", "noon", "pm", 1304 "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2", 1305 // The ones on the following line are no longer used actively. Can be removed later? 1306 "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze(); 1307 static MapComparator<String> dateTimeFormatOrder = new MapComparator<String>().add( 1308 "standard", "atTime").freeze(); 1309 static MapComparator<String> listPatternOrder = new MapComparator<String>().add( 1310 "start", "middle", "end", "2", "3").freeze(); 1311 static MapComparator<String> widthOrder = new MapComparator<String>().add( 1312 "abbreviated", "narrow", "short", "wide", "all").freeze(); 1313 static MapComparator<String> lengthOrder = new MapComparator<String>().add( 1314 "full", "long", "medium", "short").freeze(); 1315 static MapComparator<String> dateFieldOrder = new MapComparator<String>().add( 1316 "era", "era-short", "era-narrow", 1317 "year", "year-short", "year-narrow", 1318 "quarter", "quarter-short", "quarter-narrow", 1319 "month", "month-short", "month-narrow", 1320 "week", "week-short", "week-narrow", 1321 "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow", 1322 "day", "day-short", "day-narrow", 1323 "dayOfYear", "dayOfYear-short", "dayOfYear-narrow", 1324 "weekday", "weekday-short", "weekday-narrow", 1325 "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow", 1326 "sun", "sun-short", "sun-narrow", 1327 "mon", "mon-short", "mon-narrow", 1328 "tue", "tue-short", "tue-narrow", 1329 "wed", "wed-short", "wed-narrow", 1330 "thu", "thu-short", "thu-narrow", 1331 "fri", "fri-short", "fri-narrow", 1332 "sat", "sat-short", "sat-narrow", 1333 "dayperiod-short", "dayperiod", "dayperiod-narrow", 1334 "hour", "hour-short", "hour-narrow", 1335 "minute", "minute-short", "minute-narrow", 1336 "second", "second-short", "second-narrow", 1337 "zone", "zone-short", "zone-narrow").freeze(); 1338 static MapComparator<String> nameFieldOrder = new MapComparator<String>().add( 1339 "prefix", "given", "given-informal", "given2", 1340 "surname", "surname-prefix", "surname-core", "surname2", "suffix").freeze(); 1341 static MapComparator<String> orderValueOrder = new MapComparator<String>().add( 1342 "givenFirst", "surnameFirst", "sorting").freeze(); 1343 static MapComparator<String> lengthValueOrder = new MapComparator<String>().add( 1344 "long", "medium", "short").freeze(); 1345 static MapComparator<String> usageValueOrder = new MapComparator<String>().add( 1346 "referring", "addressing", "monogram").freeze(); 1347 static MapComparator<String> formalityValueOrder = new MapComparator<String>().add( 1348 "formal", "informal").freeze(); 1349 static MapComparator<String> sampleNameItemOrder = new MapComparator<String>().add( 1350 "givenOnly", "givenSurnameOnly", "given12Surname", "full").freeze(); 1351 1352 /* TODO: change this to be data-file driven. Can do with new Unit preferences info; also put them in a more meaningful order (metric vs other; size) */ 1353 1354 public static final MapComparator<String> unitOrder = new MapComparator<String>().add( 1355 "acceleration-g-force", "acceleration-meter-per-square-second", 1356 "acceleration-meter-per-second-squared", // deprecated 1357 "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second", 1358 "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter", 1359 "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch", 1360 "area-dunam", 1361 "concentr-karat", 1362 "proportion-karat", // deprecated 1363 "concentr-milligram-ofglucose-per-deciliter", 1364 "concentr-milligram-per-deciliter", 1365 "concentr-millimole-per-liter", 1366 "concentr-item", 1367 "concentr-portion", 1368 "concentr-permillion", 1369 "concentr-part-per-million", // deprecated 1370 "concentr-percent", "concentr-permille", "concentr-permyriad", 1371 "concentr-mole", 1372 "concentr-ofglucose", 1373 "consumption-liter-per-kilometer", "consumption-liter-per-100-kilometer", 1374 "consumption-liter-per-100kilometers", // deprecated 1375 "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial", 1376 "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit", 1377 "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit", 1378 "digital-byte", "digital-bit", 1379 "duration-century", "duration-decade", 1380 "duration-year", "duration-year-person", 1381 "duration-quarter", 1382 "duration-month", "duration-month-person", 1383 "duration-week", "duration-week-person", 1384 "duration-day", "duration-day-person", 1385 "duration-hour", "duration-minute", "duration-second", 1386 "duration-millisecond", "duration-microsecond", "duration-nanosecond", 1387 "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt", 1388 "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour", 1389 "energy-electronvolt", 1390 "energy-british-thermal-unit", 1391 "energy-therm-us", 1392 "force-pound-force", 1393 "force-newton", 1394 "force-kilowatt-hour-per-100-kilometer", 1395 "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", "frequency-hertz", 1396 "graphics-em", "graphics-pixel", "graphics-megapixel", 1397 "graphics-pixel-per-centimeter", "graphics-pixel-per-inch", 1398 "graphics-dot-per-centimeter", "graphics-dot-per-inch", 1399 "graphics-dot", 1400 "length-earth-radius", 1401 "length-100-kilometer", 1402 "length-kilometer", "length-meter", "length-decimeter", "length-centimeter", 1403 "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer", 1404 "length-mile", "length-yard", "length-foot", "length-inch", 1405 "length-parsec", "length-light-year", "length-astronomical-unit", 1406 "length-furlong", "length-fathom", 1407 "length-nautical-mile", "length-mile-scandinavian", 1408 "length-point", 1409 "length-solar-radius", 1410 "light-lux", 1411 "light-candela", 1412 "light-lumen", 1413 "light-solar-luminosity", 1414 "mass-tonne", "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram", 1415 "mass-ton", "mass-stone", "mass-pound", "mass-ounce", 1416 "mass-ounce-troy", "mass-carat", 1417 "mass-dalton", 1418 "mass-earth-mass", 1419 "mass-solar-mass", 1420 1421 "mass-grain", 1422 1423 "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt", 1424 "power-horsepower", 1425 "pressure-millimeter-ofhg", 1426 "pressure-millimeter-of-mercury", // deprecated 1427 "pressure-ofhg", 1428 "pressure-pound-force-per-square-inch", 1429 "pressure-pound-per-square-inch", // deprecated 1430 "pressure-inch-ofhg", 1431 "pressure-inch-hg", // deprecated 1432 "pressure-bar", "pressure-millibar", "pressure-atmosphere", 1433 "pressure-pascal", 1434 "pressure-hectopascal", 1435 "pressure-kilopascal", 1436 "pressure-megapascal", 1437 "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot", 1438 "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin", 1439 "torque-pound-force-foot", 1440 "torque-pound-foot", // deprecated 1441 "torque-newton-meter", 1442 "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter", 1443 "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch", 1444 "volume-megaliter", "volume-hectoliter", "volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter", 1445 "volume-pint-metric", "volume-cup-metric", 1446 "volume-acre-foot", 1447 "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup", 1448 "volume-fluid-ounce", "volume-fluid-ounce-imperial", "volume-tablespoon", "volume-teaspoon", 1449 "volume-barrel", 1450 1451 "volume-dessert-spoon", 1452 "volume-dessert-spoon-imperial", 1453 "volume-drop", 1454 "volume-dram", 1455 "volume-jigger", 1456 "volume-pinch", 1457 "volume-quart-imperial" 1458 // "volume-pint-imperial" 1459 ).freeze(); 1460 1461 static MapComparator<String> countValueOrder = new MapComparator<String>().add( 1462 "0", "1", "zero", "one", "two", "few", "many", "other").freeze(); 1463 static MapComparator<String> unitLengthOrder = new MapComparator<String>().add( 1464 "long", "short", "narrow").freeze(); 1465 static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add( 1466 "standard", "accounting").freeze(); 1467 static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator(); 1468 1469 static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator(); 1470 1471 // Hack for US 1472 static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<>() { 1473 @Override 1474 public int compare(String o1, String o2) { 1475 if (o1.contains("{")) { 1476 o1 = o1.replace("{", ""); 1477 } 1478 if (o2.contains("{")) { 1479 o2 = o2.replace("{", ""); 1480 } 1481 return COMP.compare(o1, o2); 1482 } 1483 1484 }; 1485 getAttributeValueComparator(String element, String attribute)1486 public static Comparator<String> getAttributeValueComparator(String element, String attribute) { 1487 return getAttributeValueComparator(DtdType.ldml, element, attribute); 1488 } 1489 getAttributeValueComparator(DtdType type, String element, String attribute)1490 static Comparator<String> getAttributeValueComparator(DtdType type, String element, String attribute) { 1491 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1492 Comparator<String> comp = valueOrdering; 1493 if (type != DtdType.ldml && type != DtdType.ldmlICU) { 1494 return comp; 1495 } 1496 if (attribute.equals("day")) { // && (element.startsWith("weekend") 1497 comp = dayValueOrder; 1498 } else if (attribute.equals("type")) { 1499 if (element.endsWith("FormatLength")) { 1500 comp = lengthOrder; 1501 } else if (element.endsWith("Width")) { 1502 comp = widthOrder; 1503 } else if (element.equals("day")) { 1504 comp = dayValueOrder; 1505 } else if (element.equals("field")) { 1506 comp = dateFieldOrder; 1507 } else if (element.equals("zone")) { 1508 comp = zoneOrder; 1509 } else if (element.equals("listPatternPart")) { 1510 comp = listPatternOrder; 1511 } else if (element.equals("currencyFormat")) { 1512 comp = currencyFormatOrder; 1513 } else if (element.equals("unitLength")) { 1514 comp = unitLengthOrder; 1515 } else if (element.equals("unit")) { 1516 comp = unitOrder; 1517 } else if (element.equals("dayPeriod")) { 1518 comp = dayPeriodOrder; 1519 } else if (element.equals("dateTimeFormat")) { 1520 comp = dateTimeFormatOrder; 1521 } else if (element.equals("nameField")) { 1522 comp = nameFieldOrder; 1523 } 1524 } else if (attribute.equals("order") && element.equals("personName")) { 1525 comp = orderValueOrder; 1526 } else if (attribute.equals("length") && element.equals("personName")) { 1527 comp = lengthValueOrder; 1528 } else if (attribute.equals("usage") && element.equals("personName")) { 1529 comp = usageValueOrder; 1530 } else if (attribute.equals("formality")) { 1531 comp = formalityValueOrder; 1532 } else if (attribute.equals("item") && element.equals("sampleName")) { 1533 comp = sampleNameItemOrder; 1534 } else if (attribute.equals("count") && !element.equals("minDays")) { 1535 comp = countValueOrder; 1536 } else if (attribute.equals("cp") && element.equals("annotation")) { 1537 comp = UNICODE_SET_COMPARATOR; 1538 } 1539 return comp; 1540 } 1541 1542 /** 1543 * Comparator for attributes in CLDR files 1544 */ 1545 private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() { 1546 @Override 1547 public int compare(String element, String attribute, String value1, String value2) { 1548 Comparator<String> comp = getAttributeValueComparator(element, attribute); 1549 return comp.compare(value1, value2); 1550 } 1551 }; 1552 hasValue(String elementName)1553 public boolean hasValue(String elementName) { 1554 return nameToElement.get(elementName).type == ElementType.PCDATA; 1555 } 1556 isMetadata(XPathParts pathPlain)1557 public boolean isMetadata(XPathParts pathPlain) { 1558 for (String s : pathPlain.getElements()) { 1559 Element e = getElementFromName().get(s); 1560 if (e.elementStatus == ElementStatus.metadata) { 1561 return true; 1562 } 1563 } 1564 return false; 1565 } 1566 isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1567 public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) { 1568 // TODO Don't use hard-coded list; instead add to DTD annotations 1569 final String element1 = pathPlain.getElement(1); 1570 final String element2 = pathPlain.getElement(2); 1571 final String elementN = pathPlain.getElement(-1); 1572 switch (dtdType2) { 1573 case ldml: 1574 switch (element1) { 1575 case "generation": 1576 case "metadata": 1577 return true; 1578 } 1579 break; 1580 case ldmlBCP47: 1581 switch (element1) { 1582 case "generation": 1583 case "version": 1584 return true; 1585 } 1586 break; 1587 ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment 1588 case supplementalData: 1589 // these are NOT under /metadata/ but are actually metadata 1590 switch (element1) { 1591 case "generation": 1592 case "version": 1593 case "validity": 1594 case "references": 1595 case "coverageLevels": 1596 return true; 1597 case "transforms": 1598 return elementN.equals("comment"); 1599 case "metadata": 1600 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata. 1601 switch (element2) { 1602 case "validity": 1603 case "serialElements": 1604 case "suppress": 1605 case "distinguishing": 1606 case "blocking": 1607 case "casingData": 1608 return true; 1609 } 1610 break; 1611 } 1612 break; 1613 default: 1614 } 1615 return false; 1616 } 1617 isDeprecated(XPathParts pathPlain)1618 public boolean isDeprecated(XPathParts pathPlain) { 1619 for (int i = 0; i < pathPlain.size(); ++i) { 1620 String elementName = pathPlain.getElement(i); 1621 if (isDeprecated(elementName, "*", null)) { 1622 return true; 1623 } 1624 for (String attribute : pathPlain.getAttributeKeys(i)) { 1625 String attributeValue = pathPlain.getAttributeValue(i, attribute); 1626 if (isDeprecated(elementName, attribute, attributeValue)) { 1627 return true; 1628 } 1629 } 1630 } 1631 return false; 1632 } 1633 1634 public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings(); 1635 public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings(); 1636 public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings(); 1637 1638 private static class XPathPartsSet { 1639 private final Set<XPathParts> list = new LinkedHashSet<>(); 1640 addElement(String element)1641 private void addElement(String element) { 1642 if (list.isEmpty()) { 1643 list.add(new XPathParts().addElement(element)); 1644 } else { 1645 for (XPathParts item : list) { 1646 item.addElement(element); 1647 } 1648 } 1649 } 1650 addAttribute(String attribute, String attributeValue)1651 private void addAttribute(String attribute, String attributeValue) { 1652 for (XPathParts item : list) { 1653 item.addAttribute(attribute, attributeValue); 1654 } 1655 } 1656 setElement(int i, String string)1657 private void setElement(int i, String string) { 1658 for (XPathParts item : list) { 1659 item.setElement(i, string); 1660 } 1661 } 1662 addAttributes(String attribute, List<String> attributeValues)1663 private void addAttributes(String attribute, List<String> attributeValues) { 1664 if (attributeValues.size() == 1) { 1665 addAttribute(attribute, attributeValues.iterator().next()); 1666 } else { 1667 // duplicate all the items in the list with the given values 1668 Set<XPathParts> newList = new LinkedHashSet<>(); 1669 for (XPathParts item : list) { 1670 for (String attributeValue : attributeValues) { 1671 XPathParts newItem = item.cloneAsThawed(); 1672 newItem.addAttribute(attribute, attributeValue); 1673 newList.add(newItem); 1674 } 1675 } 1676 list.clear(); 1677 list.addAll(newList); 1678 } 1679 } 1680 toStrings()1681 private ImmutableSet<String> toStrings() { 1682 Builder<String> result = new ImmutableSet.Builder<>(); 1683 1684 for (XPathParts item : list) { 1685 result.add(item.toString()); 1686 } 1687 return result.build(); 1688 } 1689 1690 @Override toString()1691 public String toString() { 1692 return list.toString(); 1693 } 1694 } 1695 getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)1696 public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) { 1697 extras.clear(); 1698 Map<String, String> valueAttributes = new HashMap<>(); 1699 XPathPartsSet pathResult = new XPathPartsSet(); 1700 String element = null; 1701 for (int i = 0; i < pathPlain.size(); ++i) { 1702 element = pathPlain.getElement(i); 1703 pathResult.addElement(element); 1704 valueAttributes.clear(); 1705 for (String attribute : pathPlain.getAttributeKeys(i)) { 1706 AttributeStatus status = getAttributeStatus(element, attribute); 1707 final String attributeValue = pathPlain.getAttributeValue(i, attribute); 1708 switch (status) { 1709 case distinguished: 1710 AttributeType attrType = getAttributeType(element, attribute); 1711 if (attrType == AttributeType.NMTOKENS) { 1712 pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue)); 1713 } else { 1714 pathResult.addAttribute(attribute, attributeValue); 1715 } 1716 break; 1717 case value: 1718 valueAttributes.put(attribute, attributeValue); 1719 break; 1720 case metadata: 1721 break; 1722 } 1723 } 1724 if (!valueAttributes.isEmpty()) { 1725 boolean hasValue = hasValue(element); 1726 // if it doesn't have a value, we construct new child elements, with _ prefix 1727 // if it does have a value, we have to play a further trick, since 1728 // we can't have a value and child elements at the same level. 1729 // So we use a _ suffix on the element. 1730 if (hasValue) { 1731 pathResult.setElement(i, element + "_"); 1732 } else { 1733 int debug = 0; 1734 } 1735 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) { 1736 final String attribute = attributeAndValue.getKey(); 1737 final String attributeValue = attributeAndValue.getValue(); 1738 1739 Set<String> pathsShort = pathResult.toStrings(); 1740 AttributeType attrType = getAttributeType(element, attribute); 1741 for (String pathShort : pathsShort) { 1742 pathShort += "/_" + attribute; 1743 if (attrType == AttributeType.NMTOKENS) { 1744 for (String valuePart : SPACE_SPLITTER.split(attributeValue)) { 1745 extras.put(pathShort, valuePart); 1746 } 1747 } else { 1748 extras.put(pathShort, attributeValue); 1749 } 1750 } 1751 } 1752 if (hasValue) { 1753 pathResult.setElement(i, element); // restore 1754 } 1755 } 1756 } 1757 // Only add the path if it could have a value, looking at the last element 1758 if (!hasValue(element)) { 1759 return null; 1760 } 1761 return pathResult.toStrings(); 1762 } 1763 getAttributeType(String elementName, String attributeName)1764 public AttributeType getAttributeType(String elementName, String attributeName) { 1765 Attribute attr = getAttribute(elementName, attributeName); 1766 return (attr != null) ? attr.type : null; 1767 } 1768 getAttribute(String elementName, String attributeName)1769 public Attribute getAttribute(String elementName, String attributeName) { 1770 Element element = nameToElement.get(elementName); 1771 return (element != null) ? element.getAttributeNamed(attributeName) : null; 1772 } 1773 1774 // TODO: add support for following to DTD annotations, and rework API 1775 1776 static final Set<String> SPACED_VALUES = ImmutableSet.of( 1777 "idValidity", 1778 "languageGroup"); 1779 getValueSplitter(XPathParts pathPlain)1780 public static Splitter getValueSplitter(XPathParts pathPlain) { 1781 if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) { 1782 return SPACE_SPLITTER; 1783 } else if (pathPlain.getElement(-1).equals("annotation") 1784 && !pathPlain.getAttributeKeys(-1).contains("tts")) { 1785 return BAR_SPLITTER; 1786 } 1787 return CR_SPLITTER; 1788 } 1789 isComment(XPathParts pathPlain, String line)1790 public static boolean isComment(XPathParts pathPlain, String line) { 1791 if (pathPlain.contains("transform")) { 1792 if (line.startsWith("#")) { 1793 return true; 1794 } 1795 } 1796 return false; 1797 } 1798 isExtraSplit(String extraPath)1799 public static boolean isExtraSplit(String extraPath) { 1800 if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) { 1801 return true; 1802 } 1803 return false; 1804 } 1805 1806 /** 1807 * Return the value status for an EAV 1808 */ getValueStatus(String elementName, String attributeName, String value)1809 public ValueStatus getValueStatus(String elementName, String attributeName, String value) { 1810 Element element = nameToElement.get(elementName); 1811 if (element == null) { 1812 return ValueStatus.invalid; 1813 } 1814 Attribute attr = element.getAttributeNamed(attributeName); 1815 if (attr == null) { 1816 return ValueStatus.invalid; 1817 } 1818 return attr.getValueStatus(value); 1819 } 1820 1821 /** 1822 * Return element-attribute pairs with non-enumerated values, for quick checks. 1823 */ getNonEnumerated(Map<String,String> matchValues)1824 public Multimap<String, String> getNonEnumerated(Map<String,String> matchValues) { 1825 Multimap<String,String> nonEnumeratedElementToAttribute = TreeMultimap.create(); // make tree for ease of debugging 1826 for (Entry<String, Element> entry : nameToElement.entrySet()) { 1827 Element element = entry.getValue(); 1828 for (Attribute attribute : element.attributes.keySet()) { 1829 if (attribute.type != AttributeType.ENUMERATED_TYPE) { 1830 String elementName = element.getName(); 1831 String attrName = attribute.getName(); 1832 nonEnumeratedElementToAttribute.put(elementName, attrName); 1833 if (attribute.matchValue != null) { 1834 matchValues.put(elementName + "\t" + attrName, attribute.matchValue.getName()); 1835 } 1836 } 1837 } 1838 } 1839 return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute); 1840 } 1841 } 1842