1 package org.unicode.cldr.util; 2 3 import java.io.File; 4 import java.io.StringReader; 5 import java.util.ArrayList; 6 import java.util.Arrays; 7 import java.util.Collection; 8 import java.util.Collections; 9 import java.util.Comparator; 10 import java.util.EnumMap; 11 import java.util.HashMap; 12 import java.util.HashSet; 13 import java.util.Iterator; 14 import java.util.LinkedHashMap; 15 import java.util.LinkedHashSet; 16 import java.util.List; 17 import java.util.Locale; 18 import java.util.Map; 19 import java.util.Map.Entry; 20 import java.util.Set; 21 import java.util.TreeMap; 22 import java.util.regex.Pattern; 23 24 import com.google.common.base.CharMatcher; 25 import com.google.common.base.Splitter; 26 import com.google.common.collect.ImmutableSet; 27 import com.google.common.collect.ImmutableSet.Builder; 28 import com.google.common.collect.Multimap; 29 import com.ibm.icu.dev.util.CollectionUtilities; 30 import com.ibm.icu.impl.Relation; 31 import com.ibm.icu.text.Transform; 32 33 /** 34 * An immutable object that contains the structure of a DTD. 35 * @author markdavis 36 */ 37 public class DtdData extends XMLFileReader.SimpleHandler { 38 private static final String COMMENT_PREFIX = System.lineSeparator() + " "; 39 private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false); 40 private static final boolean USE_SYNTHESIZED = false; 41 42 private static final boolean DEBUG = false; 43 private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]"); 44 45 private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class); 46 private Map<String, Element> nameToElement = new HashMap<String, Element>(); 47 private MapComparator<String> elementComparator; 48 private MapComparator<String> attributeComparator; 49 50 public final Element ROOT; 51 public final Element PCDATA = elementFrom("#PCDATA"); 52 public final Element ANY = elementFrom("ANY"); 53 public final DtdType dtdType; 54 public final String version; 55 private Element lastElement; 56 private Attribute lastAttribute; 57 private Set<String> preCommentCache; 58 private DtdComparator dtdComparator; 59 60 public enum AttributeStatus { 61 distinguished, value, metadata 62 } 63 64 public enum Mode { 65 REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null"); 66 67 public final String source; 68 Mode(String s)69 Mode(String s) { 70 source = s; 71 } 72 forString(String mode)73 public static Mode forString(String mode) { 74 for (Mode value : Mode.values()) { 75 if (value.source.equals(mode)) { 76 return value; 77 } 78 } 79 if (mode == null) { 80 return NULL; 81 } 82 throw new IllegalArgumentException(mode); 83 } 84 } 85 86 public enum AttributeType { 87 CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE 88 } 89 90 public static class Attribute implements Named { 91 public final String name; 92 public final Element element; 93 public final Mode mode; 94 public final String defaultValue; 95 public final AttributeType type; 96 public final Map<String, Integer> values; 97 private final Set<String> commentsPre; 98 private Set<String> commentsPost; 99 private boolean isDeprecatedAttribute; 100 private AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations 101 private Set<String> deprecatedValues = Collections.emptySet(); 102 private final Comparator<String> attributeValueComparator; 103 Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)104 private Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) { 105 commentsPre = firstComment; 106 element = element2; 107 name = aName.intern(); 108 if (name.equals("draft") // normally never permitted on elements with children, but special cases... 109 && !element.getName().equals("collation") 110 && !element.getName().equals("transform")) { 111 int elementChildrenCount = element.getChildren().size(); 112 if (elementChildrenCount > 1 113 || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) { 114 isDeprecatedAttribute = true; 115 if (DEBUG) { 116 System.out.println(element.getName() + ":" + element.getChildren()); 117 } 118 } 119 } 120 mode = mode2; 121 defaultValue = value2 == null ? null 122 : value2.intern(); 123 AttributeType _type = AttributeType.ENUMERATED_TYPE; 124 Map<String, Integer> _values = Collections.emptyMap(); 125 if (split.length == 1) { 126 try { 127 _type = AttributeType.valueOf(split[0]); 128 } catch (Exception e) { 129 } 130 } 131 type = _type; 132 133 if (_type == AttributeType.ENUMERATED_TYPE) { 134 LinkedHashMap<String, Integer> temp = new LinkedHashMap<String, Integer>(); 135 for (String part : split) { 136 if (part.length() != 0) { 137 temp.put(part.intern(), temp.size()); 138 } 139 } 140 _values = Collections.unmodifiableMap(temp); 141 } 142 values = _values; 143 attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name); 144 } 145 146 @Override toString()147 public String toString() { 148 return element.name + ":" + name; 149 } 150 appendDtdString(StringBuilder b)151 public StringBuilder appendDtdString(StringBuilder b) { 152 Attribute a = this; 153 b.append("<!ATTLIST " + element.name + " " + a.name); 154 boolean first; 155 if (a.type == AttributeType.ENUMERATED_TYPE) { 156 b.append(" ("); 157 first = true; 158 for (String s : a.values.keySet()) { 159 if (deprecatedValues.contains(s)) { 160 continue; 161 } 162 if (first) { 163 first = false; 164 } else { 165 b.append(" | "); 166 } 167 b.append(s); 168 } 169 b.append(")"); 170 } else { 171 b.append(' ').append(a.type); 172 } 173 if (a.mode != Mode.NULL) { 174 b.append(" ").append(a.mode.source); 175 } 176 if (a.defaultValue != null) { 177 b.append(" \"").append(a.defaultValue).append('"'); 178 } 179 b.append(" >"); 180 return b; 181 } 182 features()183 public String features() { 184 return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString()) 185 + (mode == Mode.NULL ? "" : ", mode=" + mode) 186 + (defaultValue == null ? "" : ", default=" + defaultValue); 187 } 188 189 @Override getName()190 public String getName() { 191 return name; 192 } 193 194 private static Splitter COMMA = Splitter.on(',').trimResults(); 195 addComment(String commentIn)196 public void addComment(String commentIn) { 197 if (commentIn.startsWith("@")) { 198 // there are exactly 2 cases: deprecated and ordered 199 switch (commentIn) { 200 case "@METADATA": 201 attributeStatus = AttributeStatus.metadata; 202 break; 203 case "@VALUE": 204 attributeStatus = AttributeStatus.value; 205 break; 206 case "@DEPRECATED": 207 isDeprecatedAttribute = true; 208 break; 209 default: 210 if (commentIn.startsWith("@DEPRECATED:")) { 211 deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(commentIn.substring("@DEPRECATED:".length())))); 212 break; 213 } 214 throw new IllegalArgumentException("Unrecognized annotation: " + commentIn); 215 } 216 return; 217 } 218 commentsPost = addUnmodifiable(commentsPost, commentIn.trim()); 219 } 220 221 /** 222 * Special version of identity; only considers name and name of element 223 */ 224 @Override equals(Object obj)225 public boolean equals(Object obj) { 226 if (!(obj instanceof Attribute)) { 227 return false; 228 } 229 Attribute that = (Attribute) obj; 230 return name.equals(that.name) 231 && element.name.equals(that.element.name) // don't use plain element: circularity 232 // not relevant to identity 233 // && Objects.equals(comment, that.comment) 234 // && mode.equals(that.mode) 235 // && Objects.equals(defaultValue, that.defaultValue) 236 // && type.equals(that.type) 237 // && values.equals(that.values) 238 ; 239 } 240 241 /** 242 * Special version of identity; only considers name and name of element 243 */ 244 @Override hashCode()245 public int hashCode() { 246 return name.hashCode() * 37 247 + element.name.hashCode() // don't use plain element: circularity 248 // not relevant to identity 249 // ) * 37 + Objects.hashCode(comment)) * 37 250 // + mode.hashCode()) * 37 251 // + Objects.hashCode(defaultValue)) * 37 252 // + type.hashCode()) * 37 253 // + values.hashCode() 254 ; 255 } 256 isDeprecated()257 public boolean isDeprecated() { 258 return isDeprecatedAttribute; 259 } 260 isDeprecatedValue(String value)261 public boolean isDeprecatedValue(String value) { 262 return deprecatedValues.contains(value); 263 } 264 getStatus()265 public AttributeStatus getStatus() { 266 return attributeStatus; 267 } 268 269 } 270 DtdData(DtdType type, String version)271 private DtdData(DtdType type, String version) { 272 this.dtdType = type; 273 this.ROOT = elementFrom(type.rootType.toString()); 274 this.version = version; 275 } 276 addAttribute(String eName, String aName, String type, String mode, String value)277 private void addAttribute(String eName, String aName, String type, String mode, String value) { 278 Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache); 279 preCommentCache = null; 280 getAttributesFromName().put(aName, a); 281 CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size()); 282 lastElement = null; 283 lastAttribute = a; 284 } 285 286 public enum ElementType { 287 EMPTY, ANY, PCDATA("(#PCDATA)"), CHILDREN; 288 public final String source; 289 ElementType(String s)290 private ElementType(String s) { 291 source = s; 292 } 293 ElementType()294 private ElementType() { 295 source = name(); 296 } 297 } 298 299 interface Named { getName()300 String getName(); 301 } 302 303 public enum ElementStatus { 304 regular, metadata 305 } 306 307 public static class Element implements Named { 308 public final String name; 309 private String rawModel; 310 private ElementType type; 311 private final Map<Element, Integer> children = new LinkedHashMap<Element, Integer>(); 312 private final Map<Attribute, Integer> attributes = new LinkedHashMap<Attribute, Integer>(); 313 private Set<String> commentsPre; 314 private Set<String> commentsPost; 315 private String model; 316 private boolean isOrderedElement; 317 private boolean isDeprecatedElement; 318 private ElementStatus elementStatus = ElementStatus.regular; 319 Element(String name2)320 private Element(String name2) { 321 name = name2.intern(); 322 } 323 setChildren(DtdData dtdData, String model, Set<String> precomments)324 private void setChildren(DtdData dtdData, String model, Set<String> precomments) { 325 this.commentsPre = precomments; 326 rawModel = model; 327 this.model = clean(model); 328 if (model.equals("EMPTY")) { 329 type = ElementType.EMPTY; 330 return; 331 } 332 type = ElementType.CHILDREN; 333 for (String part : FILLER.split(model)) { 334 if (part.length() != 0) { 335 if (part.equals("#PCDATA")) { 336 type = ElementType.PCDATA; 337 } else if (part.equals("ANY")) { 338 type = ElementType.ANY; 339 } else { 340 CldrUtility.putNew(children, dtdData.elementFrom(part), children.size()); 341 } 342 } 343 } 344 if ((type == ElementType.CHILDREN) == (children.size() == 0) 345 && !model.startsWith("(#PCDATA|cp")) { 346 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model); 347 } 348 } 349 350 static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)"); 351 static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])"); 352 clean(String model2)353 private String clean(String model2) { 354 // (x) -> ( x ); 355 // x,y -> x, y 356 // x|y -> x | y 357 String result = CLEANER1.matcher(model2).replaceAll("$1 "); 358 result = CLEANER2.matcher(result).replaceAll(" $1"); 359 return result.equals(model2) 360 ? model2 361 : result; // for debugging 362 } 363 containsAttribute(String string)364 public boolean containsAttribute(String string) { 365 for (Attribute a : attributes.keySet()) { 366 if (a.name.equals(string)) { 367 return true; 368 } 369 } 370 return false; 371 } 372 373 @Override toString()374 public String toString() { 375 return name; 376 } 377 toDtdString()378 public String toDtdString() { 379 return "<!ELEMENT " + name + " " + getRawModel() + " >"; 380 } 381 getType()382 public ElementType getType() { 383 return type; 384 } 385 getChildren()386 public Map<Element, Integer> getChildren() { 387 return Collections.unmodifiableMap(children); 388 } 389 getAttributes()390 public Map<Attribute, Integer> getAttributes() { 391 return Collections.unmodifiableMap(attributes); 392 } 393 394 @Override getName()395 public String getName() { 396 return name; 397 } 398 getChildNamed(String string)399 public Element getChildNamed(String string) { 400 for (Element e : children.keySet()) { 401 if (e.name.equals(string)) { 402 return e; 403 } 404 } 405 return null; 406 } 407 getAttributeNamed(String string)408 public Attribute getAttributeNamed(String string) { 409 for (Attribute a : attributes.keySet()) { 410 if (a.name.equals(string)) { 411 return a; 412 } 413 } 414 return null; 415 } 416 addComment(String addition)417 public void addComment(String addition) { 418 if (addition.startsWith("@")) { 419 // there are exactly 3 cases: deprecated, ordered, and metadata 420 switch (addition) { 421 case "@ORDERED": 422 isOrderedElement = true; 423 break; 424 case "@DEPRECATED": 425 isDeprecatedElement = true; 426 break; 427 case "@METADATA": 428 elementStatus = ElementStatus.metadata; 429 break; 430 default: 431 throw new IllegalArgumentException("Unrecognized annotation: " + addition); 432 } 433 return; 434 } 435 commentsPost = addUnmodifiable(commentsPost, addition.trim()); 436 } 437 438 /** 439 * Special version of equals. Only the name is considered in the identity. 440 */ 441 @Override equals(Object obj)442 public boolean equals(Object obj) { 443 if (!(obj instanceof Element)) { 444 return false; 445 } 446 Element that = (Element) obj; 447 return name.equals(that.name) 448 // not relevant to the identity of the object 449 // && Objects.equals(comment, that.comment) 450 // && type == that.type 451 // && attributes.equals(that.attributes) 452 // && children.equals(that.children) 453 ; 454 } 455 456 /** 457 * Special version of hashcode. Only the name is considered in the identity. 458 */ 459 @Override hashCode()460 public int hashCode() { 461 return name.hashCode() 462 // not relevant to the identity of the object 463 // * 37 + Objects.hashCode(comment) 464 //) * 37 + Objects.hashCode(type) 465 // ) * 37 + attributes.hashCode() 466 // ) * 37 + children.hashCode() 467 ; 468 } 469 isDeprecated()470 public boolean isDeprecated() { 471 return isDeprecatedElement; 472 } 473 getElementStatus()474 public ElementStatus getElementStatus() { 475 return elementStatus; 476 } 477 478 /** 479 * @return the rawModel 480 */ getRawModel()481 public String getRawModel() { 482 return rawModel; 483 } 484 } 485 elementFrom(String name)486 private Element elementFrom(String name) { 487 Element result = nameToElement.get(name); 488 if (result == null) { 489 nameToElement.put(name, result = new Element(name)); 490 } 491 return result; 492 } 493 addElement(String name2, String model)494 private void addElement(String name2, String model) { 495 Element element = elementFrom(name2); 496 element.setChildren(this, model, preCommentCache); 497 preCommentCache = null; 498 lastElement = element; 499 lastAttribute = null; 500 } 501 addComment(String comment)502 private void addComment(String comment) { 503 comment = comment.trim(); 504 if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky" 505 if (comment.startsWith("@")) { 506 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment"); 507 } 508 preCommentCache = addUnmodifiable(preCommentCache, comment); 509 } else if (lastElement != null) { 510 lastElement.addComment(comment); 511 } else if (lastAttribute != null) { 512 lastAttribute.addComment(comment); 513 } else { 514 if (comment.startsWith("@")) { 515 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment"); 516 } 517 preCommentCache = addUnmodifiable(preCommentCache, comment); 518 } 519 } 520 521 // TODO hide this 522 /** 523 * @deprecated 524 */ 525 @Override handleElementDecl(String name, String model)526 public void handleElementDecl(String name, String model) { 527 if (SHOW_ALL) { 528 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) > 529 System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >"); 530 } 531 addElement(name, model); 532 } 533 534 // TODO hide this 535 /** 536 * @deprecated 537 */ 538 @Override handleStartDtd(String name, String publicId, String systemId)539 public void handleStartDtd(String name, String publicId, String systemId) { 540 DtdType explicitDtdType = DtdType.valueOf(name); 541 if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) { 542 throw new IllegalArgumentException("Mismatch in dtdTypes"); 543 } 544 }; 545 546 /** 547 * @deprecated 548 */ 549 @Override handleAttributeDecl(String eName, String aName, String type, String mode, String value)550 public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) { 551 if (SHOW_ALL) { 552 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > 553 // <!ATTLIST version number CDATA #REQUIRED > 554 // <!ATTLIST version cldrVersion CDATA #FIXED "27" > 555 556 System.out.println("<!ATTLIST " + eName 557 + " " + aName 558 + " " + type 559 + " " + mode 560 + (value == null ? "" : " \"" + value + "\"") 561 + " >"); 562 } 563 // HACK for 1.1.1 564 if (eName.equals("draft")) { 565 eName = "week"; 566 } 567 addAttribute(eName, aName, type, mode, value); 568 } 569 570 /** 571 * @deprecated 572 */ 573 @Override handleComment(String path, String comment)574 public void handleComment(String path, String comment) { 575 if (SHOW_ALL) { 576 // <!-- true and false are deprecated. --> 577 System.out.println("<!-- " + comment.trim() + " -->"); 578 } 579 addComment(comment); 580 } 581 582 // TODO hide this 583 /** 584 * @deprecated 585 */ 586 @Override handleEndDtd()587 public void handleEndDtd() { 588 throw new XMLFileReader.AbortException(); 589 } 590 591 // static final Map<CLDRFile.DtdType, String> DTD_TYPE_TO_FILE; 592 // static { 593 // EnumMap<CLDRFile.DtdType, String> temp = new EnumMap<CLDRFile.DtdType, String>(CLDRFile.DtdType.class); 594 // temp.put(CLDRFile.DtdType.ldml, CldrUtility.BASE_DIRECTORY + "common/dtd/ldml.dtd"); 595 // temp.put(CLDRFile.DtdType.supplementalData, CldrUtility.BASE_DIRECTORY + "common/dtd/ldmlSupplemental.dtd"); 596 // temp.put(CLDRFile.DtdType.ldmlBCP47, CldrUtility.BASE_DIRECTORY + "common/dtd/ldmlBCP47.dtd"); 597 // temp.put(CLDRFile.DtdType.keyboard, CldrUtility.BASE_DIRECTORY + "keyboards/dtd/ldmlKeyboard.dtd"); 598 // temp.put(CLDRFile.DtdType.platform, CldrUtility.BASE_DIRECTORY + "keyboards/dtd/ldmlPlatform.dtd"); 599 // DTD_TYPE_TO_FILE = Collections.unmodifiableMap(temp); 600 // } 601 602 /** 603 * Normal version of DtdData 604 * Note that it always gets the trunk version 605 */ getInstance(DtdType type)606 public static DtdData getInstance(DtdType type) { 607 return CACHE.get(type); 608 } 609 610 /** 611 * Special form using version, used only by tests, etc. 612 */ getInstance(DtdType type, String version)613 public static DtdData getInstance(DtdType type, String version) { 614 DtdData simpleHandler = new DtdData(type, version); 615 XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler); 616 File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory() 617 : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version); 618 619 if (type != type.rootType) { 620 // read the real first, then add onto it. 621 readFile(type.rootType, xfr, directory); 622 } 623 readFile(type, xfr, directory); 624 // HACK 625 if (type == DtdType.ldmlICU) { 626 Element special = simpleHandler.nameToElement.get("special"); 627 for (String extraElementName : Arrays.asList( 628 "icu:breakIteratorData", 629 "icu:UCARules", 630 "icu:scripts", 631 "icu:transforms", 632 "icu:ruleBasedNumberFormats", 633 "icu:isLeapMonth", 634 "icu:version", 635 "icu:breakDictionaryData", 636 "icu:depends")) { 637 Element extraElement = simpleHandler.nameToElement.get(extraElementName); 638 special.children.put(extraElement, special.children.size()); 639 } 640 } 641 if (simpleHandler.ROOT.children.size() == 0) { 642 throw new IllegalArgumentException(); // should never happen 643 } 644 simpleHandler.finish(); 645 simpleHandler.freeze(); 646 return simpleHandler; 647 } 648 finish()649 private void finish() { 650 dtdComparator = new DtdComparator(); 651 } 652 readFile(DtdType type, XMLFileReader xfr, File directory)653 public static void readFile(DtdType type, XMLFileReader xfr, File directory) { 654 File file = new File(directory, type.dtdPath); 655 StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>" 656 + "<!DOCTYPE " + type 657 + " SYSTEM '" + file.getAbsolutePath() + "'>"); 658 xfr.read(type.toString(), s, -1, true); // DTD_TYPE_TO_FILE.get(type) 659 } 660 freeze()661 private void freeze() { 662 if (version == null) { // only generate for new versions 663 MergeLists<String> elementMergeList = new MergeLists<String>(); 664 elementMergeList.add(dtdType.toString()); 665 MergeLists<String> attributeMergeList = new MergeLists<String>(); 666 attributeMergeList.add("_q"); 667 668 for (Element element : nameToElement.values()) { 669 if (element.children.size() > 0) { 670 Collection<String> names = getNames(element.children.keySet()); 671 elementMergeList.add(names); 672 if (DEBUG) { 673 System.out.println(element.getName() + "\t→\t" + names); 674 } 675 } 676 if (element.attributes.size() > 0) { 677 Collection<String> names = getNames(element.attributes.keySet()); 678 attributeMergeList.add(names); 679 if (DEBUG) { 680 System.out.println(element.getName() + "\t→\t@" + names); 681 } 682 } 683 } 684 List<String> elementList = elementMergeList.merge(); 685 List<String> attributeList = attributeMergeList.merge(); 686 if (DEBUG) { 687 System.out.println("Element Ordering:\t" + elementList); 688 System.out.println("Attribute Ordering:\t" + attributeList); 689 } 690 // double-check 691 // for (Element element : elements) { 692 // if (!MergeLists.hasConsistentOrder(elementList, element.children.keySet())) { 693 // throw new IllegalArgumentException("Failed to find good element order: " + element.children.keySet()); 694 // } 695 // if (!MergeLists.hasConsistentOrder(attributeList, element.attributes.keySet())) { 696 // throw new IllegalArgumentException("Failed to find good attribute order: " + element.attributes.keySet()); 697 // } 698 // } 699 elementComparator = new MapComparator<String>(elementList).setErrorOnMissing(true).freeze(); 700 attributeComparator = new MapComparator<String>(attributeList).setErrorOnMissing(true).freeze(); 701 } 702 nameToAttributes.freeze(); 703 nameToElement = Collections.unmodifiableMap(nameToElement); 704 } 705 getNames(Collection<? extends Named> keySet)706 private Collection<String> getNames(Collection<? extends Named> keySet) { 707 List<String> result = new ArrayList<String>(); 708 for (Named e : keySet) { 709 result.add(e.getName()); 710 } 711 return result; 712 } 713 714 public enum DtdItem { 715 ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE 716 } 717 718 public interface AttributeValueComparator { compare(String element, String attribute, String value1, String value2)719 public int compare(String element, String attribute, String value1, String value2); 720 } 721 getDtdComparator(AttributeValueComparator avc)722 public Comparator<String> getDtdComparator(AttributeValueComparator avc) { 723 return dtdComparator; 724 } 725 726 private class DtdComparator implements Comparator<String> { 727 @Override compare(String path1, String path2)728 public int compare(String path1, String path2) { 729 XPathParts a = XPathParts.getFrozenInstance(path1); 730 XPathParts b = XPathParts.getFrozenInstance(path2); 731 // there must always be at least one element 732 String baseA = a.getElement(0); 733 String baseB = b.getElement(0); 734 if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) { 735 throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB); 736 } 737 int min = Math.min(a.size(), b.size()); 738 Element parent = ROOT; 739 Element elementA; 740 for (int i = 1; i < min; ++i, parent = elementA) { 741 // add extra test for "fake" elements, used in diffing. they always start with _ 742 String elementRawA = a.getElement(i); 743 String elementRawB = b.getElement(i); 744 if (elementRawA.startsWith("_")) { 745 return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1; 746 } else if (elementRawB.startsWith("_")) { 747 return 1; 748 } 749 // 750 elementA = nameToElement.get(elementRawA); 751 Element elementB = nameToElement.get(elementRawB); 752 if (elementA != elementB) { 753 int aa = parent.children.get(elementA); 754 int bb = parent.children.get(elementB); 755 return aa - bb; 756 } 757 int countA = a.getAttributeCount(i); 758 int countB = b.getAttributeCount(i); 759 if (countA == 0 && countB == 0) { 760 continue; 761 } 762 // we have two ways to compare the attributes. One based on the dtd, 763 // and one based on explicit comparators 764 765 // at this point the elements are the same and correspond to elementA 766 // in the dtd 767 768 // Handle the special added elements 769 String aqValue = a.getAttributeValue(i, "_q"); 770 if (aqValue != null) { 771 String bqValue = b.getAttributeValue(i, "_q"); 772 if (!aqValue.equals(bqValue)) { 773 int aValue = Integer.parseInt(aqValue); 774 int bValue = Integer.parseInt(bqValue); 775 return aValue - bValue; 776 } 777 --countA; 778 --countB; 779 } 780 781 attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) { 782 Attribute main = attr.getKey(); 783 String valueA = a.getAttributeValue(i, main.name); 784 String valueB = b.getAttributeValue(i, main.name); 785 if (valueA == null) { 786 if (valueB != null) { 787 return -1; 788 } 789 } else if (valueB == null) { 790 return 1; 791 } else if (valueA.equals(valueB)) { 792 --countA; 793 --countB; 794 if (countA == 0 && countB == 0) { 795 break attributes; 796 } 797 continue; // TODO 798 } else if (main.attributeValueComparator != null) { 799 return main.attributeValueComparator.compare(valueA, valueB); 800 } else if (main.values.size() != 0) { 801 int aa = main.values.get(valueA); 802 int bb = main.values.get(valueB); 803 return aa - bb; 804 } else { 805 return valueA.compareTo(valueB); 806 } 807 } 808 if (countA != 0 || countB != 0) { 809 throw new IllegalArgumentException(); 810 } 811 } 812 return a.size() - b.size(); 813 } 814 } 815 getAttributeComparator()816 public MapComparator<String> getAttributeComparator() { 817 return attributeComparator; 818 } 819 getElementComparator()820 public MapComparator<String> getElementComparator() { 821 return elementComparator; 822 } 823 getAttributesFromName()824 public Relation<String, Attribute> getAttributesFromName() { 825 return nameToAttributes; 826 } 827 getElementFromName()828 public Map<String, Element> getElementFromName() { 829 return nameToElement; 830 } 831 832 // private static class XPathIterator implements SimpleIterator<Node> { 833 // private String path; 834 // private int position; // at the start of the next element, or at the end of the string 835 // private Node node = new Node(); 836 // 837 // public void set(String path) { 838 // if (!path.startsWith("//")) { 839 // throw new IllegalArgumentException(); 840 // } 841 // this.path = path; 842 // this.position = 2; 843 // } 844 // 845 // @Override 846 // public Node next() { 847 // // starts with /...[@...="...."]... 848 // if (position >= path.length()) { 849 // return null; 850 // } 851 // node.elementName = ""; 852 // node.attributes.clear(); 853 // int start = position; 854 // // collect the element 855 // while (true) { 856 // if (position >= path.length()) { 857 // return node; 858 // } 859 // char ch = path.charAt(position++); 860 // switch (ch) { 861 // case '/': 862 // return node; 863 // case '[': 864 // node.elementName = path.substring(start, position); 865 // break; 866 // } 867 // } 868 // // done with element, we hit a [, collect the attributes 869 // 870 // if (path.charAt(position++) != '@') { 871 // throw new IllegalArgumentException(); 872 // } 873 // while (true) { 874 // if (position >= path.length()) { 875 // return node; 876 // } 877 // char ch = path.charAt(position++); 878 // switch (ch) { 879 // case '/': 880 // return node; 881 // case '[': 882 // node.elementName = path.substring(start, position); 883 // break; 884 // } 885 // } 886 // } 887 // } 888 toString()889 public String toString() { 890 StringBuilder b = new StringBuilder(); 891 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) > 892 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. --> 893 // if (firstComment != null) { 894 // b.append("\n<!--").append(firstComment).append("-->"); 895 // } 896 Seen seen = new Seen(dtdType); 897 seen.seenElements.add(ANY); 898 seen.seenElements.add(PCDATA); 899 toString(ROOT, b, seen); 900 901 // Hack for ldmlIcu: catch the items that are not mentioned in the original 902 int currentEnd = b.length(); 903 for (Element e : nameToElement.values()) { 904 toString(e, b, seen); 905 } 906 if (currentEnd != b.length()) { 907 b.insert(currentEnd, 908 System.lineSeparator() + System.lineSeparator() 909 + "<!-- Elements not reachable from root! -->" 910 + System.lineSeparator()); 911 } 912 return b.toString(); 913 } 914 915 static final class Seen { 916 Set<Element> seenElements = new HashSet<Element>(); 917 Set<Attribute> seenAttributes = new HashSet<Attribute>(); 918 Seen(DtdType dtdType)919 public Seen(DtdType dtdType) { 920 if (dtdType.rootType == dtdType) { 921 return; 922 } 923 DtdData otherData = DtdData.getInstance(dtdType.rootType); 924 walk(otherData, otherData.ROOT); 925 seenElements.remove(otherData.nameToElement.get("special")); 926 } 927 walk(DtdData otherData, Element current)928 private void walk(DtdData otherData, Element current) { 929 seenElements.add(current); 930 seenAttributes.addAll(current.attributes.keySet()); 931 for (Element e : current.children.keySet()) { 932 walk(otherData, e); 933 } 934 } 935 } 936 getDescendents(Element start, Set<Element> toAddTo)937 public Set<Element> getDescendents(Element start, Set<Element> toAddTo) { 938 if (!toAddTo.contains(start)) { 939 toAddTo.add(start); 940 for (Element e : start.children.keySet()) { 941 getDescendents(e, toAddTo); 942 } 943 } 944 return toAddTo; 945 } 946 947 //static final SupplementalDataInfo supplementalDataInfo = CLDRConfig.getInstance().getSupplementalDataInfo(); 948 toString(Element current, StringBuilder b, Seen seen)949 private void toString(Element current, StringBuilder b, Seen seen) { 950 // if ("calendar".equals(current.name) || current.commentsPost != null && current.commentsPost.contains("use of fields")) { 951 // int debug = 0; 952 // } 953 boolean first = true; 954 if (seen.seenElements.contains(current)) { 955 return; 956 } 957 seen.seenElements.add(current); 958 boolean elementDeprecated = isDeprecated(current.name, "*", "*"); 959 960 showComments(b, current.commentsPre, true); 961 b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >"); 962 if (USE_SYNTHESIZED) { 963 Element aliasElement = getElementFromName().get("alias"); 964 //b.append(current.rawChildren); 965 if (!current.children.isEmpty()) { 966 LinkedHashSet<Element> elements = new LinkedHashSet<Element>(current.children.keySet()); 967 boolean hasAlias = aliasElement != null && elements.remove(aliasElement); 968 //boolean hasSpecial = specialElement != null && elements.remove(specialElement); 969 if (hasAlias) { 970 b.append("(alias |"); 971 } 972 b.append("("); 973 // <!ELEMENT transformNames ( alias | (transformName | special)* ) > 974 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) > 975 976 for (Element e : elements) { 977 if (first) { 978 first = false; 979 } else { 980 b.append(", "); 981 } 982 b.append(e.name); 983 if (e.type != ElementType.PCDATA) { 984 b.append("*"); 985 } 986 } 987 if (hasAlias) { 988 b.append(")"); 989 } 990 b.append(")"); 991 } else { 992 b.append(current.type == null ? "???" : current.type.source); 993 } 994 b.append(">"); 995 } 996 showComments(b, current.commentsPost, false); 997 if (isOrdered(current.name)) { 998 b.append(COMMENT_PREFIX + "<!--@ORDERED-->"); 999 } 1000 if (current.getElementStatus() != ElementStatus.regular) { 1001 b.append(COMMENT_PREFIX + "<!--@" 1002 + current.getElementStatus().toString().toUpperCase(Locale.ROOT) 1003 + "-->"); 1004 } 1005 if (elementDeprecated) { 1006 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1007 } 1008 1009 LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>(); 1010 1011 for (Attribute a : current.attributes.keySet()) { 1012 if (seen.seenAttributes.contains(a)) { 1013 continue; 1014 } 1015 seen.seenAttributes.add(a); 1016 boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*"); 1017 1018 deprecatedValues.clear(); 1019 1020 showComments(b, a.commentsPre, true); 1021 b.append("\n<!ATTLIST " + current.name + " " + a.name); 1022 if (a.type == AttributeType.ENUMERATED_TYPE) { 1023 b.append(" ("); 1024 first = true; 1025 for (String s : a.values.keySet()) { 1026 if (first) { 1027 first = false; 1028 } else { 1029 b.append(" | "); 1030 } 1031 b.append(s); 1032 if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) { 1033 deprecatedValues.add(s); 1034 } 1035 } 1036 b.append(")"); 1037 } else { 1038 b.append(' ').append(a.type); 1039 } 1040 if (a.mode != Mode.NULL) { 1041 b.append(" ").append(a.mode.source); 1042 } 1043 if (a.defaultValue != null) { 1044 b.append(" \"").append(a.defaultValue).append('"'); 1045 } 1046 b.append(" >"); 1047 showComments(b, a.commentsPost, false); 1048 // if (attributeDeprecated != deprecatedComment) { 1049 // System.out.println("*** BAD DEPRECATION ***" + a); 1050 // } 1051 if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) { 1052 b.append(COMMENT_PREFIX + "<!--@METADATA-->"); 1053 } else if (!isDistinguishing(current.name, a.name)) { 1054 b.append(COMMENT_PREFIX + "<!--@VALUE-->"); 1055 } 1056 if (attributeDeprecated) { 1057 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1058 } else if (!deprecatedValues.isEmpty()) { 1059 b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + CollectionUtilities.join(deprecatedValues, ", ") + "-->"); 1060 } 1061 } 1062 if (current.children.size() > 0) { 1063 for (Element e : current.children.keySet()) { 1064 toString(e, b, seen); 1065 } 1066 } 1067 } 1068 showComments(StringBuilder b, Set<String> comments, boolean separate)1069 private void showComments(StringBuilder b, Set<String> comments, boolean separate) { 1070 if (comments == null) { 1071 return; 1072 } 1073 if (separate && b.length() != 0) { 1074 b.append(System.lineSeparator()); 1075 } 1076 for (String c : comments) { 1077 boolean deprecatedComment = false; // the following served its purpose... c.toLowerCase(Locale.ENGLISH).contains("deprecat"); 1078 if (!deprecatedComment) { 1079 if (separate) { 1080 // special handling for very first comment 1081 if (b.length() == 0) { 1082 b.append("<!--") 1083 .append(System.lineSeparator()) 1084 .append(c) 1085 .append(System.lineSeparator()) 1086 .append("-->"); 1087 continue; 1088 } 1089 b.append(System.lineSeparator()); 1090 } else { 1091 b.append(COMMENT_PREFIX); 1092 } 1093 b.append("<!-- ").append(c).append(" -->"); 1094 } 1095 } 1096 } 1097 removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1098 public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) { 1099 for (Iterator<T> it = elements.iterator(); it.hasNext();) { 1100 T item = it.next(); 1101 if (matcher.transform(item) == Boolean.TRUE) { 1102 it.remove(); 1103 return item; 1104 } 1105 } 1106 return null; 1107 } 1108 getElements()1109 public Set<Element> getElements() { 1110 return new LinkedHashSet<Element>(nameToElement.values()); 1111 } 1112 getAttributes()1113 public Set<Attribute> getAttributes() { 1114 return new LinkedHashSet<Attribute>(nameToAttributes.values()); 1115 } 1116 isDistinguishing(String elementName, String attribute)1117 public boolean isDistinguishing(String elementName, String attribute) { 1118 return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished; 1119 } 1120 1121 static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft")); 1122 addUnmodifiable(Set<String> comment, String addition)1123 static final Set<String> addUnmodifiable(Set<String> comment, String addition) { 1124 if (comment == null) { 1125 return Collections.singleton(addition); 1126 } else { 1127 comment = new LinkedHashSet<>(comment); 1128 comment.add(addition); 1129 return Collections.unmodifiableSet(comment); 1130 } 1131 } 1132 1133 public class IllegalByDtdException extends RuntimeException { 1134 private static final long serialVersionUID = 1L; 1135 public final String elementName; 1136 public final String attributeName; 1137 public final String attributeValue; 1138 IllegalByDtdException(String elementName, String attributeName, String attributeValue)1139 public IllegalByDtdException(String elementName, String attributeName, String attributeValue) { 1140 this.elementName = elementName; 1141 this.attributeName = attributeName; 1142 this.attributeValue = attributeValue; 1143 } 1144 1145 @Override getMessage()1146 public String getMessage() { 1147 return "Dtd " + dtdType 1148 + " doesn’t allow " 1149 + "element=" + elementName 1150 + (attributeName == null ? "" : ", attribute: " + attributeName) 1151 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue); 1152 } 1153 } 1154 1155 //@SuppressWarnings("unused") isDeprecated(String elementName, String attributeName, String attributeValue)1156 public boolean isDeprecated(String elementName, String attributeName, String attributeValue) { 1157 Element element = nameToElement.get(elementName); 1158 if (element == null) { 1159 throw new IllegalByDtdException(elementName, attributeName, attributeValue); 1160 } else if (element.isDeprecatedElement) { 1161 return true; 1162 } 1163 if ("*".equals(attributeName) || "_q".equals(attributeName)) { 1164 return false; 1165 } 1166 Attribute attribute = element.getAttributeNamed(attributeName); 1167 if (attribute == null) { 1168 throw new IllegalByDtdException(elementName, attributeName, attributeValue); 1169 } else if (attribute.isDeprecatedAttribute) { 1170 return true; 1171 } 1172 return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*" 1173 } 1174 isOrdered(String elementName)1175 public boolean isOrdered(String elementName) { 1176 Element element = nameToElement.get(elementName); 1177 if (element == null) { 1178 if (elementName.startsWith("icu:")) { 1179 return false; 1180 } 1181 throw new IllegalByDtdException(elementName, null, null); 1182 } 1183 return element.isOrderedElement; 1184 } 1185 getAttributeStatus(String elementName, String attributeName)1186 public AttributeStatus getAttributeStatus(String elementName, String attributeName) { 1187 if ("_q".equals(attributeName)) { 1188 return AttributeStatus.distinguished; // special case 1189 } 1190 if ("#PCDATA".equals(elementName)) { 1191 int debug = 1; 1192 } 1193 Element element = nameToElement.get(elementName); 1194 if (element == null) { 1195 if (elementName.startsWith("icu:")) { 1196 return AttributeStatus.distinguished; 1197 } 1198 throw new IllegalByDtdException(elementName, attributeName, null); 1199 } 1200 Attribute attribute = element.getAttributeNamed(attributeName); 1201 if (attribute == null) { 1202 if (elementName.startsWith("icu:")) { 1203 return AttributeStatus.distinguished; 1204 } 1205 throw new IllegalByDtdException(elementName, attributeName, null); 1206 } 1207 return attribute.attributeStatus; 1208 } 1209 1210 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1211 private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze(); 1212 1213 static MapComparator<String> dayValueOrder = new MapComparator<String>().add( 1214 "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze(); 1215 static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add( 1216 "midnight", "am", "noon", "pm", 1217 "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2", 1218 // The ones on the following line are no longer used actively. Can be removed later? 1219 "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze(); 1220 static MapComparator<String> listPatternOrder = new MapComparator<String>().add( 1221 "start", "middle", "end", "2", "3").freeze(); 1222 static MapComparator<String> widthOrder = new MapComparator<String>().add( 1223 "abbreviated", "narrow", "short", "wide", "all").freeze(); 1224 static MapComparator<String> lengthOrder = new MapComparator<String>().add( 1225 "full", "long", "medium", "short").freeze(); 1226 static MapComparator<String> dateFieldOrder = new MapComparator<String>().add( 1227 "era", "era-short", "era-narrow", 1228 "year", "year-short", "year-narrow", 1229 "quarter", "quarter-short", "quarter-narrow", 1230 "month", "month-short", "month-narrow", 1231 "week", "week-short", "week-narrow", 1232 "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow", 1233 "day", "day-short", "day-narrow", 1234 "dayOfYear", "dayOfYear-short", "dayOfYear-narrow", 1235 "weekday", "weekday-short", "weekday-narrow", 1236 "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow", 1237 "sun", "sun-short", "sun-narrow", 1238 "mon", "mon-short", "mon-narrow", 1239 "tue", "tue-short", "tue-narrow", 1240 "wed", "wed-short", "wed-narrow", 1241 "thu", "thu-short", "thu-narrow", 1242 "fri", "fri-short", "fri-narrow", 1243 "sat", "sat-short", "sat-narrow", 1244 "dayperiod-short", "dayperiod", "dayperiod-narrow", 1245 "hour", "hour-short", "hour-narrow", 1246 "minute", "minute-short", "minute-narrow", 1247 "second", "second-short", "second-narrow", 1248 "zone", "zone-short", "zone-narrow").freeze(); 1249 static MapComparator<String> unitOrder = new MapComparator<String>().add( 1250 "acceleration-g-force", "acceleration-meter-per-second-squared", 1251 "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second", 1252 "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter", 1253 "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch", 1254 "concentr-karat", 1255 "concentr-milligram-per-deciliter", "concentr-millimole-per-liter", 1256 "concentr-part-per-million", "concentr-percent", "concentr-permille", 1257 "consumption-liter-per-kilometer", "consumption-liter-per-100kilometers", 1258 "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial", 1259 "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit", 1260 "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit", 1261 "digital-byte", "digital-bit", 1262 "duration-century", 1263 "duration-year", "duration-year-person", 1264 "duration-month", "duration-month-person", 1265 "duration-week", "duration-week-person", 1266 "duration-day", "duration-day-person", 1267 "duration-hour", "duration-minute", "duration-second", 1268 "duration-millisecond", "duration-microsecond", "duration-nanosecond", 1269 "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt", 1270 "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour", 1271 "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", "frequency-hertz", 1272 "length-kilometer", "length-meter", "length-decimeter", "length-centimeter", 1273 "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer", 1274 "length-mile", "length-yard", "length-foot", "length-inch", 1275 "length-parsec", "length-light-year", "length-astronomical-unit", 1276 "length-furlong", "length-fathom", 1277 "length-nautical-mile", "length-mile-scandinavian", 1278 "length-point", 1279 "light-lux", 1280 "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram", 1281 "mass-ton", "mass-stone", "mass-pound", "mass-ounce", 1282 "mass-ounce-troy", "mass-carat", 1283 "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt", 1284 "power-horsepower", 1285 "pressure-hectopascal", "pressure-millimeter-of-mercury", 1286 "pressure-pound-per-square-inch", "pressure-inch-hg", "pressure-millibar", "pressure-atmosphere", 1287 "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot", 1288 "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin", 1289 "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter", 1290 "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch", 1291 "volume-megaliter", "volume-hectoliter", "volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter", 1292 "volume-pint-metric", "volume-cup-metric", 1293 "volume-acre-foot", 1294 "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup", 1295 "volume-fluid-ounce", "volume-tablespoon", "volume-teaspoon").freeze(); 1296 1297 static MapComparator<String> countValueOrder = new MapComparator<String>().add( 1298 "0", "1", "zero", "one", "two", "few", "many", "other").freeze(); 1299 static MapComparator<String> unitLengthOrder = new MapComparator<String>().add( 1300 "long", "short", "narrow").freeze(); 1301 static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add( 1302 "standard", "accounting").freeze(); 1303 static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator(); 1304 1305 static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator(); 1306 1307 // Hack for US 1308 static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<String>() { 1309 @Override 1310 public int compare(String o1, String o2) { 1311 if (o1.contains("{")) { 1312 o1 = o1.replace("{", ""); 1313 } 1314 if (o2.contains("{")) { 1315 o2 = o2.replace("{", ""); 1316 } 1317 return COMP.compare(o1, o2); 1318 } 1319 1320 }; 1321 getAttributeValueComparator(String element, String attribute)1322 public static Comparator<String> getAttributeValueComparator(String element, String attribute) { 1323 return getAttributeValueComparator(DtdType.ldml, element, attribute); 1324 } 1325 getAttributeValueComparator(DtdType type, String element, String attribute)1326 static Comparator<String> getAttributeValueComparator(DtdType type, String element, String attribute) { 1327 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1328 Comparator<String> comp = valueOrdering; 1329 if (type != DtdType.ldml && type != DtdType.ldmlICU) { 1330 return comp; 1331 } 1332 if (attribute.equals("day")) { // && (element.startsWith("weekend") 1333 comp = dayValueOrder; 1334 } else if (attribute.equals("type")) { 1335 if (element.endsWith("FormatLength")) { 1336 comp = lengthOrder; 1337 } else if (element.endsWith("Width")) { 1338 comp = widthOrder; 1339 } else if (element.equals("day")) { 1340 comp = dayValueOrder; 1341 } else if (element.equals("field")) { 1342 comp = dateFieldOrder; 1343 } else if (element.equals("zone")) { 1344 comp = zoneOrder; 1345 } else if (element.equals("listPatternPart")) { 1346 comp = listPatternOrder; 1347 } else if (element.equals("currencyFormat")) { 1348 comp = currencyFormatOrder; 1349 } else if (element.equals("unitLength")) { 1350 comp = unitLengthOrder; 1351 } else if (element.equals("unit")) { 1352 comp = unitOrder; 1353 } else if (element.equals("dayPeriod")) { 1354 comp = dayPeriodOrder; 1355 } 1356 } else if (attribute.equals("count") && !element.equals("minDays")) { 1357 comp = countValueOrder; 1358 } else if (attribute.equals("cp") && element.equals("annotation")) { 1359 comp = UNICODE_SET_COMPARATOR; 1360 } 1361 return comp; 1362 } 1363 1364 /** 1365 * Comparator for attributes in CLDR files 1366 */ 1367 private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() { 1368 @Override 1369 public int compare(String element, String attribute, String value1, String value2) { 1370 Comparator<String> comp = getAttributeValueComparator(element, attribute); 1371 return comp.compare(value1, value2); 1372 } 1373 }; 1374 hasValue(String elementName)1375 public boolean hasValue(String elementName) { 1376 return nameToElement.get(elementName).type == ElementType.PCDATA; 1377 } 1378 isMetadata(XPathParts pathPlain)1379 public boolean isMetadata(XPathParts pathPlain) { 1380 for (String s : pathPlain.getElements()) { 1381 Element e = getElementFromName().get(s); 1382 if (e.elementStatus == ElementStatus.metadata) { 1383 return true; 1384 } 1385 } 1386 return false; 1387 } 1388 isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1389 public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) { 1390 // TODO Don't use hard-coded list; instead add to DTD annotations 1391 final String element1 = pathPlain.getElement(1); 1392 final String element2 = pathPlain.getElement(2); 1393 final String elementN = pathPlain.getElement(-1); 1394 switch (dtdType2) { 1395 case ldml: 1396 switch (element1) { 1397 case "generation": 1398 case "metadata": 1399 return true; 1400 } 1401 break; 1402 case ldmlBCP47: 1403 switch (element1) { 1404 case "generation": 1405 case "version": 1406 return true; 1407 } 1408 break; 1409 ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment 1410 case supplementalData: 1411 // these are NOT under /metadata/ but are actually metadata 1412 switch (element1) { 1413 case "generation": 1414 case "version": 1415 case "validity": 1416 case "references": 1417 case "coverageLevels": 1418 return true; 1419 case "transforms": 1420 return elementN.equals("comment"); 1421 case "metadata": 1422 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata. 1423 switch (element2) { 1424 case "validity": 1425 case "serialElements": 1426 case "suppress": 1427 case "distinguishing": 1428 case "blocking": 1429 case "casingData": 1430 return true; 1431 } 1432 break; 1433 } 1434 break; 1435 default: 1436 } 1437 return false; 1438 } 1439 isDeprecated(XPathParts pathPlain)1440 public boolean isDeprecated(XPathParts pathPlain) { 1441 for (int i = 0; i < pathPlain.size(); ++i) { 1442 String elementName = pathPlain.getElement(i); 1443 if (isDeprecated(elementName, "*", null)) { 1444 return true; 1445 } 1446 for (String attribute : pathPlain.getAttributeKeys(i)) { 1447 String attributeValue = pathPlain.getAttributeValue(i, attribute); 1448 if (isDeprecated(elementName, attribute, attributeValue)) { 1449 return true; 1450 } 1451 } 1452 } 1453 return false; 1454 } 1455 1456 public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings(); 1457 public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings(); 1458 public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings(); 1459 1460 private static class XPathPartsSet { 1461 private final Set<XPathParts> list = new LinkedHashSet<>(); 1462 addElement(String element)1463 private void addElement(String element) { 1464 if (list.isEmpty()) { 1465 list.add(new XPathParts().addElement(element)); 1466 } else { 1467 for (XPathParts item : list) { 1468 item.addElement(element); 1469 } 1470 } 1471 } 1472 addAttribute(String attribute, String attributeValue)1473 private void addAttribute(String attribute, String attributeValue) { 1474 for (XPathParts item : list) { 1475 item.addAttribute(attribute, attributeValue); 1476 } 1477 } 1478 setElement(int i, String string)1479 private void setElement(int i, String string) { 1480 for (XPathParts item : list) { 1481 item.setElement(i, string); 1482 } 1483 } 1484 1485 // private int size() { 1486 // return list.iterator().next().size(); 1487 // } 1488 // 1489 // private void removeElement(int i) { 1490 // for (XPathParts item : list) { 1491 // item.removeElement(i); 1492 // } 1493 // } 1494 addAttributes(String attribute, List<String> attributeValues)1495 private void addAttributes(String attribute, List<String> attributeValues) { 1496 if (attributeValues.size() == 1) { 1497 addAttribute(attribute, attributeValues.iterator().next()); 1498 } else { 1499 // duplicate all the items in the list with the given values 1500 Set<XPathParts> newList = new LinkedHashSet<>(); 1501 for (XPathParts item : list) { 1502 for (String attributeValue : attributeValues) { 1503 XPathParts newItem = item.cloneAsThawed(); 1504 newItem.addAttribute(attribute, attributeValue); 1505 newList.add(newItem); 1506 } 1507 } 1508 list.clear(); 1509 list.addAll(newList); 1510 } 1511 } 1512 toStrings()1513 private ImmutableSet<String> toStrings() { 1514 Builder<String> result = new ImmutableSet.Builder<>(); 1515 1516 for (XPathParts item : list) { 1517 result.add(item.toString()); 1518 } 1519 return result.build(); 1520 } 1521 1522 @Override toString()1523 public String toString() { 1524 return list.toString(); 1525 } 1526 } 1527 getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)1528 public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) { 1529 extras.clear(); 1530 Map<String, String> valueAttributes = new HashMap<>(); 1531 XPathPartsSet pathResult = new XPathPartsSet(); 1532 String element = null; 1533 for (int i = 0; i < pathPlain.size(); ++i) { 1534 element = pathPlain.getElement(i); 1535 pathResult.addElement(element); 1536 valueAttributes.clear(); 1537 for (String attribute : pathPlain.getAttributeKeys(i)) { 1538 AttributeStatus status = getAttributeStatus(element, attribute); 1539 final String attributeValue = pathPlain.getAttributeValue(i, attribute); 1540 switch (status) { 1541 case distinguished: 1542 AttributeType attrType = getAttributeType(element, attribute); 1543 if (attrType == AttributeType.NMTOKENS) { 1544 pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue)); 1545 } else { 1546 pathResult.addAttribute(attribute, attributeValue); 1547 } 1548 break; 1549 case value: 1550 valueAttributes.put(attribute, attributeValue); 1551 break; 1552 case metadata: 1553 break; 1554 } 1555 } 1556 if (!valueAttributes.isEmpty()) { 1557 boolean hasValue = hasValue(element); 1558 // if it doesn't have a value, we construct new child elements, with _ prefix 1559 // if it does have a value, we have to play a further trick, since 1560 // we can't have a value and child elements at the same level. 1561 // So we use a _ suffix on the element. 1562 if (hasValue) { 1563 pathResult.setElement(i, element + "_"); 1564 } else { 1565 int debug = 0; 1566 } 1567 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) { 1568 final String attribute = attributeAndValue.getKey(); 1569 final String attributeValue = attributeAndValue.getValue(); 1570 1571 Set<String> pathsShort = pathResult.toStrings(); 1572 AttributeType attrType = getAttributeType(element, attribute); 1573 for (String pathShort : pathsShort) { 1574 pathShort += "/_" + attribute; 1575 if (attrType == AttributeType.NMTOKENS) { 1576 for (String valuePart : SPACE_SPLITTER.split(attributeValue)) { 1577 extras.put(pathShort, valuePart); 1578 } 1579 } else { 1580 extras.put(pathShort, attributeValue); 1581 } 1582 } 1583 } 1584 if (hasValue) { 1585 pathResult.setElement(i, element); // restore 1586 } 1587 } 1588 } 1589 // Only add the path if it could have a value, looking at the last element 1590 if (!hasValue(element)) { 1591 return null; 1592 } 1593 return pathResult.toStrings(); 1594 } 1595 getAttributeType(String elementName, String attributeName)1596 public AttributeType getAttributeType(String elementName, String attributeName) { 1597 Element element = nameToElement.get(elementName); 1598 if (element == null) { 1599 return null; 1600 } 1601 Attribute attr = element.getAttributeNamed(attributeName); 1602 if (attr == null) { 1603 return null; 1604 } 1605 return attr.type; 1606 } 1607 1608 // TODO: add support for following to DTD annotations, and rework API 1609 1610 static final Set<String> SPACED_VALUES = ImmutableSet.of( 1611 "idValidity", 1612 "languageGroup"); 1613 getValueSplitter(XPathParts pathPlain)1614 public static Splitter getValueSplitter(XPathParts pathPlain) { 1615 if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) { 1616 return SPACE_SPLITTER; 1617 } else if (pathPlain.getElement(-1).equals("annotation") 1618 && !pathPlain.getAttributeKeys(-1).contains("tts")) { 1619 return BAR_SPLITTER; 1620 } 1621 return CR_SPLITTER; 1622 } 1623 isComment(XPathParts pathPlain, String line)1624 public static boolean isComment(XPathParts pathPlain, String line) { 1625 if (pathPlain.contains("transform")) { 1626 if (line.startsWith("#")) { 1627 return true; 1628 } 1629 } 1630 return false; 1631 } 1632 isExtraSplit(String extraPath)1633 public static boolean isExtraSplit(String extraPath) { 1634 if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) { 1635 return true; 1636 } 1637 return false; 1638 } 1639 1640 // ALWAYS KEEP AT END, FOR STATIC INIT ORDER 1641 private static final Map<DtdType, DtdData> CACHE; 1642 static { 1643 EnumMap<DtdType, DtdData> temp = new EnumMap<DtdType, DtdData>(DtdType.class); 1644 for (DtdType type : DtdType.values()) { temp.put(type, getInstance(type, null))1645 temp.put(type, getInstance(type, null)); 1646 } 1647 CACHE = Collections.unmodifiableMap(temp); 1648 } 1649 // ALWAYS KEEP AT END, FOR STATIC INIT ORDER 1650 } 1651