• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.io.File;
4 import java.io.StringReader;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.Comparator;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.LinkedHashMap;
14 import java.util.LinkedHashSet;
15 import java.util.List;
16 import java.util.Locale;
17 import java.util.Map;
18 import java.util.Map.Entry;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.concurrent.ConcurrentHashMap;
22 import java.util.concurrent.ConcurrentMap;
23 import java.util.regex.Pattern;
24 
25 import com.google.common.base.CharMatcher;
26 import com.google.common.base.Splitter;
27 import com.google.common.collect.ImmutableSet;
28 import com.google.common.collect.ImmutableSet.Builder;
29 import com.google.common.collect.ImmutableSetMultimap;
30 import com.google.common.collect.Multimap;
31 import com.google.common.collect.TreeMultimap;
32 import com.ibm.icu.dev.util.CollectionUtilities;
33 import com.ibm.icu.impl.Relation;
34 import com.ibm.icu.text.Transform;
35 
36 /**
37  * An immutable object that contains the structure of a DTD.
38  * @author markdavis
39  */
40 public class DtdData extends XMLFileReader.SimpleHandler {
    /** The platform line separator followed by four spaces (its use is outside this part of the file). */
    private static final String COMMENT_PREFIX = System.lineSeparator() + "    ";
    /** Read from the "show_all" property (default false); when true the handle* callbacks echo each declaration to System.out. */
    private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false);
    /** Not referenced in the visible part of this file. */
    private static final boolean USE_SYNTHESIZED = false;

    /** Compile-time switch for diagnostic printing to System.out. */
    private static final boolean DEBUG = false;
    /** Matches any character outside {@code -a-zA-Z0-9#_:}; used to split content models and attribute type strings into names. */
    private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]");
47 
    /** Attribute name to the attributes declared with that name (sorted keys, insertion-ordered values); frozen in freeze(). */
    private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class);
    /** Element name to element; replaced by an unmodifiable view in freeze(). Must be initialized before PCDATA and ANY below. */
    private Map<String, Element> nameToElement = new HashMap<String, Element>();
    /** Element ordering; built in freeze() only when version is null. */
    private MapComparator<String> elementComparator;
    /** Attribute ordering; built in freeze() only when version is null. */
    private MapComparator<String> attributeComparator;

    /** The element named after the DTD type's root type. */
    public final Element ROOT;
    /** Element registered under the name "#PCDATA". */
    public final Element PCDATA = elementFrom("#PCDATA");
    /** Element registered under the name "ANY". */
    public final Element ANY = elementFrom("ANY");
    /** The DTD type this data was built for. */
    public final DtdType dtdType;
    /** The version passed to the constructor; may be null. */
    public final String version;
    /** Most recently declared element, or null if an attribute was declared more recently; target for following comments. */
    private Element lastElement;
    /** Most recently declared attribute, or null if an element was declared more recently; target for following comments. */
    private Attribute lastAttribute;
    /** Comments collected ahead of the next declaration; handed to that declaration and then reset to null. */
    private Set<String> preCommentCache;
    /** Created in finish(). */
    private DtdComparator dtdComparator;
62 
63     public enum AttributeStatus {
64         distinguished ("§d"),
65         value ("§v"),
66         metadata ("§m︎");
67         public final String shortName;
AttributeStatus(String shortName)68         AttributeStatus(String shortName) {
69             this.shortName = shortName;
70         }
getShortName(AttributeStatus status)71         public static String getShortName(AttributeStatus status) {
72             return status == null ? "" : status.shortName;
73         }
74     }
75 
76     public enum Mode {
77         REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null");
78 
79         public final String source;
80 
Mode(String s)81         Mode(String s) {
82             source = s;
83         }
84 
forString(String mode)85         public static Mode forString(String mode) {
86             for (Mode value : Mode.values()) {
87                 if (value.source.equals(mode)) {
88                     return value;
89                 }
90             }
91             if (mode == null) {
92                 return NULL;
93             }
94             throw new IllegalArgumentException(mode);
95         }
96     }
97 
98     public enum AttributeType {
99         CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE
100     }
101 
102     public static class Attribute implements Named {
103         public final String name;
104         public final Element element;
105         public final Mode mode;
106         public final String defaultValue;
107         public final AttributeType type;
108         public final Map<String, Integer> values;
109         private final Set<String> commentsPre;
110         private Set<String> commentsPost;
111         private boolean isDeprecatedAttribute;
112         public AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations
113         private Set<String> deprecatedValues = Collections.emptySet();
114         public MatchValue matchValue;
115         private final Comparator<String> attributeValueComparator;
116 
Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)117         private Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) {
118             commentsPre = firstComment;
119             element = element2;
120             name = aName.intern();
121             if (name.equals("draft") // normally never permitted on elements with children, but special cases...
122                 && !element.getName().equals("collation")
123                 && !element.getName().equals("transform")) {
124                 int elementChildrenCount = element.getChildren().size();
125                 if (elementChildrenCount > 1
126                     || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) {
127                     isDeprecatedAttribute = true;
128                     if (DEBUG) {
129                         System.out.println(element.getName() + ":" + element.getChildren());
130                     }
131                 }
132             }
133             mode = mode2;
134             defaultValue = value2 == null ? null
135                 : value2.intern();
136             AttributeType _type = AttributeType.ENUMERATED_TYPE;
137             Map<String, Integer> _values = Collections.emptyMap();
138             if (split.length == 1) {
139                 try {
140                     _type = AttributeType.valueOf(split[0]);
141                 } catch (Exception e) {
142                 }
143             }
144             type = _type;
145 
146             if (_type == AttributeType.ENUMERATED_TYPE) {
147                 LinkedHashMap<String, Integer> temp = new LinkedHashMap<String, Integer>();
148                 for (String part : split) {
149                     if (part.length() != 0) {
150                         temp.put(part.intern(), temp.size());
151                     }
152                 }
153                 _values = Collections.unmodifiableMap(temp);
154             }
155             values = _values;
156             attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name);
157         }
158 
159         @Override
toString()160         public String toString() {
161             return element.name + ":" + name;
162         }
163 
appendDtdString(StringBuilder b)164         public StringBuilder appendDtdString(StringBuilder b) {
165             Attribute a = this;
166             b.append("<!ATTLIST " + element.name + " " + a.name);
167             boolean first;
168             if (a.type == AttributeType.ENUMERATED_TYPE) {
169                 b.append(" (");
170                 first = true;
171                 for (String s : a.values.keySet()) {
172                     if (deprecatedValues.contains(s)) {
173                         continue;
174                     }
175                     if (first) {
176                         first = false;
177                     } else {
178                         b.append(" | ");
179                     }
180                     b.append(s);
181                 }
182                 b.append(")");
183             } else {
184                 b.append(' ').append(a.type);
185             }
186             if (a.mode != Mode.NULL) {
187                 b.append(" ").append(a.mode.source);
188             }
189             if (a.defaultValue != null) {
190                 b.append(" \"").append(a.defaultValue).append('"');
191             }
192             b.append(" >");
193             return b;
194         }
195 
features()196         public String features() {
197             return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString())
198                 + (mode == Mode.NULL ? "" : ", mode=" + mode)
199                 + (defaultValue == null ? "" : ", default=" + defaultValue);
200         }
201 
202         @Override
getName()203         public String getName() {
204             return name;
205         }
206 
207         private static Splitter COMMA = Splitter.on(',').trimResults();
208 
addComment(String commentIn)209         public void addComment(String commentIn) {
210             if (commentIn.startsWith("@")) {
211                 // there are exactly 2 cases: deprecated and ordered
212                 switch (commentIn) {
213                 case "@METADATA":
214                     attributeStatus = AttributeStatus.metadata;
215                     break;
216                 case "@VALUE":
217                     attributeStatus = AttributeStatus.value;
218                     break;
219                 case "@DEPRECATED":
220                     isDeprecatedAttribute = true;
221                     break;
222                 default:
223                     int colonPos = commentIn.indexOf(':');
224                     if (colonPos < 0) {
225                         throw new IllegalArgumentException("Unrecognized annotation: " + commentIn);
226                     }
227                     String command = commentIn.substring(0, colonPos);
228                     String argument = commentIn.substring(colonPos + 1);
229                     switch(command) {
230                     case "@DEPRECATED":
231                         deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(argument)));
232                         break;
233                     case "@MATCH":
234                         if (matchValue != null) {
235                             throw new IllegalArgumentException("Conflicting @MATCH: " + matchValue.getName() + " & " + argument);
236                         }
237                         matchValue = MatchValue.of(argument);
238                         break;
239                     default:
240                         throw new IllegalArgumentException("Unrecognized annotation: " + commentIn);
241                     }
242                 }
243                 return;
244             }
245             commentsPost = addUnmodifiable(commentsPost, commentIn.trim());
246         }
247 
248         /**
249          * Special version of identity; only considers name and name of element
250          */
251         @Override
equals(Object obj)252         public boolean equals(Object obj) {
253             if (!(obj instanceof Attribute)) {
254                 return false;
255             }
256             Attribute that = (Attribute) obj;
257             return name.equals(that.name)
258                 && element.name.equals(that.element.name) // don't use plain element: circularity
259                 // not relevant to identity
260                 //                && Objects.equals(comment, that.comment)
261                 //                && mode.equals(that.mode)
262                 //                && Objects.equals(defaultValue, that.defaultValue)
263                 //                && type.equals(that.type)
264                 //                && values.equals(that.values)
265                 ;
266         }
267 
268         /**
269          * Special version of identity; only considers name and name of element
270          */
271         @Override
hashCode()272         public int hashCode() {
273             return name.hashCode() * 37
274                 + element.name.hashCode() // don't use plain element: circularity
275                 // not relevant to identity
276                 //                ) * 37 + Objects.hashCode(comment)) * 37
277                 //                + mode.hashCode()) * 37
278                 //                + Objects.hashCode(defaultValue)) * 37
279                 //                + type.hashCode()) * 37
280                 //                + values.hashCode()
281                 ;
282         }
283 
isDeprecated()284         public boolean isDeprecated() {
285             return isDeprecatedAttribute;
286         }
287 
isDeprecatedValue(String value)288         public boolean isDeprecatedValue(String value) {
289             return deprecatedValues.contains(value);
290         }
291 
getStatus()292         public AttributeStatus getStatus() {
293             return attributeStatus;
294         }
295 
getValueStatus(String value)296         public ValueStatus getValueStatus(String value) {
297             return deprecatedValues.contains(value)
298                 ? ValueStatus.invalid
299                     : type == AttributeType.ENUMERATED_TYPE
300                     ? (values.containsKey(value)
301                         ? ValueStatus.valid
302                             : ValueStatus.invalid)
303                         : matchValue == null
304                         ? ValueStatus.unknown :
305                             matchValue.is(value)
306                             ? ValueStatus.valid
307                                 : ValueStatus.invalid;
308         }
309 
getMatchString()310         public String getMatchString() {
311             return type == AttributeType.ENUMERATED_TYPE
312                 ? "⟨" + CollectionUtilities.join(values.keySet(), ", ") + "⟩"
313                     : matchValue != null
314                     ? "⟪" + matchValue.toString() + "⟫"
315                         : "";
316         }
317 
getMatchingName(Map<Attribute, Integer> attributes)318         public Attribute getMatchingName(Map<Attribute, Integer> attributes) {
319             for (Attribute attribute : attributes.keySet()) {
320                 if (name.equals(attribute.getName())) {
321                     return attribute;
322                 }
323             }
324             return null;
325         }
326 
327     }
328 
329     public enum ValueStatus {invalid, unknown, valid}
330 
DtdData(DtdType type, String version)331     private DtdData(DtdType type, String version) {
332         this.dtdType = type;
333         this.ROOT = elementFrom(type.rootType.toString());
334         this.version = version;
335     }
336 
addAttribute(String eName, String aName, String type, String mode, String value)337     private void addAttribute(String eName, String aName, String type, String mode, String value) {
338         Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache);
339         preCommentCache = null;
340         getAttributesFromName().put(aName, a);
341         CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size());
342         lastElement = null;
343         lastAttribute = a;
344     }
345 
346     public enum ElementType {
347         EMPTY, ANY, PCDATA("(#PCDATA)"), CHILDREN;
348         public final String source;
349 
ElementType(String s)350         private ElementType(String s) {
351             source = s;
352         }
353 
ElementType()354         private ElementType() {
355             source = name();
356         }
357     }
358 
359     interface Named {
getName()360         String getName();
361     }
362 
363     public enum ElementStatus {
364         regular, metadata
365     }
366 
367     public static class Element implements Named {
368         public final String name;
369         private String rawModel;
370         private ElementType type;
371         private final Map<Element, Integer> children = new LinkedHashMap<Element, Integer>();
372         private final Map<Attribute, Integer> attributes = new LinkedHashMap<Attribute, Integer>();
373         private Set<String> commentsPre;
374         private Set<String> commentsPost;
375         private String model;
376         private boolean isOrderedElement;
377         private boolean isDeprecatedElement;
378         private ElementStatus elementStatus = ElementStatus.regular;
379 
Element(String name2)380         private Element(String name2) {
381             name = name2.intern();
382         }
383 
setChildren(DtdData dtdData, String model, Set<String> precomments)384         private void setChildren(DtdData dtdData, String model, Set<String> precomments) {
385             this.commentsPre = precomments;
386             rawModel = model;
387             this.model = clean(model);
388             if (model.equals("EMPTY")) {
389                 type = ElementType.EMPTY;
390                 return;
391             }
392             type = ElementType.CHILDREN;
393             for (String part : FILLER.split(model)) {
394                 if (part.length() != 0) {
395                     if (part.equals("#PCDATA")) {
396                         type = ElementType.PCDATA;
397                     } else if (part.equals("ANY")) {
398                         type = ElementType.ANY;
399                     } else {
400                         CldrUtility.putNew(children, dtdData.elementFrom(part), children.size());
401                     }
402                 }
403             }
404             if ((type == ElementType.CHILDREN) == (children.size() == 0)
405                 && !model.startsWith("(#PCDATA|cp")) {
406                 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model);
407             }
408         }
409 
410         static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)");
411         static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])");
412 
clean(String model2)413         private String clean(String model2) {
414             // (x) -> ( x );
415             // x,y -> x, y
416             // x|y -> x | y
417             String result = CLEANER1.matcher(model2).replaceAll("$1 ");
418             result = CLEANER2.matcher(result).replaceAll(" $1");
419             return result.equals(model2)
420                 ? model2
421                     : result; // for debugging
422         }
423 
containsAttribute(String string)424         public boolean containsAttribute(String string) {
425             for (Attribute a : attributes.keySet()) {
426                 if (a.name.equals(string)) {
427                     return true;
428                 }
429             }
430             return false;
431         }
432 
433         @Override
toString()434         public String toString() {
435             return name;
436         }
437 
toDtdString()438         public String toDtdString() {
439             return "<!ELEMENT " + name + " " + getRawModel() + " >";
440         }
441 
getType()442         public ElementType getType() {
443             return type;
444         }
445 
getChildren()446         public Map<Element, Integer> getChildren() {
447             return Collections.unmodifiableMap(children);
448         }
449 
getAttributes()450         public Map<Attribute, Integer> getAttributes() {
451             return Collections.unmodifiableMap(attributes);
452         }
453 
454         @Override
getName()455         public String getName() {
456             return name;
457         }
458 
getChildNamed(String string)459         public Element getChildNamed(String string) {
460             for (Element e : children.keySet()) {
461                 if (e.name.equals(string)) {
462                     return e;
463                 }
464             }
465             return null;
466         }
467 
getAttributeNamed(String string)468         public Attribute getAttributeNamed(String string) {
469             for (Attribute a : attributes.keySet()) {
470                 if (a.name.equals(string)) {
471                     return a;
472                 }
473             }
474             return null;
475         }
476 
addComment(String addition)477         public void addComment(String addition) {
478             if (addition.startsWith("@")) {
479                 // there are exactly 3 cases: deprecated, ordered, and metadata
480                 switch (addition) {
481                 case "@ORDERED":
482                     isOrderedElement = true;
483                     break;
484                 case "@DEPRECATED":
485                     isDeprecatedElement = true;
486                     break;
487                 case "@METADATA":
488                     elementStatus = ElementStatus.metadata;
489                     break;
490                 default:
491                     throw new IllegalArgumentException("Unrecognized annotation: " + addition);
492                 }
493                 return;
494             }
495             commentsPost = addUnmodifiable(commentsPost, addition.trim());
496         }
497 
498         /**
499          * Special version of equals. Only the name is considered in the identity.
500          */
501         @Override
equals(Object obj)502         public boolean equals(Object obj) {
503             if (!(obj instanceof Element)) {
504                 return false;
505             }
506             Element that = (Element) obj;
507             return name.equals(that.name)
508                 // not relevant to the identity of the object
509                 //                && Objects.equals(comment, that.comment)
510                 //                && type == that.type
511                 //                && attributes.equals(that.attributes)
512                 //                && children.equals(that.children)
513                 ;
514         }
515 
516         /**
517          * Special version of hashcode. Only the name is considered in the identity.
518          */
519         @Override
hashCode()520         public int hashCode() {
521             return name.hashCode()
522                 // not relevant to the identity of the object
523                 // * 37 + Objects.hashCode(comment)
524                 //) * 37 + Objects.hashCode(type)
525                 //                ) * 37 + attributes.hashCode()
526                 //                ) * 37 + children.hashCode()
527                 ;
528         }
529 
isDeprecated()530         public boolean isDeprecated() {
531             return isDeprecatedElement;
532         }
533 
isOrdered()534         public boolean isOrdered() {
535             return isOrderedElement;
536         }
537 
getElementStatus()538         public ElementStatus getElementStatus() {
539             return elementStatus;
540         }
541 
542         /**
543          * @return the rawModel
544          */
getRawModel()545         public String getRawModel() {
546             return rawModel;
547         }
548     }
549 
elementFrom(String name)550     private Element elementFrom(String name) {
551         Element result = nameToElement.get(name);
552         if (result == null) {
553             nameToElement.put(name, result = new Element(name));
554         }
555         return result;
556     }
557 
addElement(String name2, String model)558     private void addElement(String name2, String model) {
559         Element element = elementFrom(name2);
560         element.setChildren(this, model, preCommentCache);
561         preCommentCache = null;
562         lastElement = element;
563         lastAttribute = null;
564     }
565 
addComment(String comment)566     private void addComment(String comment) {
567         comment = comment.trim();
568         if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky"
569             if (comment.startsWith("@")) {
570                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
571             }
572             preCommentCache = addUnmodifiable(preCommentCache, comment);
573         } else if (lastElement != null) {
574             lastElement.addComment(comment);
575         } else if (lastAttribute != null) {
576             lastAttribute.addComment(comment);
577         } else {
578             if (comment.startsWith("@")) {
579                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
580             }
581             preCommentCache = addUnmodifiable(preCommentCache, comment);
582         }
583     }
584 
585     // TODO hide this
586     /**
587      * @deprecated
588      */
589     @Override
handleElementDecl(String name, String model)590     public void handleElementDecl(String name, String model) {
591         if (SHOW_ALL) {
592             // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) >
593             System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >");
594         }
595         addElement(name, model);
596     }
597 
598     // TODO hide this
599     /**
600      * @deprecated
601      */
602     @Override
handleStartDtd(String name, String publicId, String systemId)603     public void handleStartDtd(String name, String publicId, String systemId) {
604         DtdType explicitDtdType = DtdType.valueOf(name);
605         if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) {
606             throw new IllegalArgumentException("Mismatch in dtdTypes");
607         }
608     };
609 
610     /**
611      * @deprecated
612      */
613     @Override
handleAttributeDecl(String eName, String aName, String type, String mode, String value)614     public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
615         if (SHOW_ALL) {
616             // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED >
617             // <!ATTLIST version number CDATA #REQUIRED >
618             // <!ATTLIST version cldrVersion CDATA #FIXED "27" >
619 
620             System.out.println("<!ATTLIST " + eName
621                 + " " + aName
622                 + " " + type
623                 + " " + mode
624                 + (value == null ? "" : " \"" + value + "\"")
625                 + " >");
626         }
627         // HACK for 1.1.1
628         if (eName.equals("draft")) {
629             eName = "week";
630         }
631         addAttribute(eName, aName, type, mode, value);
632     }
633 
634     /**
635      * @deprecated
636      */
637     @Override
handleComment(String path, String comment)638     public void handleComment(String path, String comment) {
639         if (SHOW_ALL) {
640             // <!-- true and false are deprecated. -->
641             System.out.println("<!-- " + comment.trim() + " -->");
642         }
643         addComment(comment);
644     }
645 
646     // TODO hide this
647     /**
648      * @deprecated
649      */
650     @Override
handleEndDtd()651     public void handleEndDtd() {
652         throw new XMLFileReader.AbortException();
653     }
654 
655     /**
656      * Note that it always gets the trunk version
657      * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead
658      */
getInstance(DtdType type)659     public static DtdData getInstance(DtdType type) {
660         return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory());
661     }
662 
663     /**
664      * Special form using version, used only by tests, etc.
665      */
getInstance(DtdType type, String version)666     public static DtdData getInstance(DtdType type, String version) {
667         File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory()
668             : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version);
669 
670         return getInstance(type, version, directory);
671     }
672 
673     private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE = new ConcurrentHashMap<>();
674 
675     /**
676      * Normal version of DtdData
677      * Get a DtdData, given the CLDR root directory.
678      * @param type which DtdType to return
679      * @param directory the CLDR Root directory, which contains the "common" directory.
680      * @return
681      */
getInstance(DtdType type, File directory)682     public static DtdData getInstance(DtdType type, File directory) {
683         Pair<DtdType, File> key = new Pair<>(type, directory);
684         DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory));
685         return data;
686     }
687 
getInstance(DtdType type, String version, File directory)688     private static DtdData getInstance(DtdType type, String version, File directory) {
689         DtdData simpleHandler = new DtdData(type, version);
690         XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler);
691         if (type != type.rootType) {
692             // read the real first, then add onto it.
693             readFile(type.rootType, xfr, directory);
694         }
695         readFile(type, xfr, directory);
696         // HACK
697         if (type == DtdType.ldmlICU) {
698             Element special = simpleHandler.nameToElement.get("special");
699             for (String extraElementName : Arrays.asList(
700                 "icu:breakIteratorData",
701                 "icu:UCARules",
702                 "icu:scripts",
703                 "icu:transforms",
704                 "icu:ruleBasedNumberFormats",
705                 "icu:isLeapMonth",
706                 "icu:version",
707                 "icu:breakDictionaryData",
708                 "icu:depends")) {
709                 Element extraElement = simpleHandler.nameToElement.get(extraElementName);
710                 special.children.put(extraElement, special.children.size());
711             }
712         }
713         if (simpleHandler.ROOT.children.size() == 0) {
714             throw new IllegalArgumentException(); // should never happen
715         }
716         simpleHandler.finish();
717         simpleHandler.freeze();
718         return simpleHandler;
719     }
720 
finish()721     private void finish() {
722         dtdComparator = new DtdComparator();
723     }
724 
readFile(DtdType type, XMLFileReader xfr, File directory)725     public static void readFile(DtdType type, XMLFileReader xfr, File directory) {
726         File file = new File(directory, type.dtdPath);
727         StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>"
728             + "<!DOCTYPE " + type
729             + " SYSTEM '" + file.getAbsolutePath() + "'>");
730         xfr.read(type.toString(), s, -1, true); //  DTD_TYPE_TO_FILE.get(type)
731     }
732 
freeze()733     private void freeze() {
734         if (version == null) { // only generate for new versions
735             MergeLists<String> elementMergeList = new MergeLists<String>();
736             elementMergeList.add(dtdType.toString());
737             MergeLists<String> attributeMergeList = new MergeLists<String>();
738             attributeMergeList.add("_q");
739 
740             for (Element element : nameToElement.values()) {
741                 if (element.children.size() > 0) {
742                     Collection<String> names = getNames(element.children.keySet());
743                     elementMergeList.add(names);
744                     if (DEBUG) {
745                         System.out.println(element.getName() + "\t→\t" + names);
746                     }
747                 }
748                 if (element.attributes.size() > 0) {
749                     Collection<String> names = getNames(element.attributes.keySet());
750                     attributeMergeList.add(names);
751                     if (DEBUG) {
752                         System.out.println(element.getName() + "\t→\t@" + names);
753                     }
754                 }
755             }
756             List<String> elementList = elementMergeList.merge();
757             List<String> attributeList = attributeMergeList.merge();
758             if (DEBUG) {
759                 System.out.println("Element Ordering:\t" + elementList);
760                 System.out.println("Attribute Ordering:\t" + attributeList);
761             }
762             elementComparator = new MapComparator<String>(elementList).setErrorOnMissing(true).freeze();
763             attributeComparator = new MapComparator<String>(attributeList).setErrorOnMissing(true).freeze();
764         }
765         nameToAttributes.freeze();
766         nameToElement = Collections.unmodifiableMap(nameToElement);
767     }
768 
getNames(Collection<? extends Named> keySet)769     private Collection<String> getNames(Collection<? extends Named> keySet) {
770         List<String> result = new ArrayList<String>();
771         for (Named e : keySet) {
772             result.add(e.getName());
773         }
774         return result;
775     }
776 
777     public enum DtdItem {
778         ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE
779     }
780 
781     public interface AttributeValueComparator {
compare(String element, String attribute, String value1, String value2)782         public int compare(String element, String attribute, String value1, String value2);
783     }
784 
getDtdComparator(AttributeValueComparator avc)785     public Comparator<String> getDtdComparator(AttributeValueComparator avc) {
786         return dtdComparator;
787     }
788 
789     private class DtdComparator implements Comparator<String> {
790         @Override
compare(String path1, String path2)791         public int compare(String path1, String path2) {
792             XPathParts a = XPathParts.getFrozenInstance(path1);
793             XPathParts b = XPathParts.getFrozenInstance(path2);
794             // there must always be at least one element
795             String baseA = a.getElement(0);
796             String baseB = b.getElement(0);
797             if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) {
798                 throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB);
799             }
800             int min = Math.min(a.size(), b.size());
801             Element parent = ROOT;
802             Element elementA;
803             for (int i = 1; i < min; ++i, parent = elementA) {
804                 // add extra test for "fake" elements, used in diffing. they always start with _
805                 String elementRawA = a.getElement(i);
806                 String elementRawB = b.getElement(i);
807                 if (elementRawA.startsWith("_")) {
808                     return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1;
809                 } else if (elementRawB.startsWith("_")) {
810                     return 1;
811                 }
812                 //
813                 elementA = nameToElement.get(elementRawA);
814                 Element elementB = nameToElement.get(elementRawB);
815                 if (elementA != elementB) {
816                     int aa = parent.children.get(elementA);
817                     int bb = parent.children.get(elementB);
818                     return aa - bb;
819                 }
820                 int countA = a.getAttributeCount(i);
821                 int countB = b.getAttributeCount(i);
822                 if (countA == 0 && countB == 0) {
823                     continue;
824                 }
825                 // we have two ways to compare the attributes. One based on the dtd,
826                 // and one based on explicit comparators
827 
828                 // at this point the elements are the same and correspond to elementA
829                 // in the dtd
830 
831                 // Handle the special added elements
832                 String aqValue = a.getAttributeValue(i, "_q");
833                 if (aqValue != null) {
834                     String bqValue = b.getAttributeValue(i, "_q");
835                     if (!aqValue.equals(bqValue)) {
836                         int aValue = Integer.parseInt(aqValue);
837                         int bValue = Integer.parseInt(bqValue);
838                         return aValue - bValue;
839                     }
840                     --countA;
841                     --countB;
842                 }
843 
844                 attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) {
845                     Attribute main = attr.getKey();
846                     String valueA = a.getAttributeValue(i, main.name);
847                     String valueB = b.getAttributeValue(i, main.name);
848                     if (valueA == null) {
849                         if (valueB != null) {
850                             return -1;
851                         }
852                     } else if (valueB == null) {
853                         return 1;
854                     } else if (valueA.equals(valueB)) {
855                         --countA;
856                         --countB;
857                         if (countA == 0 && countB == 0) {
858                             break attributes;
859                         }
860                         continue; // TODO
861                     } else if (main.attributeValueComparator != null) {
862                         return main.attributeValueComparator.compare(valueA, valueB);
863                     } else if (main.values.size() != 0) {
864                         int aa = main.values.get(valueA);
865                         int bb = main.values.get(valueB);
866                         return aa - bb;
867                     } else {
868                         return valueA.compareTo(valueB);
869                     }
870                 }
871                 if (countA != 0 || countB != 0) {
872                     throw new IllegalArgumentException();
873                 }
874             }
875             return a.size() - b.size();
876         }
877     }
878 
getAttributeComparator()879     public MapComparator<String> getAttributeComparator() {
880         return attributeComparator;
881     }
882 
getElementComparator()883     public MapComparator<String> getElementComparator() {
884         return elementComparator;
885     }
886 
getAttributesFromName()887     public Relation<String, Attribute> getAttributesFromName() {
888         return nameToAttributes;
889     }
890 
getElementFromName()891     public Map<String, Element> getElementFromName() {
892         return nameToElement;
893     }
894 
toString()895     public String toString() {
896         StringBuilder b = new StringBuilder();
897         // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) >
898         // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. -->
899         Seen seen = new Seen(dtdType);
900         seen.seenElements.add(ANY);
901         seen.seenElements.add(PCDATA);
902         toString(ROOT, b, seen);
903 
904         // Hack for ldmlIcu: catch the items that are not mentioned in the original
905         int currentEnd = b.length();
906         for (Element e : nameToElement.values()) {
907             toString(e, b, seen);
908         }
909         if (currentEnd != b.length()) {
910             b.insert(currentEnd,
911                 System.lineSeparator() + System.lineSeparator()
912                 + "<!-- Elements not reachable from root! -->"
913                 + System.lineSeparator());
914         }
915         return b.toString();
916     }
917 
918     static final class Seen {
919         Set<Element> seenElements = new HashSet<Element>();
920         Set<Attribute> seenAttributes = new HashSet<Attribute>();
921 
Seen(DtdType dtdType)922         public Seen(DtdType dtdType) {
923             if (dtdType.rootType == dtdType) {
924                 return;
925             }
926             DtdData otherData = DtdData.getInstance(dtdType.rootType);
927             walk(otherData, otherData.ROOT);
928             seenElements.remove(otherData.nameToElement.get("special"));
929         }
930 
walk(DtdData otherData, Element current)931         private void walk(DtdData otherData, Element current) {
932             seenElements.add(current);
933             seenAttributes.addAll(current.attributes.keySet());
934             for (Element e : current.children.keySet()) {
935                 walk(otherData, e);
936             }
937         }
938     }
939 
getDescendents(Element start, Set<Element> toAddTo)940     public Set<Element> getDescendents(Element start, Set<Element> toAddTo) {
941         if (!toAddTo.contains(start)) {
942             toAddTo.add(start);
943             for (Element e : start.children.keySet()) {
944                 getDescendents(e, toAddTo);
945             }
946         }
947         return toAddTo;
948     }
949 
toString(Element current, StringBuilder b, Seen seen)950     private void toString(Element current, StringBuilder b, Seen seen) {
951         boolean first = true;
952         if (seen.seenElements.contains(current)) {
953             return;
954         }
955         seen.seenElements.add(current);
956         boolean elementDeprecated = isDeprecated(current.name, "*", "*");
957 
958         showComments(b, current.commentsPre, true);
959         b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >");
960         if (USE_SYNTHESIZED) {
961             Element aliasElement = getElementFromName().get("alias");
962             //b.append(current.rawChildren);
963             if (!current.children.isEmpty()) {
964                 LinkedHashSet<Element> elements = new LinkedHashSet<Element>(current.children.keySet());
965                 boolean hasAlias = aliasElement != null && elements.remove(aliasElement);
966                 //boolean hasSpecial = specialElement != null && elements.remove(specialElement);
967                 if (hasAlias) {
968                     b.append("(alias |");
969                 }
970                 b.append("(");
971                 // <!ELEMENT transformNames ( alias | (transformName | special)* ) >
972                 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) >
973 
974                 for (Element e : elements) {
975                     if (first) {
976                         first = false;
977                     } else {
978                         b.append(", ");
979                     }
980                     b.append(e.name);
981                     if (e.type != ElementType.PCDATA) {
982                         b.append("*");
983                     }
984                 }
985                 if (hasAlias) {
986                     b.append(")");
987                 }
988                 b.append(")");
989             } else {
990                 b.append(current.type == null ? "???" : current.type.source);
991             }
992             b.append(">");
993         }
994         showComments(b, current.commentsPost, false);
995         if (isOrdered(current.name)) {
996             b.append(COMMENT_PREFIX + "<!--@ORDERED-->");
997         }
998         if (current.getElementStatus() != ElementStatus.regular) {
999             b.append(COMMENT_PREFIX + "<!--@"
1000                 + current.getElementStatus().toString().toUpperCase(Locale.ROOT)
1001                 + "-->");
1002         }
1003         if (elementDeprecated) {
1004             b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
1005         }
1006 
1007         LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>();
1008 
1009         for (Attribute a : current.attributes.keySet()) {
1010             if (seen.seenAttributes.contains(a)) {
1011                 continue;
1012             }
1013             seen.seenAttributes.add(a);
1014             boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*");
1015 
1016             deprecatedValues.clear();
1017 
1018             showComments(b, a.commentsPre, true);
1019             b.append("\n<!ATTLIST " + current.name + " " + a.name);
1020             if (a.type == AttributeType.ENUMERATED_TYPE) {
1021                 b.append(" (");
1022                 first = true;
1023                 for (String s : a.values.keySet()) {
1024                     if (first) {
1025                         first = false;
1026                     } else {
1027                         b.append(" | ");
1028                     }
1029                     b.append(s);
1030                     if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) {
1031                         deprecatedValues.add(s);
1032                     }
1033                 }
1034                 b.append(")");
1035             } else {
1036                 b.append(' ').append(a.type);
1037             }
1038             if (a.mode != Mode.NULL) {
1039                 b.append(" ").append(a.mode.source);
1040             }
1041             if (a.defaultValue != null) {
1042                 b.append(" \"").append(a.defaultValue).append('"');
1043             }
1044             b.append(" >");
1045             showComments(b, a.commentsPost, false);
1046 //            if (attributeDeprecated != deprecatedComment) {
1047 //                System.out.println("*** BAD DEPRECATION ***" + a);
1048 //            }
1049             if (a.matchValue != null) {
1050                 b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->");
1051             }
1052             if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) {
1053                 b.append(COMMENT_PREFIX + "<!--@METADATA-->");
1054             } else if (!isDistinguishing(current.name, a.name)) {
1055                 b.append(COMMENT_PREFIX + "<!--@VALUE-->");
1056             }
1057             if (attributeDeprecated) {
1058                 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
1059             } else if (!deprecatedValues.isEmpty()) {
1060                 b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + CollectionUtilities.join(deprecatedValues, ", ") + "-->");
1061             }
1062         }
1063         if (current.children.size() > 0) {
1064             for (Element e : current.children.keySet()) {
1065                 toString(e, b, seen);
1066             }
1067         }
1068     }
1069 
showComments(StringBuilder b, Set<String> comments, boolean separate)1070     private void showComments(StringBuilder b, Set<String> comments, boolean separate) {
1071         if (comments == null) {
1072             return;
1073         }
1074         if (separate && b.length() != 0) {
1075             b.append(System.lineSeparator());
1076         }
1077         for (String c : comments) {
1078             boolean deprecatedComment = false; // the following served its purpose... c.toLowerCase(Locale.ENGLISH).contains("deprecat");
1079             if (!deprecatedComment) {
1080                 if (separate) {
1081                     // special handling for very first comment
1082                     if (b.length() == 0) {
1083                         b.append("<!--")
1084                         .append(System.lineSeparator())
1085                         .append(c)
1086                         .append(System.lineSeparator())
1087                         .append("-->");
1088                         continue;
1089                     }
1090                     b.append(System.lineSeparator());
1091                 } else {
1092                     b.append(COMMENT_PREFIX);
1093                 }
1094                 b.append("<!-- ").append(c).append(" -->");
1095             }
1096         }
1097     }
1098 
removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1099     public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) {
1100         for (Iterator<T> it = elements.iterator(); it.hasNext();) {
1101             T item = it.next();
1102             if (matcher.transform(item) == Boolean.TRUE) {
1103                 it.remove();
1104                 return item;
1105             }
1106         }
1107         return null;
1108     }
1109 
getElements()1110     public Set<Element> getElements() {
1111         return new LinkedHashSet<Element>(nameToElement.values());
1112     }
1113 
getAttributes()1114     public Set<Attribute> getAttributes() {
1115         return new LinkedHashSet<Attribute>(nameToAttributes.values());
1116     }
1117 
isDistinguishing(String elementName, String attribute)1118     public boolean isDistinguishing(String elementName, String attribute) {
1119         return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished;
1120     }
1121 
1122     static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft"));
1123 
addUnmodifiable(Set<String> comment, String addition)1124     static final Set<String> addUnmodifiable(Set<String> comment, String addition) {
1125         if (comment == null) {
1126             return Collections.singleton(addition);
1127         } else {
1128             comment = new LinkedHashSet<>(comment);
1129             comment.add(addition);
1130             return Collections.unmodifiableSet(comment);
1131         }
1132     }
1133 
1134     public class IllegalByDtdException extends RuntimeException {
1135         private static final long serialVersionUID = 1L;
1136         public final String elementName;
1137         public final String attributeName;
1138         public final String attributeValue;
1139 
IllegalByDtdException(String elementName, String attributeName, String attributeValue)1140         public IllegalByDtdException(String elementName, String attributeName, String attributeValue) {
1141             this.elementName = elementName;
1142             this.attributeName = attributeName;
1143             this.attributeValue = attributeValue;
1144         }
1145 
1146         @Override
getMessage()1147         public String getMessage() {
1148             return "Dtd " + dtdType
1149                 + " doesn’t allow "
1150                 + "element=" + elementName
1151                 + (attributeName == null ? "" : ", attribute: " + attributeName)
1152                 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue);
1153         }
1154     }
1155 
1156     //@SuppressWarnings("unused")
isDeprecated(String elementName, String attributeName, String attributeValue)1157     public boolean isDeprecated(String elementName, String attributeName, String attributeValue) {
1158         Element element = nameToElement.get(elementName);
1159         if (element == null) {
1160             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1161         } else if (element.isDeprecatedElement) {
1162             return true;
1163         }
1164         if ("*".equals(attributeName) || "_q".equals(attributeName)) {
1165             return false;
1166         }
1167         Attribute attribute = element.getAttributeNamed(attributeName);
1168         if (attribute == null) {
1169             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1170         } else if (attribute.isDeprecatedAttribute) {
1171             return true;
1172         }
1173         return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*"
1174     }
1175 
isOrdered(String elementName)1176     public boolean isOrdered(String elementName) {
1177         Element element = nameToElement.get(elementName);
1178         if (element == null) {
1179             if (elementName.startsWith("icu:")) {
1180                 return false;
1181             }
1182             throw new IllegalByDtdException(elementName, null, null);
1183         }
1184         return element.isOrderedElement;
1185     }
1186 
getAttributeStatus(String elementName, String attributeName)1187     public AttributeStatus getAttributeStatus(String elementName, String attributeName) {
1188         if ("_q".equals(attributeName)) {
1189             return AttributeStatus.distinguished; // special case
1190         }
1191         Element element = nameToElement.get(elementName);
1192         if (element == null) {
1193             if (elementName.startsWith("icu:")) {
1194                 return AttributeStatus.distinguished;
1195             }
1196             throw new IllegalByDtdException(elementName, attributeName, null);
1197         }
1198         Attribute attribute = element.getAttributeNamed(attributeName);
1199         if (attribute == null) {
1200             if (elementName.startsWith("icu:")) {
1201                 return AttributeStatus.distinguished;
1202             }
1203             throw new IllegalByDtdException(elementName, attributeName, null);
1204         }
1205         return attribute.attributeStatus;
1206     }
1207 
1208     // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1209     private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze();
1210 
1211     static MapComparator<String> dayValueOrder = new MapComparator<String>().add(
1212         "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze();
1213     static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add(
1214         "midnight", "am", "noon", "pm",
1215         "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2",
1216         // The ones on the following line are no longer used actively. Can be removed later?
1217         "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze();
1218     static MapComparator<String> listPatternOrder = new MapComparator<String>().add(
1219         "start", "middle", "end", "2", "3").freeze();
1220     static MapComparator<String> widthOrder = new MapComparator<String>().add(
1221         "abbreviated", "narrow", "short", "wide", "all").freeze();
1222     static MapComparator<String> lengthOrder = new MapComparator<String>().add(
1223         "full", "long", "medium", "short").freeze();
1224     static MapComparator<String> dateFieldOrder = new MapComparator<String>().add(
1225         "era", "era-short", "era-narrow",
1226         "year", "year-short", "year-narrow",
1227         "quarter", "quarter-short", "quarter-narrow",
1228         "month", "month-short", "month-narrow",
1229         "week", "week-short", "week-narrow",
1230         "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow",
1231         "day", "day-short", "day-narrow",
1232         "dayOfYear", "dayOfYear-short", "dayOfYear-narrow",
1233         "weekday", "weekday-short", "weekday-narrow",
1234         "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow",
1235         "sun", "sun-short", "sun-narrow",
1236         "mon", "mon-short", "mon-narrow",
1237         "tue", "tue-short", "tue-narrow",
1238         "wed", "wed-short", "wed-narrow",
1239         "thu", "thu-short", "thu-narrow",
1240         "fri", "fri-short", "fri-narrow",
1241         "sat", "sat-short", "sat-narrow",
1242         "dayperiod-short", "dayperiod", "dayperiod-narrow",
1243         "hour", "hour-short", "hour-narrow",
1244         "minute", "minute-short", "minute-narrow",
1245         "second", "second-short", "second-narrow",
1246         "zone", "zone-short", "zone-narrow").freeze();
1247     static MapComparator<String> unitOrder = new MapComparator<String>().add(
1248         "acceleration-g-force", "acceleration-meter-per-second-squared",
1249         "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second",
1250         "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter",
1251         "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch",
1252         "area-dunam",
1253         "concentr-karat",
1254         "concentr-milligram-per-deciliter", "concentr-millimole-per-liter",
1255         "concentr-part-per-million", "concentr-percent", "concentr-permille", "concentr-permyriad",
1256         "concentr-mole",
1257         "consumption-liter-per-kilometer", "consumption-liter-per-100kilometers",
1258         "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial",
1259         "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit",
1260         "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit",
1261         "digital-byte", "digital-bit",
1262         "duration-century", "duration-decade",
1263         "duration-year", "duration-year-person",
1264         "duration-month", "duration-month-person",
1265         "duration-week", "duration-week-person",
1266         "duration-day", "duration-day-person",
1267         "duration-hour", "duration-minute", "duration-second",
1268         "duration-millisecond", "duration-microsecond", "duration-nanosecond",
1269         "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt",
1270         "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour",
1271         "energy-electronvolt",
1272         "energy-british-thermal-unit",
1273         "energy-therm-us",
1274         "force-pound-force",
1275         "force-newton",
1276         "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", "frequency-hertz",
1277         "graphics-em", "graphics-pixel", "graphics-megapixel",
1278         "graphics-pixel-per-centimeter", "graphics-pixel-per-inch",
1279         "graphics-dot-per-centimeter", "graphics-dot-per-inch",
1280         "length-kilometer", "length-meter", "length-decimeter", "length-centimeter",
1281         "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer",
1282         "length-mile", "length-yard", "length-foot", "length-inch",
1283         "length-parsec", "length-light-year", "length-astronomical-unit",
1284         "length-furlong", "length-fathom",
1285         "length-nautical-mile", "length-mile-scandinavian",
1286         "length-point",
1287         "length-solar-radius",
1288         "light-lux",
1289         "light-solar-luminosity",
1290         "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram",
1291         "mass-ton", "mass-stone", "mass-pound", "mass-ounce",
1292         "mass-ounce-troy", "mass-carat",
1293         "mass-dalton",
1294         "mass-earth-mass",
1295         "mass-solar-mass",
1296         "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt",
1297         "power-horsepower",
1298         "pressure-millimeter-of-mercury",
1299         "pressure-pound-per-square-inch", "pressure-inch-hg", "pressure-bar", "pressure-millibar", "pressure-atmosphere",
1300         "pressure-pascal",
1301         "pressure-hectopascal",
1302         "pressure-kilopascal",
1303         "pressure-megapascal",
1304         "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot",
1305         "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin",
1306         "torque-pound-foot",
1307         "torque-newton-meter",
1308         "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter",
1309         "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch",
1310         "volume-megaliter", "volume-hectoliter", "volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter",
1311         "volume-pint-metric", "volume-cup-metric",
1312         "volume-acre-foot",
1313         "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup",
1314         "volume-fluid-ounce", "volume-fluid-ounce-imperial", "volume-tablespoon", "volume-teaspoon",
1315         "volume-barrel").freeze();
1316 
1317     static MapComparator<String> countValueOrder = new MapComparator<String>().add(
1318         "0", "1", "zero", "one", "two", "few", "many", "other").freeze();
1319     static MapComparator<String> unitLengthOrder = new MapComparator<String>().add(
1320         "long", "short", "narrow").freeze();
1321     static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add(
1322         "standard", "accounting").freeze();
1323     static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator();
1324 
1325     static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator();
1326 
1327     // Hack for US
1328     static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<String>() {
1329         @Override
1330         public int compare(String o1, String o2) {
1331             if (o1.contains("{")) {
1332                 o1 = o1.replace("{", "");
1333             }
1334             if (o2.contains("{")) {
1335                 o2 = o2.replace("{", "");
1336             }
1337             return COMP.compare(o1, o2);
1338         }
1339 
1340     };
1341 
getAttributeValueComparator(String element, String attribute)1342     public static Comparator<String> getAttributeValueComparator(String element, String attribute) {
1343         return getAttributeValueComparator(DtdType.ldml, element, attribute);
1344     }
1345 
getAttributeValueComparator(DtdType type, String element, String attribute)1346     static Comparator<String> getAttributeValueComparator(DtdType type, String element, String attribute) {
1347         // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1348         Comparator<String> comp = valueOrdering;
1349         if (type != DtdType.ldml && type != DtdType.ldmlICU) {
1350             return comp;
1351         }
1352         if (attribute.equals("day")) { // && (element.startsWith("weekend")
1353             comp = dayValueOrder;
1354         } else if (attribute.equals("type")) {
1355             if (element.endsWith("FormatLength")) {
1356                 comp = lengthOrder;
1357             } else if (element.endsWith("Width")) {
1358                 comp = widthOrder;
1359             } else if (element.equals("day")) {
1360                 comp = dayValueOrder;
1361             } else if (element.equals("field")) {
1362                 comp = dateFieldOrder;
1363             } else if (element.equals("zone")) {
1364                 comp = zoneOrder;
1365             } else if (element.equals("listPatternPart")) {
1366                 comp = listPatternOrder;
1367             } else if (element.equals("currencyFormat")) {
1368                 comp = currencyFormatOrder;
1369             } else if (element.equals("unitLength")) {
1370                 comp = unitLengthOrder;
1371             } else if (element.equals("unit")) {
1372                 comp = unitOrder;
1373             } else if (element.equals("dayPeriod")) {
1374                 comp = dayPeriodOrder;
1375             }
1376         } else if (attribute.equals("count") && !element.equals("minDays")) {
1377             comp = countValueOrder;
1378         } else if (attribute.equals("cp") && element.equals("annotation")) {
1379             comp = UNICODE_SET_COMPARATOR;
1380         }
1381         return comp;
1382     }
1383 
1384     /**
1385      * Comparator for attributes in CLDR files
1386      */
1387     private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() {
1388         @Override
1389         public int compare(String element, String attribute, String value1, String value2) {
1390             Comparator<String> comp = getAttributeValueComparator(element, attribute);
1391             return comp.compare(value1, value2);
1392         }
1393     };
1394 
hasValue(String elementName)1395     public boolean hasValue(String elementName) {
1396         return nameToElement.get(elementName).type == ElementType.PCDATA;
1397     }
1398 
isMetadata(XPathParts pathPlain)1399     public boolean isMetadata(XPathParts pathPlain) {
1400         for (String s : pathPlain.getElements()) {
1401             Element e = getElementFromName().get(s);
1402             if (e.elementStatus == ElementStatus.metadata) {
1403                 return true;
1404             }
1405         }
1406         return false;
1407     }
1408 
isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1409     public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) {
1410         // TODO Don't use hard-coded list; instead add to DTD annotations
1411         final String element1 = pathPlain.getElement(1);
1412         final String element2 = pathPlain.getElement(2);
1413         final String elementN = pathPlain.getElement(-1);
1414         switch (dtdType2) {
1415         case ldml:
1416             switch (element1) {
1417             case "generation":
1418             case "metadata":
1419                 return true;
1420             }
1421             break;
1422         case ldmlBCP47:
1423             switch (element1) {
1424             case "generation":
1425             case "version":
1426                 return true;
1427             }
1428             break;
1429             ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment
1430         case supplementalData:
1431             // these are NOT under /metadata/ but are actually metadata
1432             switch (element1) {
1433             case "generation":
1434             case "version":
1435             case "validity":
1436             case "references":
1437             case "coverageLevels":
1438                 return true;
1439             case "transforms":
1440                 return elementN.equals("comment");
1441             case "metadata":
1442                 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata.
1443                 switch (element2) {
1444                 case "validity":
1445                 case "serialElements":
1446                 case "suppress":
1447                 case "distinguishing":
1448                 case "blocking":
1449                 case "casingData":
1450                     return true;
1451                 }
1452                 break;
1453             }
1454             break;
1455         default:
1456         }
1457         return false;
1458     }
1459 
isDeprecated(XPathParts pathPlain)1460     public boolean isDeprecated(XPathParts pathPlain) {
1461         for (int i = 0; i < pathPlain.size(); ++i) {
1462             String elementName = pathPlain.getElement(i);
1463             if (isDeprecated(elementName, "*", null)) {
1464                 return true;
1465             }
1466             for (String attribute : pathPlain.getAttributeKeys(i)) {
1467                 String attributeValue = pathPlain.getAttributeValue(i, attribute);
1468                 if (isDeprecated(elementName, attribute, attributeValue)) {
1469                     return true;
1470                 }
1471             }
1472         }
1473         return false;
1474     }
1475 
1476     public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings();
1477     public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings();
1478     public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings();
1479 
1480     private static class XPathPartsSet {
1481         private final Set<XPathParts> list = new LinkedHashSet<>();
1482 
addElement(String element)1483         private void addElement(String element) {
1484             if (list.isEmpty()) {
1485                 list.add(new XPathParts().addElement(element));
1486             } else {
1487                 for (XPathParts item : list) {
1488                     item.addElement(element);
1489                 }
1490             }
1491         }
1492 
addAttribute(String attribute, String attributeValue)1493         private void addAttribute(String attribute, String attributeValue) {
1494             for (XPathParts item : list) {
1495                 item.addAttribute(attribute, attributeValue);
1496             }
1497         }
1498 
setElement(int i, String string)1499         private void setElement(int i, String string) {
1500             for (XPathParts item : list) {
1501                 item.setElement(i, string);
1502             }
1503         }
1504 
addAttributes(String attribute, List<String> attributeValues)1505         private void addAttributes(String attribute, List<String> attributeValues) {
1506             if (attributeValues.size() == 1) {
1507                 addAttribute(attribute, attributeValues.iterator().next());
1508             } else {
1509                 // duplicate all the items in the list with the given values
1510                 Set<XPathParts> newList = new LinkedHashSet<>();
1511                 for (XPathParts item : list) {
1512                     for (String attributeValue : attributeValues) {
1513                         XPathParts newItem = item.cloneAsThawed();
1514                         newItem.addAttribute(attribute, attributeValue);
1515                         newList.add(newItem);
1516                     }
1517                 }
1518                 list.clear();
1519                 list.addAll(newList);
1520             }
1521         }
1522 
toStrings()1523         private ImmutableSet<String> toStrings() {
1524             Builder<String> result = new ImmutableSet.Builder<>();
1525 
1526             for (XPathParts item : list) {
1527                 result.add(item.toString());
1528             }
1529             return result.build();
1530         }
1531 
1532         @Override
toString()1533         public String toString() {
1534             return list.toString();
1535         }
1536     }
1537 
getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)1538     public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) {
1539         extras.clear();
1540         Map<String, String> valueAttributes = new HashMap<>();
1541         XPathPartsSet pathResult = new XPathPartsSet();
1542         String element = null;
1543         for (int i = 0; i < pathPlain.size(); ++i) {
1544             element = pathPlain.getElement(i);
1545             pathResult.addElement(element);
1546             valueAttributes.clear();
1547             for (String attribute : pathPlain.getAttributeKeys(i)) {
1548                 AttributeStatus status = getAttributeStatus(element, attribute);
1549                 final String attributeValue = pathPlain.getAttributeValue(i, attribute);
1550                 switch (status) {
1551                 case distinguished:
1552                     AttributeType attrType = getAttributeType(element, attribute);
1553                     if (attrType == AttributeType.NMTOKENS) {
1554                         pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue));
1555                     } else {
1556                         pathResult.addAttribute(attribute, attributeValue);
1557                     }
1558                     break;
1559                 case value:
1560                     valueAttributes.put(attribute, attributeValue);
1561                     break;
1562                 case metadata:
1563                     break;
1564                 }
1565             }
1566             if (!valueAttributes.isEmpty()) {
1567                 boolean hasValue = hasValue(element);
1568                 // if it doesn't have a value, we construct new child elements, with _ prefix
1569                 // if it does have a value, we have to play a further trick, since
1570                 // we can't have a value and child elements at the same level.
1571                 // So we use a _ suffix on the element.
1572                 if (hasValue) {
1573                     pathResult.setElement(i, element + "_");
1574                 } else {
1575                     int debug = 0;
1576                 }
1577                 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) {
1578                     final String attribute = attributeAndValue.getKey();
1579                     final String attributeValue = attributeAndValue.getValue();
1580 
1581                     Set<String> pathsShort = pathResult.toStrings();
1582                     AttributeType attrType = getAttributeType(element, attribute);
1583                     for (String pathShort : pathsShort) {
1584                         pathShort += "/_" + attribute;
1585                         if (attrType == AttributeType.NMTOKENS) {
1586                             for (String valuePart : SPACE_SPLITTER.split(attributeValue)) {
1587                                 extras.put(pathShort, valuePart);
1588                             }
1589                         } else {
1590                             extras.put(pathShort, attributeValue);
1591                         }
1592                     }
1593                 }
1594                 if (hasValue) {
1595                     pathResult.setElement(i, element); // restore
1596                 }
1597             }
1598         }
1599         // Only add the path if it could have a value, looking at the last element
1600         if (!hasValue(element)) {
1601             return null;
1602         }
1603         return pathResult.toStrings();
1604     }
1605 
getAttributeType(String elementName, String attributeName)1606     public AttributeType getAttributeType(String elementName, String attributeName) {
1607         Attribute attr = getAttribute(elementName, attributeName);
1608         return (attr != null) ? attr.type : null;
1609     }
1610 
getAttribute(String elementName, String attributeName)1611     public Attribute getAttribute(String elementName, String attributeName) {
1612         Element element = nameToElement.get(elementName);
1613         return (element != null) ? element.getAttributeNamed(attributeName) : null;
1614     }
1615 
1616     // TODO: add support for following to DTD annotations, and rework API
1617 
1618     static final Set<String> SPACED_VALUES = ImmutableSet.of(
1619         "idValidity",
1620         "languageGroup");
1621 
getValueSplitter(XPathParts pathPlain)1622     public static Splitter getValueSplitter(XPathParts pathPlain) {
1623         if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) {
1624             return SPACE_SPLITTER;
1625         } else if (pathPlain.getElement(-1).equals("annotation")
1626             && !pathPlain.getAttributeKeys(-1).contains("tts")) {
1627             return BAR_SPLITTER;
1628         }
1629         return CR_SPLITTER;
1630     }
1631 
isComment(XPathParts pathPlain, String line)1632     public static boolean isComment(XPathParts pathPlain, String line) {
1633         if (pathPlain.contains("transform")) {
1634             if (line.startsWith("#")) {
1635                 return true;
1636             }
1637         }
1638         return false;
1639     }
1640 
isExtraSplit(String extraPath)1641     public static boolean isExtraSplit(String extraPath) {
1642         if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) {
1643             return true;
1644         }
1645         return false;
1646     }
1647 
1648     /**
1649      * Return the value status for an EAV
1650      */
getValueStatus(String elementName, String attributeName, String value)1651     public ValueStatus getValueStatus(String elementName, String attributeName, String value) {
1652         Element element = nameToElement.get(elementName);
1653         if (element == null) {
1654             return ValueStatus.invalid;
1655         }
1656         Attribute attr = element.getAttributeNamed(attributeName);
1657         if (attr == null) {
1658             return ValueStatus.invalid;
1659         }
1660         return attr.getValueStatus(value);
1661     }
1662 
1663     /**
1664      * Return element-attribute pairs with non-enumerated values, for quick checks.
1665      */
getNonEnumerated(Map<String,String> matchValues)1666     public Multimap<String, String> getNonEnumerated(Map<String,String> matchValues) {
1667         Multimap<String,String> nonEnumeratedElementToAttribute = TreeMultimap.create(); // make tree for ease of debugging
1668         for (Entry<String, Element> entry : nameToElement.entrySet()) {
1669             Element element = entry.getValue();
1670             for (Attribute attribute : element.attributes.keySet()) {
1671                 if (attribute.type != AttributeType.ENUMERATED_TYPE) {
1672                     String elementName = element.getName();
1673                     String attrName = attribute.getName();
1674                     nonEnumeratedElementToAttribute.put(elementName, attrName);
1675                     if (attribute.matchValue != null) {
1676                         matchValues.put(elementName + "\t" + attrName, attribute.matchValue.getName());
1677                     }
1678                 }
1679             }
1680         }
1681         return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute);
1682     }
1683 }
1684