• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.io.File;
4 import java.io.StringReader;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.Comparator;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.LinkedHashMap;
14 import java.util.LinkedHashSet;
15 import java.util.List;
16 import java.util.Locale;
17 import java.util.Map;
18 import java.util.Map.Entry;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.concurrent.ConcurrentHashMap;
22 import java.util.concurrent.ConcurrentMap;
23 import java.util.regex.Pattern;
24 
25 import com.google.common.base.CharMatcher;
26 import com.google.common.base.Joiner;
27 import com.google.common.base.Splitter;
28 import com.google.common.collect.ImmutableSet;
29 import com.google.common.collect.ImmutableSet.Builder;
30 import com.google.common.collect.ImmutableSetMultimap;
31 import com.google.common.collect.Multimap;
32 import com.google.common.collect.TreeMultimap;
33 import com.ibm.icu.impl.Relation;
34 import com.ibm.icu.text.Transform;
35 
36 /**
37  * An immutable object that contains the structure of a DTD.
38  * @author markdavis
39  */
40 public class DtdData extends XMLFileReader.SimpleHandler {
41     private static final String COMMENT_PREFIX = System.lineSeparator() + "    ";
42     private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false);
43     private static final boolean USE_SYNTHESIZED = false;
44 
45     private static final boolean DEBUG = false;
46     private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]");
47 
48     private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class);
49     private Map<String, Element> nameToElement = new HashMap<>();
50     private MapComparator<String> elementComparator;
51     private MapComparator<String> attributeComparator;
52 
53     public final Element ROOT;
54     public final Element PCDATA = elementFrom("#PCDATA");
55     public final Element ANY = elementFrom("ANY");
56     public final DtdType dtdType;
57     public final String version;
58     private Element lastElement;
59     private Attribute lastAttribute;
60     private Set<String> preCommentCache;
61     private DtdComparator dtdComparator;
62 
63     public enum AttributeStatus {
64         distinguished ("§d"),
65         value ("§v"),
66         metadata ("§m︎");
67         public final String shortName;
AttributeStatus(String shortName)68         AttributeStatus(String shortName) {
69             this.shortName = shortName;
70         }
getShortName(AttributeStatus status)71         public static String getShortName(AttributeStatus status) {
72             return status == null ? "" : status.shortName;
73         }
74     }
75 
76     public enum Mode {
77         REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null");
78 
79         public final String source;
80 
Mode(String s)81         Mode(String s) {
82             source = s;
83         }
84 
forString(String mode)85         public static Mode forString(String mode) {
86             for (Mode value : Mode.values()) {
87                 if (value.source.equals(mode)) {
88                     return value;
89                 }
90             }
91             if (mode == null) {
92                 return NULL;
93             }
94             throw new IllegalArgumentException(mode);
95         }
96     }
97 
98     public enum AttributeType {
99         CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE
100     }
101 
102     static final Set<String> DRAFT_ON_NON_LEAF_ALLOWED = ImmutableSet.of("collation", "transform", "unitPreferenceData", "rulesetGrouping");
103 
104     public static class Attribute implements Named {
105         private static final Joiner JOINER_COMMA_SPACE = Joiner.on(", ");
106         public static final String AUG_TRAIL = "⟫";
107         public static final String AUG_LEAD = "⟪";
108         public static final String ENUM_TRAIL = "⟩";
109         public static final String ENUM_LEAD = "⟨";
110         public static final Pattern LEAD_TRAIL = Pattern.compile("(.*[" + AUG_LEAD + ENUM_LEAD + "])(.*)([" + AUG_TRAIL + ENUM_TRAIL + "].*)");
111         public final String name;
112         public final Element element;
113         public final Mode mode;
114         public final String defaultValue;
115         public final AttributeType type;
116         public final Map<String, Integer> values;
117         private final Set<String> commentsPre;
118         private Set<String> commentsPost;
119         private boolean isDeprecatedAttribute;
120         public AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations, or for xml: attributes
121         private Set<String> deprecatedValues = Collections.emptySet();
122         public MatchValue matchValue;
123         private final Comparator<String> attributeValueComparator;
124 
Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)125         private Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) {
126             commentsPre = firstComment;
127             element = element2;
128             name = aName.intern();
129             if (name.equals("draft") // normally never permitted on elements with children, but special cases...
130                 && !DRAFT_ON_NON_LEAF_ALLOWED.contains(element.getName())) {
131                 int elementChildrenCount = element.getChildren().size();
132                 if (elementChildrenCount > 1
133                     || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) {
134                     isDeprecatedAttribute = true;
135                     if (DEBUG) {
136                         System.out.println(element.getName() + ":" + element.getChildren());
137                     }
138                 }
139             } else if (name.startsWith("xml:")) {
140                 attributeStatus = AttributeStatus.metadata;
141             }
142             mode = mode2;
143             defaultValue = value2 == null ? null
144                 : value2.intern();
145             AttributeType _type = AttributeType.ENUMERATED_TYPE;
146             Map<String, Integer> _values = Collections.emptyMap();
147             if (split.length == 1) {
148                 try {
149                     _type = AttributeType.valueOf(split[0]);
150                 } catch (Exception e) {
151                 }
152             }
153             type = _type;
154 
155             if (_type == AttributeType.ENUMERATED_TYPE) {
156                 LinkedHashMap<String, Integer> temp = new LinkedHashMap<>();
157                 for (String part : split) {
158                     if (part.length() != 0) {
159                         temp.put(part.intern(), temp.size());
160                     }
161                 }
162                 _values = Collections.unmodifiableMap(temp);
163             }
164             values = _values;
165             attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name);
166         }
167 
168         @Override
toString()169         public String toString() {
170             return element.name + ":" + name;
171         }
172 
getSampleValue()173         public String getSampleValue() {
174             return type == AttributeType.ENUMERATED_TYPE  ? (values.containsKey("year") ? "year" : values.keySet().iterator().next())
175                 : matchValue != null ? matchValue.getSample()
176                     : MatchValue.DEFAULT_SAMPLE;
177         }
178 
appendDtdString(StringBuilder b)179         public StringBuilder appendDtdString(StringBuilder b) {
180             Attribute a = this;
181             b.append("<!ATTLIST " + element.name + " " + a.name);
182             boolean first;
183             if (a.type == AttributeType.ENUMERATED_TYPE) {
184                 b.append(" (");
185                 first = true;
186                 for (String s : a.values.keySet()) {
187                     if (deprecatedValues.contains(s)) {
188                         continue;
189                     }
190                     if (first) {
191                         first = false;
192                     } else {
193                         b.append(" | ");
194                     }
195                     b.append(s);
196                 }
197                 b.append(")");
198             } else {
199                 b.append(' ').append(a.type);
200             }
201             if (a.mode != Mode.NULL) {
202                 b.append(" ").append(a.mode.source);
203             }
204             if (a.defaultValue != null) {
205                 b.append(" \"").append(a.defaultValue).append('"');
206             }
207             b.append(" >");
208             return b;
209         }
210 
features()211         public String features() {
212             return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString())
213                 + (mode == Mode.NULL ? "" : ", mode=" + mode)
214                 + (defaultValue == null ? "" : ", default=" + defaultValue);
215         }
216 
217         @Override
getName()218         public String getName() {
219             return name;
220         }
221 
222         private static Splitter COMMA = Splitter.on(',').trimResults();
223 
addComment(String commentIn)224         public void addComment(String commentIn) {
225             if (commentIn.startsWith("@")) {
226                 switch (commentIn) {
227                 case "@METADATA":
228                     attributeStatus = AttributeStatus.metadata;
229                     break;
230                 case "@VALUE":
231                     attributeStatus = AttributeStatus.value;
232                     break;
233                 case "@DEPRECATED":
234                     isDeprecatedAttribute = true;
235                     break;
236                 default:
237                     int colonPos = commentIn.indexOf(':');
238                     if (colonPos < 0) {
239                         throw new IllegalArgumentException(element.name + " " + name +
240                             "= : Unrecognized ATTLIST annotation: " + commentIn);
241                     }
242                     String command = commentIn.substring(0, colonPos);
243                     String argument = commentIn.substring(colonPos + 1);
244                     switch(command) {
245                     case "@DEPRECATED":
246                         deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(argument)));
247                         break;
248                     case "@MATCH":
249                         if (matchValue != null) {
250                             throw new IllegalArgumentException(element.name + " " + name +
251                                 "= : Conflicting @MATCH: " + matchValue.getName() + " & " + argument);
252                         }
253                         matchValue = MatchValue.of(argument);
254                         break;
255                     default:
256                         throw new IllegalArgumentException(element.name + " " + name +
257                             "= : Unrecognized ATTLIST annotation: " + commentIn);
258                     }
259                 }
260                 return;
261             }
262             commentsPost = addUnmodifiable(commentsPost, commentIn.trim());
263         }
264 
265         /**
266          * Special version of identity; only considers name and name of element
267          */
268         @Override
equals(Object obj)269         public boolean equals(Object obj) {
270             if (!(obj instanceof Attribute)) {
271                 return false;
272             }
273             Attribute that = (Attribute) obj;
274             return name.equals(that.name)
275                 && element.name.equals(that.element.name) // don't use plain element: circularity
276                 // not relevant to identity
277                 //                && Objects.equals(comment, that.comment)
278                 //                && mode.equals(that.mode)
279                 //                && Objects.equals(defaultValue, that.defaultValue)
280                 //                && type.equals(that.type)
281                 //                && values.equals(that.values)
282                 ;
283         }
284 
285         /**
286          * Special version of identity; only considers name and name of element
287          */
288         @Override
hashCode()289         public int hashCode() {
290             return name.hashCode() * 37
291                 + element.name.hashCode() // don't use plain element: circularity
292                 // not relevant to identity
293                 //                ) * 37 + Objects.hashCode(comment)) * 37
294                 //                + mode.hashCode()) * 37
295                 //                + Objects.hashCode(defaultValue)) * 37
296                 //                + type.hashCode()) * 37
297                 //                + values.hashCode()
298                 ;
299         }
300 
isDeprecated()301         public boolean isDeprecated() {
302             return isDeprecatedAttribute;
303         }
304 
isDeprecatedValue(String value)305         public boolean isDeprecatedValue(String value) {
306             return deprecatedValues.contains(value);
307         }
308 
getStatus()309         public AttributeStatus getStatus() {
310             return attributeStatus;
311         }
312 
getValueStatus(String value)313         public ValueStatus getValueStatus(String value) {
314             return deprecatedValues.contains(value) ? ValueStatus.invalid
315                 : type == AttributeType.ENUMERATED_TYPE  ? (values.containsKey(value) ? ValueStatus.valid  : ValueStatus.invalid)
316                     : matchValue == null ? ValueStatus.unknown
317                         : matchValue.is(value) ? ValueStatus.valid
318                             : ValueStatus.invalid;
319         }
320 
getMatchString()321         public String getMatchString() {
322             return type == AttributeType.ENUMERATED_TYPE ? ENUM_LEAD + JOINER_COMMA_SPACE.join(values.keySet()) + ENUM_TRAIL
323                 : matchValue != null ? AUG_LEAD + matchValue.toString() + AUG_TRAIL
324                     : "";
325         }
326 
getMatchingName(Map<Attribute, Integer> attributes)327         public Attribute getMatchingName(Map<Attribute, Integer> attributes) {
328             for (Attribute attribute : attributes.keySet()) {
329                 if (name.equals(attribute.getName())) {
330                     return attribute;
331                 }
332             }
333             return null;
334         }
335 
336     }
337 
338     public enum ValueStatus {invalid, unknown, valid}
339 
DtdData(DtdType type, String version)340     private DtdData(DtdType type, String version) {
341         this.dtdType = type;
342         this.ROOT = elementFrom(type.rootType.toString());
343         this.version = version;
344     }
345 
addAttribute(String eName, String aName, String type, String mode, String value)346     private void addAttribute(String eName, String aName, String type, String mode, String value) {
347         Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache);
348         preCommentCache = null;
349         getAttributesFromName().put(aName, a);
350         CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size());
351         lastElement = null;
352         lastAttribute = a;
353     }
354 
355     public enum ElementType {
356         EMPTY, ANY, PCDATA("(#PCDATA)"), CHILDREN;
357         public final String source;
358 
ElementType(String s)359         private ElementType(String s) {
360             source = s;
361         }
362 
ElementType()363         private ElementType() {
364             source = name();
365         }
366     }
367 
368     interface Named {
getName()369         String getName();
370     }
371 
372     public enum ElementStatus {
373         regular, metadata
374     }
375 
376     public static class Element implements Named {
377         public final String name;
378         private String rawModel;
379         private ElementType type;
380         private final Map<Element, Integer> children = new LinkedHashMap<>();
381         private final Map<Attribute, Integer> attributes = new LinkedHashMap<>();
382         private Set<String> commentsPre;
383         private Set<String> commentsPost;
384         private String model;
385         private boolean isOrderedElement;
386         private boolean isDeprecatedElement;
387         private boolean isTechPreviewElement;
388         private ElementStatus elementStatus = ElementStatus.regular;
389 
Element(String name2)390         private Element(String name2) {
391             name = name2.intern();
392         }
393 
setChildren(DtdData dtdData, String model, Set<String> precomments)394         private void setChildren(DtdData dtdData, String model, Set<String> precomments) {
395             this.commentsPre = precomments;
396             rawModel = model;
397             this.model = clean(model);
398             if (model.equals("EMPTY")) {
399                 type = ElementType.EMPTY;
400                 return;
401             }
402             type = ElementType.CHILDREN;
403             for (String part : FILLER.split(model)) {
404                 if (part.length() != 0) {
405                     if (part.equals("#PCDATA")) {
406                         type = ElementType.PCDATA;
407                     } else if (part.equals("ANY")) {
408                         type = ElementType.ANY;
409                     } else {
410                         CldrUtility.putNew(children, dtdData.elementFrom(part), children.size());
411                     }
412                 }
413             }
414             if ((type == ElementType.CHILDREN) == (children.size() == 0)
415                 && !model.startsWith("(#PCDATA|cp")) {
416                 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model);
417             }
418         }
419 
420         static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)");
421         static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])");
422 
clean(String model2)423         private String clean(String model2) {
424             // (x) -> ( x );
425             // x,y -> x, y
426             // x|y -> x | y
427             String result = CLEANER1.matcher(model2).replaceAll("$1 ");
428             result = CLEANER2.matcher(result).replaceAll(" $1");
429             return result.equals(model2)
430                 ? model2
431                     : result; // for debugging
432         }
433 
containsAttribute(String string)434         public boolean containsAttribute(String string) {
435             for (Attribute a : attributes.keySet()) {
436                 if (a.name.equals(string)) {
437                     return true;
438                 }
439             }
440             return false;
441         }
442 
443         @Override
toString()444         public String toString() {
445             return name;
446         }
447 
toDtdString()448         public String toDtdString() {
449             return "<!ELEMENT " + name + " " + getRawModel() + " >";
450         }
451 
getType()452         public ElementType getType() {
453             return type;
454         }
455 
getChildren()456         public Map<Element, Integer> getChildren() {
457             return Collections.unmodifiableMap(children);
458         }
459 
getAttributes()460         public Map<Attribute, Integer> getAttributes() {
461             return Collections.unmodifiableMap(attributes);
462         }
463 
464         @Override
getName()465         public String getName() {
466             return name;
467         }
468 
getChildNamed(String string)469         public Element getChildNamed(String string) {
470             for (Element e : children.keySet()) {
471                 if (e.name.equals(string)) {
472                     return e;
473                 }
474             }
475             return null;
476         }
477 
getAttributeNamed(String string)478         public Attribute getAttributeNamed(String string) {
479             for (Attribute a : attributes.keySet()) {
480                 if (a.name.equals(string)) {
481                     return a;
482                 }
483             }
484             return null;
485         }
486 
addComment(String addition)487         public void addComment(String addition) {
488             if (addition.startsWith("@")) {
489                 // there are exactly 4 cases: deprecated, ordered, techPreview and metadata
490                 switch (addition) {
491                 case "@ORDERED":
492                     isOrderedElement = true;
493                     break;
494                 case "@DEPRECATED":
495                     isDeprecatedElement = true;
496                     break;
497                 case "@METADATA":
498                     elementStatus = ElementStatus.metadata;
499                     break;
500                 case "@TECHPREVIEW":
501                     isTechPreviewElement = true;
502                     break;
503                 default:
504                     if (addition.startsWith("@MATCH") ||
505                         addition.startsWith("@VALUE")) {
506                         // Try to catch this case
507                         throw new IllegalArgumentException(name +
508                             ": Unrecognized ELEMENT annotation (this isn't ATTLIST!): " +
509                             addition);
510                     } else {
511                         throw new IllegalArgumentException(name +
512                             ": Unrecognized ELEMENT annotation: " +
513                             addition);
514                     }
515                 }
516                 return;
517             }
518             commentsPost = addUnmodifiable(commentsPost, addition.trim());
519         }
520 
521         /**
522          * Special version of equals. Only the name is considered in the identity.
523          */
524         @Override
equals(Object obj)525         public boolean equals(Object obj) {
526             if (!(obj instanceof Element)) {
527                 return false;
528             }
529             Element that = (Element) obj;
530             return name.equals(that.name)
531                 // not relevant to the identity of the object
532                 //                && Objects.equals(comment, that.comment)
533                 //                && type == that.type
534                 //                && attributes.equals(that.attributes)
535                 //                && children.equals(that.children)
536                 ;
537         }
538 
539         /**
540          * Special version of hashcode. Only the name is considered in the identity.
541          */
542         @Override
hashCode()543         public int hashCode() {
544             return name.hashCode()
545                 // not relevant to the identity of the object
546                 // * 37 + Objects.hashCode(comment)
547                 //) * 37 + Objects.hashCode(type)
548                 //                ) * 37 + attributes.hashCode()
549                 //                ) * 37 + children.hashCode()
550                 ;
551         }
552 
isDeprecated()553         public boolean isDeprecated() {
554             return isDeprecatedElement;
555         }
556 
isOrdered()557         public boolean isOrdered() {
558             return isOrderedElement;
559         }
560 
isTechPreview()561         public boolean isTechPreview() {
562             return isTechPreviewElement;
563         }
564 
getElementStatus()565         public ElementStatus getElementStatus() {
566             return elementStatus;
567         }
568 
569         /**
570          * @return the rawModel
571          */
getRawModel()572         public String getRawModel() {
573             return rawModel;
574         }
575     }
576 
elementFrom(String name)577     private Element elementFrom(String name) {
578         Element result = nameToElement.get(name);
579         if (result == null) {
580             nameToElement.put(name, result = new Element(name));
581         }
582         return result;
583     }
584 
addElement(String name2, String model)585     private void addElement(String name2, String model) {
586         Element element = elementFrom(name2);
587         element.setChildren(this, model, preCommentCache);
588         preCommentCache = null;
589         lastElement = element;
590         lastAttribute = null;
591     }
592 
addComment(String comment)593     private void addComment(String comment) {
594         comment = comment.trim();
595         if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky"
596             if (comment.startsWith("@")) {
597                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
598             }
599             preCommentCache = addUnmodifiable(preCommentCache, comment);
600         } else if (lastElement != null) {
601             lastElement.addComment(comment);
602         } else if (lastAttribute != null) {
603             lastAttribute.addComment(comment);
604         } else {
605             if (comment.startsWith("@")) {
606                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
607             }
608             preCommentCache = addUnmodifiable(preCommentCache, comment);
609         }
610     }
611 
612     // TODO hide this
613     /**
614      * @deprecated
615      */
616     @Deprecated
617     @Override
handleElementDecl(String name, String model)618     public void handleElementDecl(String name, String model) {
619         if (SHOW_ALL) {
620             // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) >
621             System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >");
622         }
623         addElement(name, model);
624     }
625 
626     // TODO hide this
627     /**
628      * @deprecated
629      */
630     @Deprecated
631     @Override
handleStartDtd(String name, String publicId, String systemId)632     public void handleStartDtd(String name, String publicId, String systemId) {
633         DtdType explicitDtdType = DtdType.valueOf(name);
634         if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) {
635             throw new IllegalArgumentException("Mismatch in dtdTypes");
636         }
637     }
638 
639     /**
640      * @deprecated
641      */
642     @Deprecated
643     @Override
handleAttributeDecl(String eName, String aName, String type, String mode, String value)644     public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
645         if (SHOW_ALL) {
646             // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED >
647             // <!ATTLIST version number CDATA #REQUIRED >
648             // <!ATTLIST version cldrVersion CDATA #FIXED "27" >
649 
650             System.out.println("<!ATTLIST " + eName
651                 + " " + aName
652                 + " " + type
653                 + " " + mode
654                 + (value == null ? "" : " \"" + value + "\"")
655                 + " >");
656         }
657         // HACK for 1.1.1
658         if (eName.equals("draft")) {
659             eName = "week";
660         }
661         addAttribute(eName, aName, type, mode, value);
662     }
663 
664     /**
665      * @deprecated
666      */
667     @Deprecated
668     @Override
handleComment(String path, String comment)669     public void handleComment(String path, String comment) {
670         if (comment.contains("Copyright")) {
671             // Zap the copyright comment, replace it with the current one.
672             comment = CldrUtility.getCopyrightString();
673         }
674         if (SHOW_ALL) {
675             // <!-- true and false are deprecated. -->
676             System.out.println("<!-- " + comment.trim() + " -->");
677         }
678         addComment(comment);
679     }
680 
681     // TODO hide this
682     /**
683      * @deprecated
684      */
685     @Deprecated
686     @Override
handleEndDtd()687     public void handleEndDtd() {
688         throw new XMLFileReader.AbortException();
689     }
690 
691     /**
692      * Note that it always gets the trunk version
693      * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead
694      */
695     @Deprecated
getInstance(DtdType type)696     public static DtdData getInstance(DtdType type) {
697         return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory());
698     }
699 
700     /**
701      * Special form using version, used only by tests, etc.
702      */
getInstance(DtdType type, String version)703     public static DtdData getInstance(DtdType type, String version) {
704         // Map out versions that had no DTD
705         if (version != null) {
706             switch (version) {
707             case "1.1.1":
708                 version="1.1";
709                 break;
710             case "1.4.1":
711                 version="1.4";
712                 break;
713             case "1.5.1":
714                 version="1.5.0.1";
715                 break;
716             default:
717             }
718         }
719         File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory()
720             : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version);
721 
722         return getInstance(type, version, directory);
723     }
724 
725     private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE = new ConcurrentHashMap<>();
726 
727     /**
728      * Normal version of DtdData
729      * Get a DtdData, given the CLDR root directory.
730      * @param type which DtdType to return
731      * @param directory the CLDR Root directory, which contains the "common" directory.
732      * @return
733      */
getInstance(DtdType type, File directory)734     public static DtdData getInstance(DtdType type, File directory) {
735         Pair<DtdType, File> key = new Pair<>(type, directory);
736         DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory));
737         return data;
738     }
739 
getInstance(DtdType type, String version, File directory)740     private static DtdData getInstance(DtdType type, String version, File directory) {
741         DtdData simpleHandler = new DtdData(type, version);
742         XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler);
743         if (type != type.rootType) {
744             // read the real first, then add onto it.
745             readFile(type.rootType, xfr, directory);
746         }
747         readFile(type, xfr, directory);
748         // HACK
749         if (type == DtdType.ldmlICU) {
750             Element special = simpleHandler.nameToElement.get("special");
751             for (String extraElementName : Arrays.asList(
752                 "icu:breakIteratorData",
753                 "icu:UCARules",
754                 "icu:scripts",
755                 "icu:transforms",
756                 "icu:ruleBasedNumberFormats",
757                 "icu:isLeapMonth",
758                 "icu:version",
759                 "icu:breakDictionaryData",
760                 "icu:depends")) {
761                 Element extraElement = simpleHandler.nameToElement.get(extraElementName);
762                 special.children.put(extraElement, special.children.size());
763             }
764         }
765         if (simpleHandler.ROOT.children.size() == 0) {
766             throw new IllegalArgumentException("Internal Error: DtdData.getInstance(" +
767                 type + ", ...): readFile() failed to return any children!");
768             // should never happen
769         }
770         simpleHandler.finish();
771         simpleHandler.freeze();
772         return simpleHandler;
773     }
774 
finish()775     private void finish() {
776         dtdComparator = new DtdComparator();
777     }
778 
readFile(DtdType type, XMLFileReader xfr, File directory)779     public static void readFile(DtdType type, XMLFileReader xfr, File directory) {
780         File file = new File(directory, type.dtdPath);
781         StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>"
782             + "<!DOCTYPE " + type
783             + " SYSTEM '" + file.getAbsolutePath() + "'>");
784         try {
785             xfr.read(type.toString(), s, -1, true); //  DTD_TYPE_TO_FILE.get(type)
786         } catch (IllegalArgumentException iae) {
787             // rethrow
788             throw new IllegalArgumentException("Error while reading " + type, iae);
789         }
790     }
791 
freeze()792     private void freeze() {
793         if (version == null) { // only generate for new versions
794             MergeLists<String> elementMergeList = new MergeLists<>();
795             elementMergeList.add(dtdType.toString());
796             MergeLists<String> attributeMergeList = new MergeLists<>();
797             attributeMergeList.add("_q");
798 
799             for (Element element : nameToElement.values()) {
800                 if (element.children.size() > 0) {
801                     Collection<String> names = getNames(element.children.keySet());
802                     elementMergeList.add(names);
803                     if (DEBUG) {
804                         System.out.println(element.getName() + "\t→\t" + names);
805                     }
806                 }
807                 if (element.attributes.size() > 0) {
808                     Collection<String> names = getNames(element.attributes.keySet());
809                     attributeMergeList.add(names);
810                     if (DEBUG) {
811                         System.out.println(element.getName() + "\t→\t@" + names);
812                     }
813                 }
814             }
815             List<String> elementList = elementMergeList.merge();
816             List<String> attributeList = attributeMergeList.merge();
817             if (DEBUG) {
818                 System.out.println("Element Ordering:\t" + elementList);
819                 System.out.println("Attribute Ordering:\t" + attributeList);
820             }
821             elementComparator = new MapComparator<>(elementList).setErrorOnMissing(true).freeze();
822             attributeComparator = new MapComparator<>(attributeList).setErrorOnMissing(true).freeze();
823         }
824         nameToAttributes.freeze();
825         nameToElement = Collections.unmodifiableMap(nameToElement);
826     }
827 
getNames(Collection<? extends Named> keySet)828     private Collection<String> getNames(Collection<? extends Named> keySet) {
829         List<String> result = new ArrayList<>();
830         for (Named e : keySet) {
831             result.add(e.getName());
832         }
833         return result;
834     }
835 
836     public enum DtdItem {
837         ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE
838     }
839 
840     public interface AttributeValueComparator {
compare(String element, String attribute, String value1, String value2)841         public int compare(String element, String attribute, String value1, String value2);
842     }
843 
getDtdComparator(AttributeValueComparator avc)844     public Comparator<String> getDtdComparator(AttributeValueComparator avc) {
845         return dtdComparator;
846     }
847 
getDtdComparator()848     public DtdComparator getDtdComparator() {
849         return dtdComparator;
850     }
851 
852     public class DtdComparator implements Comparator<String> {
853         @Override
compare(String path1, String path2)854         public int compare(String path1, String path2) {
855             XPathParts a = XPathParts.getFrozenInstance(path1);
856             XPathParts b = XPathParts.getFrozenInstance(path2);
857             return xpathComparator(a, b);
858         }
859 
xpathComparator(XPathParts a, XPathParts b)860         public int xpathComparator(XPathParts a, XPathParts b) {
861             // there must always be at least one element
862             String baseA = a.getElement(0);
863             String baseB = b.getElement(0);
864             if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) {
865                 throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB);
866             }
867             int min = Math.min(a.size(), b.size());
868             Element parent = ROOT;
869             Element elementA;
870             for (int i = 1; i < min; ++i, parent = elementA) {
871                 // add extra test for "fake" elements, used in diffing. they always start with _
872                 String elementRawA = a.getElement(i);
873                 String elementRawB = b.getElement(i);
874                 if (elementRawA.startsWith("_")) {
875                     return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1;
876                 } else if (elementRawB.startsWith("_")) {
877                     return 1;
878                 }
879                 //
880                 elementA = nameToElement.get(elementRawA);
881                 Element elementB = nameToElement.get(elementRawB);
882                 if (elementA != elementB) {
883                     int aa = parent.children.get(elementA);
884                     int bb = parent.children.get(elementB);
885                     return aa - bb;
886                 }
887                 int countA = a.getAttributeCount(i);
888                 int countB = b.getAttributeCount(i);
889                 if (countA == 0 && countB == 0) {
890                     continue;
891                 }
892                 // we have two ways to compare the attributes. One based on the dtd,
893                 // and one based on explicit comparators
894 
895                 // at this point the elements are the same and correspond to elementA
896                 // in the dtd
897 
898                 // Handle the special added elements
899                 String aqValue = a.getAttributeValue(i, "_q");
900                 if (aqValue != null) {
901                     String bqValue = b.getAttributeValue(i, "_q");
902                     if (!aqValue.equals(bqValue)) {
903                         int aValue = Integer.parseInt(aqValue);
904                         int bValue = Integer.parseInt(bqValue);
905                         return aValue - bValue;
906                     }
907                     --countA;
908                     --countB;
909                 }
910 
911                 attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) {
912                     Attribute main = attr.getKey();
913                     String valueA = a.getAttributeValue(i, main.name);
914                     String valueB = b.getAttributeValue(i, main.name);
915                     if (valueA == null) {
916                         if (valueB != null) {
917                             return -1;
918                         }
919                     } else if (valueB == null) {
920                         return 1;
921                     } else if (valueA.equals(valueB)) {
922                         --countA;
923                         --countB;
924                         if (countA == 0 && countB == 0) {
925                             break attributes;
926                         }
927                         continue; // TODO
928                     } else if (main.attributeValueComparator != null) {
929                         return main.attributeValueComparator.compare(valueA, valueB);
930                     } else if (main.values.size() != 0) {
931                         int aa = main.values.get(valueA);
932                         int bb = main.values.get(valueB);
933                         return aa - bb;
934                     } else {
935                         return valueA.compareTo(valueB);
936                     }
937                 }
938                 if (countA != 0 || countB != 0) {
939                     throw new IllegalArgumentException();
940                 }
941             }
942             return a.size() - b.size();
943         }
944     }
945 
getAttributeComparator()946     public MapComparator<String> getAttributeComparator() {
947         return attributeComparator;
948     }
949 
950 
getElementComparator()951     public MapComparator<String> getElementComparator() {
952         return elementComparator;
953     }
954 
getAttributesFromName()955     public Relation<String, Attribute> getAttributesFromName() {
956         return nameToAttributes;
957     }
958 
getElementFromName()959     public Map<String, Element> getElementFromName() {
960         return nameToElement;
961     }
962 
963     @Override
toString()964     public String toString() {
965         StringBuilder b = new StringBuilder();
966         // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) >
967         // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. -->
968         Seen seen = new Seen(dtdType);
969         seen.seenElements.add(ANY);
970         seen.seenElements.add(PCDATA);
971         toString(ROOT, b, seen);
972 
973         // Hack for ldmlIcu: catch the items that are not mentioned in the original
974         int currentEnd = b.length();
975         for (Element e : nameToElement.values()) {
976             toString(e, b, seen);
977         }
978         if (currentEnd != b.length()) {
979             b.insert(currentEnd,
980                 System.lineSeparator() + System.lineSeparator()
981                 + "<!-- Elements not reachable from root! -->"
982                 + System.lineSeparator());
983         }
984         return b.toString();
985     }
986 
987     static final class Seen {
988         Set<Element> seenElements = new HashSet<>();
989         Set<Attribute> seenAttributes = new HashSet<>();
990 
Seen(DtdType dtdType)991         public Seen(DtdType dtdType) {
992             if (dtdType.rootType == dtdType) {
993                 return;
994             }
995             DtdData otherData = DtdData.getInstance(dtdType.rootType);
996             walk(otherData, otherData.ROOT);
997             seenElements.remove(otherData.nameToElement.get("special"));
998         }
999 
walk(DtdData otherData, Element current)1000         private void walk(DtdData otherData, Element current) {
1001             seenElements.add(current);
1002             seenAttributes.addAll(current.attributes.keySet());
1003             for (Element e : current.children.keySet()) {
1004                 walk(otherData, e);
1005             }
1006         }
1007     }
1008 
getDescendents(Element start, Set<Element> toAddTo)1009     public Set<Element> getDescendents(Element start, Set<Element> toAddTo) {
1010         if (!toAddTo.contains(start)) {
1011             toAddTo.add(start);
1012             for (Element e : start.children.keySet()) {
1013                 getDescendents(e, toAddTo);
1014             }
1015         }
1016         return toAddTo;
1017     }
1018 
toString(Element current, StringBuilder b, Seen seen)1019     private void toString(Element current, StringBuilder b, Seen seen) {
1020         boolean first = true;
1021         if (seen.seenElements.contains(current)) {
1022             return;
1023         }
1024         seen.seenElements.add(current);
1025         boolean elementDeprecated = isDeprecated(current.name, "*", "*");
1026 
1027         showComments(b, current.commentsPre, true);
1028         b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >");
1029         if (USE_SYNTHESIZED) {
1030             Element aliasElement = getElementFromName().get("alias");
1031             //b.append(current.rawChildren);
1032             if (!current.children.isEmpty()) {
1033                 LinkedHashSet<Element> elements = new LinkedHashSet<>(current.children.keySet());
1034                 boolean hasAlias = aliasElement != null && elements.remove(aliasElement);
1035                 //boolean hasSpecial = specialElement != null && elements.remove(specialElement);
1036                 if (hasAlias) {
1037                     b.append("(alias |");
1038                 }
1039                 b.append("(");
1040                 // <!ELEMENT transformNames ( alias | (transformName | special)* ) >
1041                 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) >
1042 
1043                 for (Element e : elements) {
1044                     if (first) {
1045                         first = false;
1046                     } else {
1047                         b.append(", ");
1048                     }
1049                     b.append(e.name);
1050                     if (e.type != ElementType.PCDATA) {
1051                         b.append("*");
1052                     }
1053                 }
1054                 if (hasAlias) {
1055                     b.append(")");
1056                 }
1057                 b.append(")");
1058             } else {
1059                 b.append(current.type == null ? "???" : current.type.source);
1060             }
1061             b.append(">");
1062         }
1063         showComments(b, current.commentsPost, false);
1064         if (isOrdered(current.name)) {
1065             b.append(COMMENT_PREFIX + "<!--@ORDERED-->");
1066         }
1067         if (isTechPreview(current.name)) {
1068             b.append(COMMENT_PREFIX + "<!--@TECHPREVIEW-->");
1069         }
1070         if (current.getElementStatus() != ElementStatus.regular) {
1071             b.append(COMMENT_PREFIX + "<!--@"
1072                 + current.getElementStatus().toString().toUpperCase(Locale.ROOT)
1073                 + "-->");
1074         }
1075         if (elementDeprecated) {
1076             b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
1077         }
1078 
1079         LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>();
1080 
1081         for (Attribute a : current.attributes.keySet()) {
1082             if (seen.seenAttributes.contains(a)) {
1083                 continue;
1084             }
1085             seen.seenAttributes.add(a);
1086             boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*");
1087 
1088             deprecatedValues.clear();
1089 
1090             showComments(b, a.commentsPre, true);
1091             b.append("\n<!ATTLIST " + current.name + " " + a.name);
1092             if (a.type == AttributeType.ENUMERATED_TYPE) {
1093                 b.append(" (");
1094                 first = true;
1095                 for (String s : a.values.keySet()) {
1096                     if (first) {
1097                         first = false;
1098                     } else {
1099                         b.append(" | ");
1100                     }
1101                     b.append(s);
1102                     if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) {
1103                         deprecatedValues.add(s);
1104                     }
1105                 }
1106                 b.append(")");
1107             } else {
1108                 b.append(' ').append(a.type);
1109             }
1110             if (a.mode != Mode.NULL) {
1111                 b.append(" ").append(a.mode.source);
1112             }
1113             if (a.defaultValue != null) {
1114                 b.append(" \"").append(a.defaultValue).append('"');
1115             }
1116             b.append(" >");
1117             showComments(b, a.commentsPost, false);
1118 //            if (attributeDeprecated != deprecatedComment) {
1119 //                System.out.println("*** BAD DEPRECATION ***" + a);
1120 //            }
1121             if (a.matchValue != null) {
1122                 b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->");
1123             }
1124             if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) {
1125                 b.append(COMMENT_PREFIX + "<!--@METADATA-->");
1126             } else if (!isDistinguishing(current.name, a.name)) {
1127                 b.append(COMMENT_PREFIX + "<!--@VALUE-->");
1128             }
1129             if (attributeDeprecated) {
1130                 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
1131             } else if (!deprecatedValues.isEmpty()) {
1132                 b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + Joiner.on(", ")
1133                 .join(deprecatedValues) + "-->");
1134             }
1135         }
1136         if (current.children.size() > 0) {
1137             for (Element e : current.children.keySet()) {
1138                 toString(e, b, seen);
1139             }
1140         }
1141     }
1142 
showComments(StringBuilder b, Set<String> comments, boolean separate)1143     private void showComments(StringBuilder b, Set<String> comments, boolean separate) {
1144         if (comments == null) {
1145             return;
1146         }
1147         if (separate && b.length() != 0) {
1148             b.append(System.lineSeparator());
1149         }
1150         for (String c : comments) {
1151             boolean deprecatedComment = false; // the following served its purpose... c.toLowerCase(Locale.ENGLISH).contains("deprecat");
1152             if (!deprecatedComment) {
1153                 if (separate) {
1154                     // special handling for very first comment
1155                     if (b.length() == 0) {
1156                         b.append("<!--")
1157                         .append(System.lineSeparator())
1158                         .append(c)
1159                         .append(System.lineSeparator())
1160                         .append("-->");
1161                         continue;
1162                     }
1163                     b.append(System.lineSeparator());
1164                 } else {
1165                     b.append(COMMENT_PREFIX);
1166                 }
1167                 b.append("<!-- ").append(c).append(" -->");
1168             }
1169         }
1170     }
1171 
removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1172     public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) {
1173         for (Iterator<T> it = elements.iterator(); it.hasNext();) {
1174             T item = it.next();
1175             if (matcher.transform(item) == Boolean.TRUE) {
1176                 it.remove();
1177                 return item;
1178             }
1179         }
1180         return null;
1181     }
1182 
getElements()1183     public Set<Element> getElements() {
1184         return new LinkedHashSet<>(nameToElement.values());
1185     }
1186 
getAttributes()1187     public Set<Attribute> getAttributes() {
1188         return new LinkedHashSet<>(nameToAttributes.values());
1189     }
1190 
isDistinguishing(String elementName, String attribute)1191     public boolean isDistinguishing(String elementName, String attribute) {
1192         return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished;
1193     }
1194 
1195     static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft"));
1196 
addUnmodifiable(Set<String> comment, String addition)1197     static final Set<String> addUnmodifiable(Set<String> comment, String addition) {
1198         if (comment == null) {
1199             return Collections.singleton(addition);
1200         } else {
1201             comment = new LinkedHashSet<>(comment);
1202             comment.add(addition);
1203             return Collections.unmodifiableSet(comment);
1204         }
1205     }
1206 
1207     public class IllegalByDtdException extends RuntimeException {
1208         private static final long serialVersionUID = 1L;
1209         public final String elementName;
1210         public final String attributeName;
1211         public final String attributeValue;
1212 
IllegalByDtdException(String elementName, String attributeName, String attributeValue)1213         public IllegalByDtdException(String elementName, String attributeName, String attributeValue) {
1214             this.elementName = elementName;
1215             this.attributeName = attributeName;
1216             this.attributeValue = attributeValue;
1217         }
1218 
1219         @Override
getMessage()1220         public String getMessage() {
1221             return "Dtd " + dtdType
1222                 + " doesn’t allow "
1223                 + "element=" + elementName
1224                 + (attributeName == null ? "" : ", attribute: " + attributeName)
1225                 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue);
1226         }
1227     }
1228 
1229     //@SuppressWarnings("unused")
isDeprecated(String elementName, String attributeName, String attributeValue)1230     public boolean isDeprecated(String elementName, String attributeName, String attributeValue) {
1231         Element element = getElementThrowingIfNull(elementName, null, null);
1232         if (element.isDeprecatedElement) {
1233             return true;
1234         }
1235         if ("*".equals(attributeName) || "_q".equals(attributeName)) {
1236             return false;
1237         }
1238         Attribute attribute = element.getAttributeNamed(attributeName);
1239         if (attribute == null) {
1240             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1241         } else if (attribute.isDeprecatedAttribute) {
1242             return true;
1243         }
1244         return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*"
1245     }
1246 
1247     /**
1248      * Returns whether an element (specified by its full name) is ordered. This method
1249      * understands all elements in the DTDs used (including the ICU extensions), but will
1250      * throw IllegalByDtdException for unknown elements. See CLDR-8614 for more background.
1251      */
isOrdered(String elementName)1252     public boolean isOrdered(String elementName) {
1253         Element element = getElementThrowingIfNull(elementName, null, null);
1254         return element.isOrdered();
1255     }
1256 
getElementThrowingIfNull(String elementName, String attributeName, String value)1257     public Element getElementThrowingIfNull(String elementName, String attributeName, String value) {
1258         Element element = nameToElement.get(elementName);
1259         if (element == null) {
1260             throw new IllegalByDtdException(elementName, attributeName, value);
1261         }
1262         return element;
1263     }
1264 
1265     /**
1266      * Returns whether an element (specified by its full name) is a tech preview. This method
1267      * understands all elements in the DTDs used (including the ICU extensions), but will
1268      * throw IllegalByDtdException for unknown elements. See CLDR-8614 for more background.
1269      */
isTechPreview(String elementName)1270     public boolean isTechPreview(String elementName) {
1271         Element element = getElementThrowingIfNull(elementName, null, null);
1272         return element.isTechPreview();
1273     }
1274 
1275 
getAttributeStatus(String elementName, String attributeName)1276     public AttributeStatus getAttributeStatus(String elementName, String attributeName) {
1277         if ("_q".equals(attributeName)) {
1278             return AttributeStatus.distinguished; // special case
1279         }
1280         Element element = nameToElement.get(elementName);
1281         if (element == null) {
1282             if (elementName.startsWith("icu:")) {
1283                 return AttributeStatus.distinguished;
1284             }
1285             throw new IllegalByDtdException(elementName, attributeName, null);
1286         }
1287         Attribute attribute = element.getAttributeNamed(attributeName);
1288         if (attribute == null) {
1289             if (elementName.startsWith("icu:")) {
1290                 return AttributeStatus.distinguished;
1291             }
1292             throw new IllegalByDtdException(elementName, attributeName, null);
1293         }
1294         return attribute.attributeStatus;
1295     }
1296 
1297     // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1298     private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze();
1299 
1300     static MapComparator<String> dayValueOrder = new MapComparator<String>().add(
1301         "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze();
1302     static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add(
1303         "midnight", "am", "noon", "pm",
1304         "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2",
1305         // The ones on the following line are no longer used actively. Can be removed later?
1306         "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze();
1307     static MapComparator<String> dateTimeFormatOrder = new MapComparator<String>().add(
1308         "standard", "atTime").freeze();
1309     static MapComparator<String> listPatternOrder = new MapComparator<String>().add(
1310         "start", "middle", "end", "2", "3").freeze();
1311     static MapComparator<String> widthOrder = new MapComparator<String>().add(
1312         "abbreviated", "narrow", "short", "wide", "all").freeze();
1313     static MapComparator<String> lengthOrder = new MapComparator<String>().add(
1314         "full", "long", "medium", "short").freeze();
1315     static MapComparator<String> dateFieldOrder = new MapComparator<String>().add(
1316         "era", "era-short", "era-narrow",
1317         "year", "year-short", "year-narrow",
1318         "quarter", "quarter-short", "quarter-narrow",
1319         "month", "month-short", "month-narrow",
1320         "week", "week-short", "week-narrow",
1321         "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow",
1322         "day", "day-short", "day-narrow",
1323         "dayOfYear", "dayOfYear-short", "dayOfYear-narrow",
1324         "weekday", "weekday-short", "weekday-narrow",
1325         "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow",
1326         "sun", "sun-short", "sun-narrow",
1327         "mon", "mon-short", "mon-narrow",
1328         "tue", "tue-short", "tue-narrow",
1329         "wed", "wed-short", "wed-narrow",
1330         "thu", "thu-short", "thu-narrow",
1331         "fri", "fri-short", "fri-narrow",
1332         "sat", "sat-short", "sat-narrow",
1333         "dayperiod-short", "dayperiod", "dayperiod-narrow",
1334         "hour", "hour-short", "hour-narrow",
1335         "minute", "minute-short", "minute-narrow",
1336         "second", "second-short", "second-narrow",
1337         "zone", "zone-short", "zone-narrow").freeze();
1338     static MapComparator<String> nameFieldOrder = new MapComparator<String>().add(
1339         "prefix", "given", "given-informal", "given2",
1340         "surname", "surname-prefix", "surname-core", "surname2", "suffix").freeze();
1341     static MapComparator<String> orderValueOrder = new MapComparator<String>().add(
1342         "givenFirst", "surnameFirst", "sorting").freeze();
1343     static MapComparator<String> lengthValueOrder = new MapComparator<String>().add(
1344         "long", "medium", "short").freeze();
1345     static MapComparator<String> usageValueOrder = new MapComparator<String>().add(
1346         "referring", "addressing", "monogram").freeze();
1347     static MapComparator<String> formalityValueOrder = new MapComparator<String>().add(
1348         "formal", "informal").freeze();
1349     static MapComparator<String> sampleNameItemOrder = new MapComparator<String>().add(
1350         "givenOnly", "givenSurnameOnly", "given12Surname", "full").freeze();
1351 
1352     /* TODO: change this to be data-file driven. Can do with new Unit preferences info; also put them in a more meaningful order (metric vs other; size) */
1353 
1354     public static final MapComparator<String> unitOrder = new MapComparator<String>().add(
1355         "acceleration-g-force", "acceleration-meter-per-square-second",
1356         "acceleration-meter-per-second-squared", // deprecated
1357         "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second",
1358         "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter",
1359         "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch",
1360         "area-dunam",
1361         "concentr-karat",
1362         "proportion-karat",  // deprecated
1363         "concentr-milligram-ofglucose-per-deciliter",
1364         "concentr-milligram-per-deciliter",
1365         "concentr-millimole-per-liter",
1366         "concentr-item",
1367         "concentr-portion",
1368         "concentr-permillion",
1369         "concentr-part-per-million",  // deprecated
1370         "concentr-percent", "concentr-permille", "concentr-permyriad",
1371         "concentr-mole",
1372         "concentr-ofglucose",
1373         "consumption-liter-per-kilometer", "consumption-liter-per-100-kilometer",
1374         "consumption-liter-per-100kilometers", // deprecated
1375         "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial",
1376         "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit",
1377         "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit",
1378         "digital-byte", "digital-bit",
1379         "duration-century", "duration-decade",
1380         "duration-year", "duration-year-person",
1381         "duration-quarter",
1382         "duration-month", "duration-month-person",
1383         "duration-week", "duration-week-person",
1384         "duration-day", "duration-day-person",
1385         "duration-hour", "duration-minute", "duration-second",
1386         "duration-millisecond", "duration-microsecond", "duration-nanosecond",
1387         "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt",
1388         "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour",
1389         "energy-electronvolt",
1390         "energy-british-thermal-unit",
1391         "energy-therm-us",
1392         "force-pound-force",
1393         "force-newton",
1394         "force-kilowatt-hour-per-100-kilometer",
1395         "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", "frequency-hertz",
1396         "graphics-em", "graphics-pixel", "graphics-megapixel",
1397         "graphics-pixel-per-centimeter", "graphics-pixel-per-inch",
1398         "graphics-dot-per-centimeter", "graphics-dot-per-inch",
1399         "graphics-dot",
1400         "length-earth-radius",
1401         "length-100-kilometer",
1402         "length-kilometer", "length-meter", "length-decimeter", "length-centimeter",
1403         "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer",
1404         "length-mile", "length-yard", "length-foot", "length-inch",
1405         "length-parsec", "length-light-year", "length-astronomical-unit",
1406         "length-furlong", "length-fathom",
1407         "length-nautical-mile", "length-mile-scandinavian",
1408         "length-point",
1409         "length-solar-radius",
1410         "light-lux",
1411         "light-candela",
1412         "light-lumen",
1413         "light-solar-luminosity",
1414         "mass-tonne", "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram",
1415         "mass-ton", "mass-stone", "mass-pound", "mass-ounce",
1416         "mass-ounce-troy", "mass-carat",
1417         "mass-dalton",
1418         "mass-earth-mass",
1419         "mass-solar-mass",
1420 
1421         "mass-grain",
1422 
1423         "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt",
1424         "power-horsepower",
1425         "pressure-millimeter-ofhg",
1426         "pressure-millimeter-of-mercury", // deprecated
1427         "pressure-ofhg",
1428         "pressure-pound-force-per-square-inch",
1429         "pressure-pound-per-square-inch", // deprecated
1430         "pressure-inch-ofhg",
1431         "pressure-inch-hg",  // deprecated
1432         "pressure-bar", "pressure-millibar", "pressure-atmosphere",
1433         "pressure-pascal",
1434         "pressure-hectopascal",
1435         "pressure-kilopascal",
1436         "pressure-megapascal",
1437         "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot",
1438         "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin",
1439         "torque-pound-force-foot",
1440         "torque-pound-foot", // deprecated
1441         "torque-newton-meter",
1442         "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter",
1443         "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch",
1444         "volume-megaliter", "volume-hectoliter", "volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter",
1445         "volume-pint-metric", "volume-cup-metric",
1446         "volume-acre-foot",
1447         "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup",
1448         "volume-fluid-ounce", "volume-fluid-ounce-imperial", "volume-tablespoon", "volume-teaspoon",
1449         "volume-barrel",
1450 
1451         "volume-dessert-spoon",
1452         "volume-dessert-spoon-imperial",
1453         "volume-drop",
1454         "volume-dram",
1455         "volume-jigger",
1456         "volume-pinch",
1457         "volume-quart-imperial"
1458         // "volume-pint-imperial"
1459         ).freeze();
1460 
1461     static MapComparator<String> countValueOrder = new MapComparator<String>().add(
1462         "0", "1", "zero", "one", "two", "few", "many", "other").freeze();
1463     static MapComparator<String> unitLengthOrder = new MapComparator<String>().add(
1464         "long", "short", "narrow").freeze();
1465     static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add(
1466         "standard", "accounting").freeze();
1467     static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator();
1468 
1469     static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator();
1470 
1471     // Hack for US
1472     static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<>() {
1473         @Override
1474         public int compare(String o1, String o2) {
1475             if (o1.contains("{")) {
1476                 o1 = o1.replace("{", "");
1477             }
1478             if (o2.contains("{")) {
1479                 o2 = o2.replace("{", "");
1480             }
1481             return COMP.compare(o1, o2);
1482         }
1483 
1484     };
1485 
getAttributeValueComparator(String element, String attribute)1486     public static Comparator<String> getAttributeValueComparator(String element, String attribute) {
1487         return getAttributeValueComparator(DtdType.ldml, element, attribute);
1488     }
1489 
getAttributeValueComparator(DtdType type, String element, String attribute)1490     static Comparator<String> getAttributeValueComparator(DtdType type, String element, String attribute) {
1491         // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1492         Comparator<String> comp = valueOrdering;
1493         if (type != DtdType.ldml && type != DtdType.ldmlICU) {
1494             return comp;
1495         }
1496         if (attribute.equals("day")) { // && (element.startsWith("weekend")
1497             comp = dayValueOrder;
1498         } else if (attribute.equals("type")) {
1499             if (element.endsWith("FormatLength")) {
1500                 comp = lengthOrder;
1501             } else if (element.endsWith("Width")) {
1502                 comp = widthOrder;
1503             } else if (element.equals("day")) {
1504                 comp = dayValueOrder;
1505             } else if (element.equals("field")) {
1506                 comp = dateFieldOrder;
1507             } else if (element.equals("zone")) {
1508                 comp = zoneOrder;
1509             } else if (element.equals("listPatternPart")) {
1510                 comp = listPatternOrder;
1511             } else if (element.equals("currencyFormat")) {
1512                 comp = currencyFormatOrder;
1513             } else if (element.equals("unitLength")) {
1514                 comp = unitLengthOrder;
1515             } else if (element.equals("unit")) {
1516                 comp = unitOrder;
1517             } else if (element.equals("dayPeriod")) {
1518                 comp = dayPeriodOrder;
1519             } else if (element.equals("dateTimeFormat")) {
1520                 comp = dateTimeFormatOrder;
1521             } else if (element.equals("nameField")) {
1522                 comp = nameFieldOrder;
1523             }
1524         } else if (attribute.equals("order") && element.equals("personName")) {
1525             comp = orderValueOrder;
1526         } else if (attribute.equals("length") && element.equals("personName")) {
1527             comp = lengthValueOrder;
1528         } else if (attribute.equals("usage") && element.equals("personName")) {
1529             comp = usageValueOrder;
1530         } else if (attribute.equals("formality")) {
1531             comp = formalityValueOrder;
1532         } else if (attribute.equals("item") && element.equals("sampleName")) {
1533             comp = sampleNameItemOrder;
1534         } else if (attribute.equals("count") && !element.equals("minDays")) {
1535             comp = countValueOrder;
1536         } else if (attribute.equals("cp") && element.equals("annotation")) {
1537             comp = UNICODE_SET_COMPARATOR;
1538         }
1539         return comp;
1540     }
1541 
1542     /**
1543      * Comparator for attributes in CLDR files
1544      */
1545     private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() {
1546         @Override
1547         public int compare(String element, String attribute, String value1, String value2) {
1548             Comparator<String> comp = getAttributeValueComparator(element, attribute);
1549             return comp.compare(value1, value2);
1550         }
1551     };
1552 
hasValue(String elementName)1553     public boolean hasValue(String elementName) {
1554         return nameToElement.get(elementName).type == ElementType.PCDATA;
1555     }
1556 
isMetadata(XPathParts pathPlain)1557     public boolean isMetadata(XPathParts pathPlain) {
1558         for (String s : pathPlain.getElements()) {
1559             Element e = getElementFromName().get(s);
1560             if (e.elementStatus == ElementStatus.metadata) {
1561                 return true;
1562             }
1563         }
1564         return false;
1565     }
1566 
isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1567     public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) {
1568         // TODO Don't use hard-coded list; instead add to DTD annotations
1569         final String element1 = pathPlain.getElement(1);
1570         final String element2 = pathPlain.getElement(2);
1571         final String elementN = pathPlain.getElement(-1);
1572         switch (dtdType2) {
1573         case ldml:
1574             switch (element1) {
1575             case "generation":
1576             case "metadata":
1577                 return true;
1578             }
1579             break;
1580         case ldmlBCP47:
1581             switch (element1) {
1582             case "generation":
1583             case "version":
1584                 return true;
1585             }
1586             break;
1587             ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment
1588         case supplementalData:
1589             // these are NOT under /metadata/ but are actually metadata
1590             switch (element1) {
1591             case "generation":
1592             case "version":
1593             case "validity":
1594             case "references":
1595             case "coverageLevels":
1596                 return true;
1597             case "transforms":
1598                 return elementN.equals("comment");
1599             case "metadata":
1600                 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata.
1601                 switch (element2) {
1602                 case "validity":
1603                 case "serialElements":
1604                 case "suppress":
1605                 case "distinguishing":
1606                 case "blocking":
1607                 case "casingData":
1608                     return true;
1609                 }
1610                 break;
1611             }
1612             break;
1613         default:
1614         }
1615         return false;
1616     }
1617 
isDeprecated(XPathParts pathPlain)1618     public boolean isDeprecated(XPathParts pathPlain) {
1619         for (int i = 0; i < pathPlain.size(); ++i) {
1620             String elementName = pathPlain.getElement(i);
1621             if (isDeprecated(elementName, "*", null)) {
1622                 return true;
1623             }
1624             for (String attribute : pathPlain.getAttributeKeys(i)) {
1625                 String attributeValue = pathPlain.getAttributeValue(i, attribute);
1626                 if (isDeprecated(elementName, attribute, attributeValue)) {
1627                     return true;
1628                 }
1629             }
1630         }
1631         return false;
1632     }
1633 
1634     public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings();
1635     public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings();
1636     public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings();
1637 
1638     private static class XPathPartsSet {
1639         private final Set<XPathParts> list = new LinkedHashSet<>();
1640 
addElement(String element)1641         private void addElement(String element) {
1642             if (list.isEmpty()) {
1643                 list.add(new XPathParts().addElement(element));
1644             } else {
1645                 for (XPathParts item : list) {
1646                     item.addElement(element);
1647                 }
1648             }
1649         }
1650 
addAttribute(String attribute, String attributeValue)1651         private void addAttribute(String attribute, String attributeValue) {
1652             for (XPathParts item : list) {
1653                 item.addAttribute(attribute, attributeValue);
1654             }
1655         }
1656 
setElement(int i, String string)1657         private void setElement(int i, String string) {
1658             for (XPathParts item : list) {
1659                 item.setElement(i, string);
1660             }
1661         }
1662 
addAttributes(String attribute, List<String> attributeValues)1663         private void addAttributes(String attribute, List<String> attributeValues) {
1664             if (attributeValues.size() == 1) {
1665                 addAttribute(attribute, attributeValues.iterator().next());
1666             } else {
1667                 // duplicate all the items in the list with the given values
1668                 Set<XPathParts> newList = new LinkedHashSet<>();
1669                 for (XPathParts item : list) {
1670                     for (String attributeValue : attributeValues) {
1671                         XPathParts newItem = item.cloneAsThawed();
1672                         newItem.addAttribute(attribute, attributeValue);
1673                         newList.add(newItem);
1674                     }
1675                 }
1676                 list.clear();
1677                 list.addAll(newList);
1678             }
1679         }
1680 
toStrings()1681         private ImmutableSet<String> toStrings() {
1682             Builder<String> result = new ImmutableSet.Builder<>();
1683 
1684             for (XPathParts item : list) {
1685                 result.add(item.toString());
1686             }
1687             return result.build();
1688         }
1689 
1690         @Override
toString()1691         public String toString() {
1692             return list.toString();
1693         }
1694     }
1695 
getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)1696     public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) {
1697         extras.clear();
1698         Map<String, String> valueAttributes = new HashMap<>();
1699         XPathPartsSet pathResult = new XPathPartsSet();
1700         String element = null;
1701         for (int i = 0; i < pathPlain.size(); ++i) {
1702             element = pathPlain.getElement(i);
1703             pathResult.addElement(element);
1704             valueAttributes.clear();
1705             for (String attribute : pathPlain.getAttributeKeys(i)) {
1706                 AttributeStatus status = getAttributeStatus(element, attribute);
1707                 final String attributeValue = pathPlain.getAttributeValue(i, attribute);
1708                 switch (status) {
1709                 case distinguished:
1710                     AttributeType attrType = getAttributeType(element, attribute);
1711                     if (attrType == AttributeType.NMTOKENS) {
1712                         pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue));
1713                     } else {
1714                         pathResult.addAttribute(attribute, attributeValue);
1715                     }
1716                     break;
1717                 case value:
1718                     valueAttributes.put(attribute, attributeValue);
1719                     break;
1720                 case metadata:
1721                     break;
1722                 }
1723             }
1724             if (!valueAttributes.isEmpty()) {
1725                 boolean hasValue = hasValue(element);
1726                 // if it doesn't have a value, we construct new child elements, with _ prefix
1727                 // if it does have a value, we have to play a further trick, since
1728                 // we can't have a value and child elements at the same level.
1729                 // So we use a _ suffix on the element.
1730                 if (hasValue) {
1731                     pathResult.setElement(i, element + "_");
1732                 } else {
1733                     int debug = 0;
1734                 }
1735                 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) {
1736                     final String attribute = attributeAndValue.getKey();
1737                     final String attributeValue = attributeAndValue.getValue();
1738 
1739                     Set<String> pathsShort = pathResult.toStrings();
1740                     AttributeType attrType = getAttributeType(element, attribute);
1741                     for (String pathShort : pathsShort) {
1742                         pathShort += "/_" + attribute;
1743                         if (attrType == AttributeType.NMTOKENS) {
1744                             for (String valuePart : SPACE_SPLITTER.split(attributeValue)) {
1745                                 extras.put(pathShort, valuePart);
1746                             }
1747                         } else {
1748                             extras.put(pathShort, attributeValue);
1749                         }
1750                     }
1751                 }
1752                 if (hasValue) {
1753                     pathResult.setElement(i, element); // restore
1754                 }
1755             }
1756         }
1757         // Only add the path if it could have a value, looking at the last element
1758         if (!hasValue(element)) {
1759             return null;
1760         }
1761         return pathResult.toStrings();
1762     }
1763 
getAttributeType(String elementName, String attributeName)1764     public AttributeType getAttributeType(String elementName, String attributeName) {
1765         Attribute attr = getAttribute(elementName, attributeName);
1766         return (attr != null) ? attr.type : null;
1767     }
1768 
getAttribute(String elementName, String attributeName)1769     public Attribute getAttribute(String elementName, String attributeName) {
1770         Element element = nameToElement.get(elementName);
1771         return (element != null) ? element.getAttributeNamed(attributeName) : null;
1772     }
1773 
1774     // TODO: add support for following to DTD annotations, and rework API
1775 
1776     static final Set<String> SPACED_VALUES = ImmutableSet.of(
1777         "idValidity",
1778         "languageGroup");
1779 
getValueSplitter(XPathParts pathPlain)1780     public static Splitter getValueSplitter(XPathParts pathPlain) {
1781         if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) {
1782             return SPACE_SPLITTER;
1783         } else if (pathPlain.getElement(-1).equals("annotation")
1784             && !pathPlain.getAttributeKeys(-1).contains("tts")) {
1785             return BAR_SPLITTER;
1786         }
1787         return CR_SPLITTER;
1788     }
1789 
isComment(XPathParts pathPlain, String line)1790     public static boolean isComment(XPathParts pathPlain, String line) {
1791         if (pathPlain.contains("transform")) {
1792             if (line.startsWith("#")) {
1793                 return true;
1794             }
1795         }
1796         return false;
1797     }
1798 
isExtraSplit(String extraPath)1799     public static boolean isExtraSplit(String extraPath) {
1800         if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) {
1801             return true;
1802         }
1803         return false;
1804     }
1805 
1806     /**
1807      * Return the value status for an EAV
1808      */
getValueStatus(String elementName, String attributeName, String value)1809     public ValueStatus getValueStatus(String elementName, String attributeName, String value) {
1810         Element element = nameToElement.get(elementName);
1811         if (element == null) {
1812             return ValueStatus.invalid;
1813         }
1814         Attribute attr = element.getAttributeNamed(attributeName);
1815         if (attr == null) {
1816             return ValueStatus.invalid;
1817         }
1818         return attr.getValueStatus(value);
1819     }
1820 
1821     /**
1822      * Return element-attribute pairs with non-enumerated values, for quick checks.
1823      */
getNonEnumerated(Map<String,String> matchValues)1824     public Multimap<String, String> getNonEnumerated(Map<String,String> matchValues) {
1825         Multimap<String,String> nonEnumeratedElementToAttribute = TreeMultimap.create(); // make tree for ease of debugging
1826         for (Entry<String, Element> entry : nameToElement.entrySet()) {
1827             Element element = entry.getValue();
1828             for (Attribute attribute : element.attributes.keySet()) {
1829                 if (attribute.type != AttributeType.ENUMERATED_TYPE) {
1830                     String elementName = element.getName();
1831                     String attrName = attribute.getName();
1832                     nonEnumeratedElementToAttribute.put(elementName, attrName);
1833                     if (attribute.matchValue != null) {
1834                         matchValues.put(elementName + "\t" + attrName, attribute.matchValue.getName());
1835                     }
1836                 }
1837             }
1838         }
1839         return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute);
1840     }
1841 }
1842