• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.CharMatcher;
4 import com.google.common.base.Joiner;
5 import com.google.common.base.Splitter;
6 import com.google.common.collect.ImmutableMultimap;
7 import com.google.common.collect.ImmutableSet;
8 import com.google.common.collect.ImmutableSet.Builder;
9 import com.google.common.collect.ImmutableSetMultimap;
10 import com.google.common.collect.Multimap;
11 import com.google.common.collect.TreeMultimap;
12 import com.ibm.icu.impl.Relation;
13 import com.ibm.icu.text.Transform;
14 import java.io.File;
15 import java.io.StringReader;
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Collection;
19 import java.util.Collections;
20 import java.util.Comparator;
21 import java.util.HashMap;
22 import java.util.HashSet;
23 import java.util.Iterator;
24 import java.util.LinkedHashMap;
25 import java.util.LinkedHashSet;
26 import java.util.List;
27 import java.util.Locale;
28 import java.util.Map;
29 import java.util.Map.Entry;
30 import java.util.Set;
31 import java.util.Stack;
32 import java.util.TreeMap;
33 import java.util.concurrent.ConcurrentHashMap;
34 import java.util.concurrent.ConcurrentMap;
35 import java.util.regex.Pattern;
36 import org.unicode.cldr.util.DtdData.Element.ValueConstraint;
37 import org.unicode.cldr.util.MatchValue.LiteralMatchValue;
38 import org.unicode.cldr.util.personname.PersonNameFormatter;
39 
40 /**
41  * An immutable object that contains the structure of a DTD.
42  *
43  * @author markdavis
44  */
45 public class DtdData extends XMLFileReader.SimpleHandler {
46     private static final String COMMENT_PREFIX = System.lineSeparator() + "    ";
47     private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false);
48     private static final boolean USE_SYNTHESIZED = false;
49 
50     private static final boolean DEBUG = false;
51     private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]");
52 
53     private final Relation<String, Attribute> nameToAttributes =
54             Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class);
55     private Map<String, Element> nameToElement = new HashMap<>();
56     private MapComparator<String> elementComparator;
57     private MapComparator<String> attributeComparator;
58 
59     // TODO Make this data driven. See https://unicode-org.atlassian.net/browse/CLDR-17321
60     public static final Multimap<DtdType, String> HACK_PCDATA_ALLOWS_EMPTY =
61             ImmutableMultimap.<DtdType, String>builder()
62                     .putAll(
63                             DtdType.ldml,
64                             "nameOrderLocales",
65                             "foreignSpaceReplacement",
66                             "nativeSpaceReplacement",
67                             "language",
68                             "script",
69                             "region",
70                             "variant",
71                             "territory")
72                     .putAll(DtdType.supplementalData, "variable", "attributeValues")
73                     .build();
74 
75     public final Element ROOT;
76     public final Element PCDATA = elementFrom("#PCDATA");
77     public final Element ANY = elementFrom("ANY");
78     public final DtdType dtdType;
79     public final String version;
80     private Element lastElement;
81     private Attribute lastAttribute;
82     private Set<String> preCommentCache;
83     private DtdComparator dtdComparator;
84 
85     public enum AttributeStatus {
86         distinguished("§d"),
87         value("§v"),
88         metadata("§m︎");
89         public final String shortName;
90 
AttributeStatus(String shortName)91         AttributeStatus(String shortName) {
92             this.shortName = shortName;
93         }
94 
getShortName(AttributeStatus status)95         public static String getShortName(AttributeStatus status) {
96             return status == null ? "" : status.shortName;
97         }
98     }
99 
100     public enum Mode {
101         REQUIRED("#REQUIRED"),
102         OPTIONAL("#IMPLIED"),
103         FIXED("#FIXED"),
104         NULL("null");
105 
106         public final String source;
107 
Mode(String s)108         Mode(String s) {
109             source = s;
110         }
111 
forString(String mode)112         public static Mode forString(String mode) {
113             for (Mode value : Mode.values()) {
114                 if (value.source.equals(mode)) {
115                     return value;
116                 }
117             }
118             if (mode == null) {
119                 return NULL;
120             }
121             throw new IllegalArgumentException(mode);
122         }
123     }
124 
125     public enum AttributeType {
126         CDATA,
127         ID,
128         IDREF,
129         IDREFS,
130         ENTITY,
131         ENTITIES,
132         NMTOKEN,
133         NMTOKENS,
134         ENUMERATED_TYPE
135     }
136 
137     static final Set<String> DRAFT_ON_NON_LEAF_ALLOWED =
138             ImmutableSet.of("collation", "transform", "unitPreferenceData", "rulesetGrouping");
139 
140     public static class Attribute implements Named {
141         private static final Joiner JOINER_COMMA_SPACE = Joiner.on(", ");
142         public static final String AUG_TRAIL = "⟫";
143         public static final String AUG_LEAD = "⟪";
144         public static final String ENUM_TRAIL = "⟩";
145         public static final String ENUM_LEAD = "⟨";
146         public static final Pattern LEAD_TRAIL =
147                 Pattern.compile(
148                         "(.*["
149                                 + AUG_LEAD
150                                 + ENUM_LEAD
151                                 + "])(.*)(["
152                                 + AUG_TRAIL
153                                 + ENUM_TRAIL
154                                 + "].*)");
155         public final String name;
156         public final Element element;
157         public final Mode mode;
158         public final String defaultValue;
159         public final AttributeType type;
160         public final Map<String, Integer> values; // immutable
161         private final Set<String> commentsPre;
162         private Set<String> commentsPost;
163         private boolean isDeprecatedAttribute;
164         private boolean attributeAllowsUEscape = false;
165         public AttributeStatus attributeStatus =
166                 AttributeStatus.distinguished; // default unless reset by annotations, or for xml:
167         // attributes
168         private Set<String> deprecatedValues = Collections.emptySet();
169         public MatchValue matchValue;
170         private final Comparator<String> attributeValueComparator;
171 
Attribute( DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)172         private Attribute(
173                 DtdType dtdType,
174                 Element element2,
175                 String aName,
176                 Mode mode2,
177                 String[] split,
178                 String value2,
179                 Set<String> firstComment) {
180             commentsPre = firstComment;
181             element = element2;
182             name = aName.intern();
183             if (name.equals("draft") // normally never permitted on elements with children, but
184                     // special cases...
185                     && dtdType == DtdType.ldml
186                     && !DRAFT_ON_NON_LEAF_ALLOWED.contains(element.getName())) {
187                 int elementChildrenCount = element.getChildren().size();
188                 if (elementChildrenCount > 1
189                         || elementChildrenCount == 1
190                                 && !element.getChildren()
191                                         .keySet()
192                                         .iterator()
193                                         .next()
194                                         .getName()
195                                         .equals("cp")) {
196                     isDeprecatedAttribute = true;
197                     if (DEBUG) {
198                         System.out.println(element.getName() + ":" + element.getChildren());
199                     }
200                 }
201             } else if (name.startsWith("xml:")) {
202                 attributeStatus = AttributeStatus.metadata;
203             }
204             mode = mode2;
205             defaultValue = value2 == null ? null : value2.intern();
206             AttributeType _type = AttributeType.ENUMERATED_TYPE;
207             Map<String, Integer> _values = Collections.emptyMap();
208             if (split.length == 1) {
209                 try {
210                     _type = AttributeType.valueOf(split[0]);
211                 } catch (Exception e) {
212                 }
213             }
214             type = _type;
215 
216             if (_type == AttributeType.ENUMERATED_TYPE) {
217                 LinkedHashMap<String, Integer> temp = new LinkedHashMap<>();
218                 for (String part : split) {
219                     if (part.length() != 0) {
220                         temp.put(part.intern(), temp.size());
221                     }
222                 }
223                 _values = Collections.unmodifiableMap(temp);
224             }
225             values = _values;
226             attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name);
227         }
228 
229         @Override
toString()230         public String toString() {
231             return element.name + ":" + name;
232         }
233 
getSampleValue()234         public String getSampleValue() {
235             return type == AttributeType.ENUMERATED_TYPE
236                     ? (values.containsKey("year") ? "year" : values.keySet().iterator().next())
237                     : matchValue != null ? matchValue.getSample() : MatchValue.DEFAULT_SAMPLE;
238         }
239 
appendDtdString(StringBuilder b)240         public StringBuilder appendDtdString(StringBuilder b) {
241             Attribute a = this;
242             b.append("<!ATTLIST " + element.name + " " + a.name);
243             boolean first;
244             if (a.type == AttributeType.ENUMERATED_TYPE) {
245                 b.append(" (");
246                 first = true;
247                 for (String s : a.values.keySet()) {
248                     if (deprecatedValues.contains(s)) {
249                         continue;
250                     }
251                     if (first) {
252                         first = false;
253                     } else {
254                         b.append(" | ");
255                     }
256                     b.append(s);
257                 }
258                 b.append(")");
259             } else {
260                 b.append(' ').append(a.type);
261             }
262             if (a.mode != Mode.NULL) {
263                 b.append(" ").append(a.mode.source);
264             }
265             if (a.defaultValue != null) {
266                 b.append(" \"").append(a.defaultValue).append('"');
267             }
268             b.append(" >");
269             return b;
270         }
271 
features()272         public String features() {
273             return (type == AttributeType.ENUMERATED_TYPE
274                             ? values.keySet().toString()
275                             : type.toString())
276                     + (mode == Mode.NULL ? "" : ", mode=" + mode)
277                     + (defaultValue == null ? "" : ", default=" + defaultValue);
278         }
279 
280         @Override
getName()281         public String getName() {
282             return name;
283         }
284 
285         private static Splitter COMMA = Splitter.on(',').trimResults();
286 
addComment(String commentIn)287         public void addComment(String commentIn) {
288             if (commentIn.startsWith("@")) {
289                 switch (commentIn) {
290                     case "@METADATA":
291                         attributeStatus = AttributeStatus.metadata;
292                         break;
293                     case "@VALUE":
294                         attributeStatus = AttributeStatus.value;
295                         break;
296                     case "@DEPRECATED":
297                         isDeprecatedAttribute = true;
298                         break;
299                     case "@ALLOWS_UESC":
300                         attributeAllowsUEscape = true;
301                         break;
302 
303                     default:
304                         int colonPos = commentIn.indexOf(':');
305                         if (colonPos < 0) {
306                             throw new IllegalArgumentException(
307                                     element.name
308                                             + " "
309                                             + name
310                                             + "= : Unrecognized ATTLIST annotation: "
311                                             + commentIn);
312                         }
313                         String command = commentIn.substring(0, colonPos);
314                         String argument = commentIn.substring(colonPos + 1);
315                         switch (command) {
316                             case "@DEPRECATED":
317                                 deprecatedValues =
318                                         Collections.unmodifiableSet(
319                                                 new HashSet<>(COMMA.splitToList(argument)));
320                                 break;
321                             case "@MATCH":
322                                 if (matchValue != null) {
323                                     throw new IllegalArgumentException(
324                                             element.name
325                                                     + " "
326                                                     + name
327                                                     + "= : Conflicting @MATCH: "
328                                                     + matchValue.getName()
329                                                     + " & "
330                                                     + argument);
331                                 }
332                                 matchValue = MatchValue.of(argument);
333                                 break;
334                             default:
335                                 throw new IllegalArgumentException(
336                                         element.name
337                                                 + " "
338                                                 + name
339                                                 + "= : Unrecognized ATTLIST annotation: "
340                                                 + commentIn);
341                         }
342                 }
343                 return;
344             }
345             commentsPost = addUnmodifiable(commentsPost, commentIn.trim());
346         }
347 
348         /** Special version of identity; only considers name and name of element */
349         @Override
equals(Object obj)350         public boolean equals(Object obj) {
351             if (!(obj instanceof Attribute)) {
352                 return false;
353             }
354             Attribute that = (Attribute) obj;
355             return name.equals(that.name)
356                     && element.name.equals(
357                             that.element.name) // don't use plain element: circularity
358             // not relevant to identity
359             //                && Objects.equals(comment, that.comment)
360             //                && mode.equals(that.mode)
361             //                && Objects.equals(defaultValue, that.defaultValue)
362             //                && type.equals(that.type)
363             //                && values.equals(that.values)
364             ;
365         }
366 
367         /** Special version of identity; only considers name and name of element */
368         @Override
hashCode()369         public int hashCode() {
370             return name.hashCode() * 37
371                     + element.name.hashCode() // don't use plain element: circularity
372             // not relevant to identity
373             //                ) * 37 + Objects.hashCode(comment)) * 37
374             //                + mode.hashCode()) * 37
375             //                + Objects.hashCode(defaultValue)) * 37
376             //                + type.hashCode()) * 37
377             //                + values.hashCode()
378             ;
379         }
380 
isDeprecated()381         public boolean isDeprecated() {
382             return isDeprecatedAttribute;
383         }
384 
allowsUEscape()385         public boolean allowsUEscape() {
386             return attributeAllowsUEscape;
387         }
388 
isDeprecatedValue(String value)389         public boolean isDeprecatedValue(String value) {
390             return deprecatedValues.contains(value);
391         }
392 
getStatus()393         public AttributeStatus getStatus() {
394             return attributeStatus;
395         }
396 
getValueStatus(String value)397         public ValueStatus getValueStatus(String value) {
398             return deprecatedValues.contains(value)
399                     ? ValueStatus.invalid
400                     : type == AttributeType.ENUMERATED_TYPE
401                             ? (values.containsKey(value) ? ValueStatus.valid : ValueStatus.invalid)
402                             : matchValue == null
403                                     ? ValueStatus.unknown
404                                     : matchValue.is(value)
405                                             ? ValueStatus.valid
406                                             : ValueStatus.invalid;
407         }
408 
getMatchString()409         public String getMatchString() {
410             return type == AttributeType.ENUMERATED_TYPE
411                     ? ENUM_LEAD + JOINER_COMMA_SPACE.join(values.keySet()) + ENUM_TRAIL
412                     : matchValue != null ? AUG_LEAD + matchValue.toString() + AUG_TRAIL : "";
413         }
414 
getMatchLiterals()415         public Set<String> getMatchLiterals() {
416             if (type == AttributeType.ENUMERATED_TYPE) {
417                 return values.keySet();
418             } else if (matchValue != null && matchValue instanceof LiteralMatchValue) {
419                 return ((LiteralMatchValue) matchValue).getItems();
420             }
421             return null;
422         }
423 
getMatchingName(Map<Attribute, Integer> attributes)424         public Attribute getMatchingName(Map<Attribute, Integer> attributes) {
425             for (Attribute attribute : attributes.keySet()) {
426                 if (name.equals(attribute.getName())) {
427                     return attribute;
428                 }
429             }
430             return null;
431         }
432     }
433 
434     public enum ValueStatus {
435         invalid,
436         unknown,
437         valid
438     }
439 
DtdData(DtdType type, String version)440     private DtdData(DtdType type, String version) {
441         this.dtdType = type;
442         this.ROOT = elementFrom(type.rootElement());
443         this.version = version;
444     }
445 
addAttribute(String eName, String aName, String type, String mode, String value)446     private void addAttribute(String eName, String aName, String type, String mode, String value) {
447         Attribute a =
448                 new Attribute(
449                         dtdType,
450                         nameToElement.get(eName),
451                         aName,
452                         Mode.forString(mode),
453                         FILLER.split(type),
454                         value,
455                         preCommentCache);
456         preCommentCache = null;
457         getAttributesFromName().put(aName, a);
458         CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size());
459         lastElement = null;
460         lastAttribute = a;
461     }
462 
463     public enum ElementType {
464         EMPTY,
465         ANY,
466         PCDATA("(#PCDATA)"),
467         CHILDREN;
468         public final String source;
469 
ElementType(String s)470         private ElementType(String s) {
471             source = s;
472         }
473 
ElementType()474         private ElementType() {
475             source = name();
476         }
477     }
478 
479     interface Named {
getName()480         String getName();
481     }
482 
483     public enum ElementStatus {
484         regular,
485         metadata
486     }
487 
488     public static class Element implements Named {
489         public enum ValueConstraint {
490             empty,
491             nonempty,
492             any
493         }
494 
495         public final String name;
496         private String rawModel;
497         private ElementType type;
498         private final Map<Element, Integer> children = new LinkedHashMap<>();
499         private final Map<Attribute, Integer> attributes = new LinkedHashMap<>();
500         private Set<String> commentsPre;
501         private Set<String> commentsPost;
502         private String model;
503         private boolean isOrderedElement;
504         private boolean isDeprecatedElement;
505         private boolean isTechPreviewElement;
506         private ElementStatus elementStatus = ElementStatus.regular;
507         private ValueConstraint valueConstraint = ValueConstraint.nonempty;
508 
Element(String name2)509         private Element(String name2) {
510             name = name2.intern();
511         }
512 
setChildren(DtdData dtdData, String model, Set<String> precomments)513         private void setChildren(DtdData dtdData, String model, Set<String> precomments) {
514             this.commentsPre = precomments;
515             rawModel = model;
516             this.model = clean(model);
517             valueConstraint = ValueConstraint.empty;
518             if (model.equals("EMPTY")) {
519                 type = ElementType.EMPTY;
520                 return;
521             }
522             type = ElementType.CHILDREN;
523             for (String part : FILLER.split(model)) {
524                 if (part.length() != 0) {
525                     if (part.equals("#PCDATA")) {
526                         type = ElementType.PCDATA;
527                         if (HACK_PCDATA_ALLOWS_EMPTY.get(dtdData.dtdType).contains(name)) {
528                             // TODO move to @ annotation in .dtd file
529                             valueConstraint = ValueConstraint.any;
530                         } else {
531                             valueConstraint = ValueConstraint.nonempty;
532                         }
533                     } else if (part.equals("ANY")) {
534                         type = ElementType.ANY;
535                     } else {
536                         CldrUtility.putNew(children, dtdData.elementFrom(part), children.size());
537                     }
538                 }
539             }
540             if ((type == ElementType.CHILDREN) == (children.size() == 0)
541                     && !model.startsWith("(#PCDATA|cp")) {
542                 throw new IllegalArgumentException(
543                         "CLDR does not permit Mixed content. " + name + ":" + model);
544             }
545         }
546 
547         static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)");
548         static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])");
549 
clean(String model2)550         private String clean(String model2) {
551             // (x) -> ( x );
552             // x,y -> x, y
553             // x|y -> x | y
554             String result = CLEANER1.matcher(model2).replaceAll("$1 ");
555             result = CLEANER2.matcher(result).replaceAll(" $1");
556             return result.equals(model2) ? model2 : result; // for debugging
557         }
558 
containsAttribute(String string)559         public boolean containsAttribute(String string) {
560             for (Attribute a : attributes.keySet()) {
561                 if (a.name.equals(string)) {
562                     return true;
563                 }
564             }
565             return false;
566         }
567 
568         @Override
toString()569         public String toString() {
570             return name;
571         }
572 
toDtdString()573         public String toDtdString() {
574             return "<!ELEMENT " + name + " " + getRawModel() + " >";
575         }
576 
getType()577         public ElementType getType() {
578             return type;
579         }
580 
getChildren()581         public Map<Element, Integer> getChildren() {
582             return Collections.unmodifiableMap(children);
583         }
584 
getAttributes()585         public Map<Attribute, Integer> getAttributes() {
586             return Collections.unmodifiableMap(attributes);
587         }
588 
589         @Override
getName()590         public String getName() {
591             return name;
592         }
593 
getChildNamed(String string)594         public Element getChildNamed(String string) {
595             for (Element e : children.keySet()) {
596                 if (e.name.equals(string)) {
597                     return e;
598                 }
599             }
600             return null;
601         }
602 
getAttributeNamed(String string)603         public Attribute getAttributeNamed(String string) {
604             for (Attribute a : attributes.keySet()) {
605                 if (a.name.equals(string)) {
606                     return a;
607                 }
608             }
609             return null;
610         }
611 
addComment(String addition)612         public void addComment(String addition) {
613             if (addition.startsWith("@")) {
614                 // there are exactly 4 cases: deprecated, ordered, techPreview and metadata
615                 switch (addition) {
616                     case "@ORDERED":
617                         isOrderedElement = true;
618                         break;
619                     case "@DEPRECATED":
620                         isDeprecatedElement = true;
621                         break;
622                     case "@METADATA":
623                         elementStatus = ElementStatus.metadata;
624                         break;
625                     case "@TECHPREVIEW":
626                         isTechPreviewElement = true;
627                         break;
628                     default:
629                         if (addition.startsWith("@MATCH") || addition.startsWith("@VALUE")) {
630                             // Try to catch this case
631                             throw new IllegalArgumentException(
632                                     name
633                                             + ": Unrecognized ELEMENT annotation (this isn't ATTLIST!): "
634                                             + addition);
635                         } else {
636                             throw new IllegalArgumentException(
637                                     name + ": Unrecognized ELEMENT annotation: " + addition);
638                         }
639                 }
640                 return;
641             }
642             commentsPost = addUnmodifiable(commentsPost, addition.trim());
643         }
644 
645         /** Special version of equals. Only the name is considered in the identity. */
646         @Override
equals(Object obj)647         public boolean equals(Object obj) {
648             if (!(obj instanceof Element)) {
649                 return false;
650             }
651             Element that = (Element) obj;
652             return name.equals(that.name)
653             // not relevant to the identity of the object
654             //                && Objects.equals(comment, that.comment)
655             //                && type == that.type
656             //                && attributes.equals(that.attributes)
657             //                && children.equals(that.children)
658             ;
659         }
660 
661         /** Special version of hashcode. Only the name is considered in the identity. */
662         @Override
hashCode()663         public int hashCode() {
664             return name.hashCode()
665             // not relevant to the identity of the object
666             // * 37 + Objects.hashCode(comment)
667             // ) * 37 + Objects.hashCode(type)
668             //                ) * 37 + attributes.hashCode()
669             //                ) * 37 + children.hashCode()
670             ;
671         }
672 
isDeprecated()673         public boolean isDeprecated() {
674             return isDeprecatedElement;
675         }
676 
isOrdered()677         public boolean isOrdered() {
678             return isOrderedElement;
679         }
680 
isTechPreview()681         public boolean isTechPreview() {
682             return isTechPreviewElement;
683         }
684 
getElementStatus()685         public ElementStatus getElementStatus() {
686             return elementStatus;
687         }
688 
getValueConstraint()689         public ValueConstraint getValueConstraint() {
690             return valueConstraint;
691         }
692 
693         /**
694          * @return the rawModel
695          */
getRawModel()696         public String getRawModel() {
697             return rawModel;
698         }
699     }
700 
elementFrom(String name)701     private Element elementFrom(String name) {
702         Element result = nameToElement.get(name);
703         if (result == null) {
704             nameToElement.put(name, result = new Element(name));
705         }
706         return result;
707     }
708 
addElement(String name2, String model)709     private void addElement(String name2, String model) {
710         Element element = elementFrom(name2);
711         element.setChildren(this, model, preCommentCache);
712         preCommentCache = null;
713         lastElement = element;
714         lastAttribute = null;
715     }
716 
addComment(String comment)717     private void addComment(String comment) {
718         comment = comment.trim();
719         if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky"
720             if (comment.startsWith("@")) {
721                 throw new IllegalArgumentException(
722                         "@ annotation comment must follow element or attribute, without intervening # comment");
723             }
724             preCommentCache = addUnmodifiable(preCommentCache, comment);
725         } else if (lastElement != null) {
726             lastElement.addComment(comment);
727         } else if (lastAttribute != null) {
728             lastAttribute.addComment(comment);
729         } else {
730             if (comment.startsWith("@")) {
731                 throw new IllegalArgumentException(
732                         "@ annotation comment must follow element or attribute, without intervening # comment");
733             }
734             preCommentCache = addUnmodifiable(preCommentCache, comment);
735         }
736     }
737 
738     // TODO hide this
739     /**
740      * @deprecated
741      */
742     @Deprecated
743     @Override
handleElementDecl(String name, String model)744     public void handleElementDecl(String name, String model) {
745         if (SHOW_ALL) {
746             // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?,
747             // contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?,
748             // listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?,
749             // references?, special*))) >
750             System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >");
751         }
752         addElement(name, model);
753     }
754 
755     // TODO hide this
756     /**
757      * @deprecated
758      */
759     @Deprecated
760     @Override
handleStartDtd(String name, String publicId, String systemId)761     public void handleStartDtd(String name, String publicId, String systemId) {
762         DtdType explicitDtdType = DtdType.valueOf(name);
763         if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) {
764             throw new IllegalArgumentException("Mismatch in dtdTypes");
765         }
766     }
767 
768     /**
769      * @deprecated
770      */
771     @Deprecated
772     @Override
handleAttributeDecl( String eName, String aName, String type, String mode, String value)773     public void handleAttributeDecl(
774             String eName, String aName, String type, String mode, String value) {
775         if (SHOW_ALL) {
776             // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true |
777             // false ) #IMPLIED >
778             // <!ATTLIST version number CDATA #REQUIRED >
779             // <!ATTLIST version cldrVersion CDATA #FIXED "27" >
780 
781             System.out.println(
782                     "<!ATTLIST "
783                             + eName
784                             + " "
785                             + aName
786                             + " "
787                             + type
788                             + " "
789                             + mode
790                             + (value == null ? "" : " \"" + value + "\"")
791                             + " >");
792         }
793         // HACK for 1.1.1
794         if (eName.equals("draft")) {
795             eName = "week";
796         }
797         addAttribute(eName, aName, type, mode, value);
798     }
799 
800     /**
801      * @deprecated
802      */
803     @Deprecated
804     @Override
handleComment(String path, String comment)805     public void handleComment(String path, String comment) {
806         if (comment.contains("Copyright")) {
807             // Zap the copyright comment, replace it with the current one.
808             comment = CldrUtility.getCopyrightString();
809         }
810         if (SHOW_ALL) {
811             // <!-- true and false are deprecated. -->
812             System.out.println("<!-- " + comment.trim() + " -->");
813         }
814         addComment(comment);
815     }
816 
    // TODO hide this
    /**
     * Handler callback for the end of the DTD; unconditionally throws {@link
     * XMLFileReader.AbortException}.
     *
     * <p>NOTE(review): presumably this is how the reader is told to stop once the DTD has been
     * consumed — confirm against XMLFileReader.
     *
     * @deprecated
     */
    @Deprecated
    @Override
    public void handleEndDtd() {
        throw new XMLFileReader.AbortException();
    }
826 
827     /**
828      * Note that it always gets the trunk version
829      *
830      * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead
831      */
832     @Deprecated
getInstance(DtdType type)833     public static DtdData getInstance(DtdType type) {
834         return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory());
835     }
836 
837     /** Special form using version, used only by tests, etc. */
getInstance(DtdType type, String version)838     public static DtdData getInstance(DtdType type, String version) {
839         // Map out versions that had no DTD
840         if (version != null) {
841             switch (version) {
842                 case "1.1.1":
843                     version = "1.1";
844                     break;
845                 case "1.4.1":
846                     version = "1.4";
847                     break;
848                 case "1.5.1":
849                     version = "1.5.0.1";
850                     break;
851                 default:
852             }
853         }
854         File directory =
855                 version == null
856                         ? CLDRConfig.getInstance().getCldrBaseDirectory()
857                         : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version);
858 
859         return getInstance(type, version, directory);
860     }
861 
    /**
     * Instances already built by {@link #getInstance(DtdType, File)}, keyed by the pair of DTD
     * type and CLDR root directory.
     */
    private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE =
            new ConcurrentHashMap<>();
864 
865     /**
866      * Normal version of DtdData Get a DtdData, given the CLDR root directory.
867      *
868      * @param type which DtdType to return
869      * @param directory the CLDR Root directory, which contains the "common" directory.
870      * @return
871      */
getInstance(DtdType type, File directory)872     public static DtdData getInstance(DtdType type, File directory) {
873         Pair<DtdType, File> key = new Pair<>(type, directory);
874         DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory));
875         return data;
876     }
877 
getInstance(DtdType type, String version, File directory)878     private static DtdData getInstance(DtdType type, String version, File directory) {
879         DtdData simpleHandler = new DtdData(type, version);
880         XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler);
881         if (type != type.rootType) {
882             // read the real first, then add onto it.
883             readFile(type.rootType, xfr, directory);
884         }
885         readFile(type, xfr, directory);
886         // HACK
887         if (type == DtdType.ldmlICU) {
888             Element special = simpleHandler.nameToElement.get("special");
889             for (String extraElementName :
890                     Arrays.asList(
891                             "icu:breakIteratorData",
892                             "icu:UCARules",
893                             "icu:scripts",
894                             "icu:transforms",
895                             "icu:ruleBasedNumberFormats",
896                             "icu:isLeapMonth",
897                             "icu:version",
898                             "icu:breakDictionaryData",
899                             "icu:depends")) {
900                 Element extraElement = simpleHandler.nameToElement.get(extraElementName);
901                 special.children.put(extraElement, special.children.size());
902             }
903         }
904         if (simpleHandler.ROOT.children.size() == 0) {
905             throw new IllegalArgumentException(
906                     "Internal Error: DtdData.getInstance("
907                             + type
908                             + ", ...): readFile() failed to return any children!");
909             // should never happen
910         }
911         simpleHandler.finish();
912         simpleHandler.freeze();
913         return simpleHandler;
914     }
915 
    /** Creates the {@link DtdComparator} for this instance, storing it in dtdComparator. */
    private void finish() {
        dtdComparator = new DtdComparator();
    }
919 
readFile(DtdType type, XMLFileReader xfr, File directory)920     public static void readFile(DtdType type, XMLFileReader xfr, File directory) {
921         File file = new File(directory, type.dtdPath);
922         StringReader s =
923                 new StringReader(
924                         "<?xml version='1.0' encoding='UTF-8' ?>"
925                                 + "<!DOCTYPE "
926                                 + type
927                                 + " SYSTEM '"
928                                 + file.getAbsolutePath()
929                                 + "'>");
930         try {
931             xfr.read(type.toString(), s, -1, true); //  DTD_TYPE_TO_FILE.get(type)
932         } catch (IllegalArgumentException iae) {
933             // rethrow
934             throw new IllegalArgumentException("Error while reading " + type, iae);
935         }
936     }
937 
freeze()938     private void freeze() {
939         if (version == null) { // only generate for new versions
940             MergeLists<String> elementMergeList = new MergeLists<>();
941             elementMergeList.add(dtdType.toString());
942             MergeLists<String> attributeMergeList = new MergeLists<>();
943             attributeMergeList.add("_q");
944 
945             for (Element element : nameToElement.values()) {
946                 if (element.children.size() > 0) {
947                     Collection<String> names = getNames(element.children.keySet());
948                     elementMergeList.add(names);
949                     if (DEBUG) {
950                         System.out.println(element.getName() + "\t→\t" + names);
951                     }
952                 }
953                 if (element.attributes.size() > 0) {
954                     Collection<String> names = getNames(element.attributes.keySet());
955                     attributeMergeList.add(names);
956                     if (DEBUG) {
957                         System.out.println(element.getName() + "\t→\t@" + names);
958                     }
959                 }
960             }
961             List<String> elementList = elementMergeList.merge();
962             List<String> attributeList = attributeMergeList.merge();
963             if (DEBUG) {
964                 System.out.println("Element Ordering:\t" + elementList);
965                 System.out.println("Attribute Ordering:\t" + attributeList);
966             }
967             elementComparator = new MapComparator<>(elementList).setErrorOnMissing(true).freeze();
968             attributeComparator =
969                     new MapComparator<>(attributeList).setErrorOnMissing(true).freeze();
970         }
971         nameToAttributes.freeze();
972         nameToElement = Collections.unmodifiableMap(nameToElement);
973     }
974 
getNames(Collection<? extends Named> keySet)975     private Collection<String> getNames(Collection<? extends Named> keySet) {
976         List<String> result = new ArrayList<>();
977         for (Named e : keySet) {
978             result.add(e.getName());
979         }
980         return result;
981     }
982 
    /** The kinds of item a DTD describes: an element, an attribute, or an attribute value. */
    public enum DtdItem {
        ELEMENT,
        ATTRIBUTE,
        ATTRIBUTE_VALUE
    }
988 
    /** Compares two values of a given attribute on a given element. */
    public interface AttributeValueComparator {
        /**
         * @param element the element name
         * @param attribute the attribute name
         * @param value1 the first attribute value
         * @param value2 the second attribute value
         * @return a negative, zero, or positive result in the usual comparator sense
         *     (NOTE(review): inferred from the name and int return type — confirm with
         *     implementations)
         */
        public int compare(String element, String attribute, String value1, String value2);
    }
992 
    /**
     * Returns this instance's path comparator.
     *
     * @param avc ignored by this implementation; the same comparator is returned regardless
     * @return the comparator stored in dtdComparator
     */
    public Comparator<String> getDtdComparator(AttributeValueComparator avc) {
        return dtdComparator;
    }
996 
    /** Returns this instance's path comparator (the one created by finish()). */
    public DtdComparator getDtdComparator() {
        return dtdComparator;
    }
1000 
1001     public class DtdComparator implements Comparator<String> {
1002         @Override
compare(String path1, String path2)1003         public int compare(String path1, String path2) {
1004             XPathParts a = XPathParts.getFrozenInstance(path1);
1005             XPathParts b = XPathParts.getFrozenInstance(path2);
1006             return xpathComparator(a, b);
1007         }
1008 
xpathComparator(XPathParts a, XPathParts b)1009         public int xpathComparator(XPathParts a, XPathParts b) {
1010             // there must always be at least one element
1011             String baseA = a.getElement(0);
1012             String baseB = b.getElement(0);
1013             if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) {
1014                 throw new IllegalArgumentException(
1015                         "Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB);
1016             }
1017             int min = Math.min(a.size(), b.size());
1018             Element parent = ROOT;
1019             Element elementA;
1020             for (int i = 1; i < min; ++i, parent = elementA) {
1021                 // add extra test for "fake" elements, used in diffing. they always start with _
1022                 String elementRawA = a.getElement(i);
1023                 String elementRawB = b.getElement(i);
1024                 if (elementRawA.startsWith("_")) {
1025                     return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1;
1026                 } else if (elementRawB.startsWith("_")) {
1027                     return 1;
1028                 }
1029                 //
1030                 elementA = nameToElement.get(elementRawA);
1031                 Element elementB = nameToElement.get(elementRawB);
1032                 if (elementA != elementB) {
1033                     int aa = parent.children.get(elementA);
1034                     int bb = parent.children.get(elementB);
1035                     return aa - bb;
1036                 }
1037                 int countA = a.getAttributeCount(i);
1038                 int countB = b.getAttributeCount(i);
1039                 if (countA == 0 && countB == 0) {
1040                     continue;
1041                 }
1042                 // we have two ways to compare the attributes. One based on the dtd,
1043                 // and one based on explicit comparators
1044 
1045                 // at this point the elements are the same and correspond to elementA
1046                 // in the dtd
1047 
1048                 // Handle the special added elements
1049                 String aqValue = a.getAttributeValue(i, "_q");
1050                 if (aqValue != null) {
1051                     String bqValue = b.getAttributeValue(i, "_q");
1052                     if (!aqValue.equals(bqValue)) {
1053                         int aValue = Integer.parseInt(aqValue);
1054                         int bValue = Integer.parseInt(bqValue);
1055                         return aValue - bValue;
1056                     }
1057                     --countA;
1058                     --countB;
1059                 }
1060 
1061                 attributes:
1062                 for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) {
1063                     Attribute main = attr.getKey();
1064                     String valueA = a.getAttributeValue(i, main.name);
1065                     String valueB = b.getAttributeValue(i, main.name);
1066                     if (valueA == null) {
1067                         if (valueB != null) {
1068                             return -1;
1069                         }
1070                     } else if (valueB == null) {
1071                         return 1;
1072                     } else if (valueA.equals(valueB)) {
1073                         --countA;
1074                         --countB;
1075                         if (countA == 0 && countB == 0) {
1076                             break attributes;
1077                         }
1078                         continue; // TODO
1079                     } else if (main.attributeValueComparator != null) {
1080                         return main.attributeValueComparator.compare(valueA, valueB);
1081                     } else if (main.values.size() != 0) {
1082                         int aa = main.values.get(valueA);
1083                         int bb = main.values.get(valueB);
1084                         return aa - bb;
1085                     } else {
1086                         return valueA.compareTo(valueB);
1087                     }
1088                 }
1089                 if (countA != 0 || countB != 0) {
1090                     throw new IllegalArgumentException();
1091                 }
1092             }
1093             return a.size() - b.size();
1094         }
1095     }
1096 
    /**
     * Returns the comparator over attribute names. freeze() builds it from the merged attribute
     * lists only when no explicit version was given.
     *
     * <p>NOTE(review): may be null for versioned instances — confirm against the field's
     * declaration.
     */
    public MapComparator<String> getAttributeComparator() {
        return attributeComparator;
    }
1100 
    /**
     * Returns the comparator over element names. freeze() builds it from the merged child lists
     * only when no explicit version was given.
     *
     * <p>NOTE(review): may be null for versioned instances — confirm against the field's
     * declaration.
     */
    public MapComparator<String> getElementComparator() {
        return elementComparator;
    }
1104 
    /** Returns the relation from name to attributes; it is frozen by freeze(). */
    public Relation<String, Attribute> getAttributesFromName() {
        return nameToAttributes;
    }
1108 
    /** Returns the map from element name to element; freeze() makes it unmodifiable. */
    public Map<String, Element> getElementFromName() {
        return nameToElement;
    }
1112 
1113     @Override
toString()1114     public String toString() {
1115         StringBuilder b = new StringBuilder();
1116         // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?,
1117         // contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?,
1118         // listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?,
1119         // special*))) >
1120         // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false
1121         // ) #IMPLIED > <!-- true and false are deprecated. -->
1122         Seen seen = new Seen(dtdType);
1123         seen.seenElements.add(ANY);
1124         seen.seenElements.add(PCDATA);
1125         toString(ROOT, b, seen);
1126 
1127         // Hack for ldmlIcu: catch the items that are not mentioned in the original
1128         int currentEnd = b.length();
1129         for (Element e : nameToElement.values()) {
1130             toString(e, b, seen);
1131         }
1132         if (currentEnd != b.length()) {
1133             b.insert(
1134                     currentEnd,
1135                     System.lineSeparator()
1136                             + System.lineSeparator()
1137                             + "<!-- Elements not reachable from root! -->"
1138                             + System.lineSeparator());
1139         }
1140         return b.toString();
1141     }
1142 
1143     static final class Seen {
1144         Set<Element> seenElements = new HashSet<>();
1145         Set<Attribute> seenAttributes = new HashSet<>();
1146 
Seen(DtdType dtdType)1147         public Seen(DtdType dtdType) {
1148             if (dtdType.rootType == dtdType) {
1149                 return;
1150             }
1151             DtdData otherData = DtdData.getInstance(dtdType.rootType);
1152             walk(otherData, otherData.ROOT);
1153             seenElements.remove(otherData.nameToElement.get("special"));
1154         }
1155 
walk(DtdData otherData, Element current)1156         private void walk(DtdData otherData, Element current) {
1157             seenElements.add(current);
1158             seenAttributes.addAll(current.attributes.keySet());
1159             for (Element e : current.children.keySet()) {
1160                 walk(otherData, e);
1161             }
1162         }
1163     }
1164 
getDescendents(Element start, Set<Element> toAddTo)1165     public Set<Element> getDescendents(Element start, Set<Element> toAddTo) {
1166         if (!toAddTo.contains(start)) {
1167             toAddTo.add(start);
1168             for (Element e : start.children.keySet()) {
1169                 getDescendents(e, toAddTo);
1170             }
1171         }
1172         return toAddTo;
1173     }
1174 
toString(Element current, StringBuilder b, Seen seen)1175     private void toString(Element current, StringBuilder b, Seen seen) {
1176         boolean first = true;
1177         if (seen.seenElements.contains(current)) {
1178             return;
1179         }
1180         seen.seenElements.add(current);
1181         boolean elementDeprecated = isDeprecated(current.name, "*", "*");
1182 
1183         showComments(b, current.commentsPre, true);
1184         b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >");
1185         if (USE_SYNTHESIZED) {
1186             Element aliasElement = getElementFromName().get("alias");
1187             // b.append(current.rawChildren);
1188             if (!current.children.isEmpty()) {
1189                 LinkedHashSet<Element> elements = new LinkedHashSet<>(current.children.keySet());
1190                 boolean hasAlias = aliasElement != null && elements.remove(aliasElement);
1191                 // boolean hasSpecial = specialElement != null && elements.remove(specialElement);
1192                 if (hasAlias) {
1193                     b.append("(alias |");
1194                 }
1195                 b.append("(");
1196                 // <!ELEMENT transformNames ( alias | (transformName | special)* ) >
1197                 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) >
1198 
1199                 for (Element e : elements) {
1200                     if (first) {
1201                         first = false;
1202                     } else {
1203                         b.append(", ");
1204                     }
1205                     b.append(e.name);
1206                     if (e.type != ElementType.PCDATA) {
1207                         b.append("*");
1208                     }
1209                 }
1210                 if (hasAlias) {
1211                     b.append(")");
1212                 }
1213                 b.append(")");
1214             } else {
1215                 b.append(current.type == null ? "???" : current.type.source);
1216             }
1217             b.append(">");
1218         }
1219         showComments(b, current.commentsPost, false);
1220         if (isOrdered(current.name)) {
1221             b.append(COMMENT_PREFIX + "<!--@ORDERED-->");
1222         }
1223         if (isTechPreview(current.name)) {
1224             b.append(COMMENT_PREFIX + "<!--@TECHPREVIEW-->");
1225         }
1226         if (current.getElementStatus() != ElementStatus.regular) {
1227             b.append(
1228                     COMMENT_PREFIX
1229                             + "<!--@"
1230                             + current.getElementStatus().toString().toUpperCase(Locale.ROOT)
1231                             + "-->");
1232         }
1233         if (elementDeprecated) {
1234             b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
1235         }
1236 
1237         LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>();
1238 
1239         for (Attribute a : current.attributes.keySet()) {
1240             if (seen.seenAttributes.contains(a)) {
1241                 continue;
1242             }
1243             seen.seenAttributes.add(a);
1244             boolean attributeDeprecated =
1245                     elementDeprecated || isDeprecated(current.name, a.name, "*");
1246             boolean attributeUEscaped = allowsUEscape(current.name, a.name, "*");
1247             deprecatedValues.clear();
1248 
1249             showComments(b, a.commentsPre, true);
1250             b.append("\n<!ATTLIST " + current.name + " " + a.name);
1251             if (a.type == AttributeType.ENUMERATED_TYPE) {
1252                 b.append(" (");
1253                 first = true;
1254                 for (String s : a.values.keySet()) {
1255                     if (first) {
1256                         first = false;
1257                     } else {
1258                         b.append(" | ");
1259                     }
1260                     b.append(s);
1261                     if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) {
1262                         deprecatedValues.add(s);
1263                     }
1264                 }
1265                 b.append(")");
1266             } else {
1267                 b.append(' ').append(a.type);
1268             }
1269             if (a.mode != Mode.NULL) {
1270                 b.append(" ").append(a.mode.source);
1271             }
1272             if (a.defaultValue != null) {
1273                 b.append(" \"").append(a.defaultValue).append('"');
1274             }
1275             b.append(" >");
1276             showComments(b, a.commentsPost, false);
1277             //            if (attributeDeprecated != deprecatedComment) {
1278             //                System.out.println("*** BAD DEPRECATION ***" + a);
1279             //            }
1280             if (a.matchValue != null) {
1281                 b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->");
1282             }
1283             if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) {
1284                 b.append(COMMENT_PREFIX + "<!--@METADATA-->");
1285             } else if (!isDistinguishing(current.name, a.name)) {
1286                 b.append(COMMENT_PREFIX + "<!--@VALUE-->");
1287             }
1288             if (attributeDeprecated) {
1289                 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
1290             } else if (!deprecatedValues.isEmpty()) {
1291                 b.append(
1292                         COMMENT_PREFIX
1293                                 + "<!--@DEPRECATED:"
1294                                 + Joiner.on(", ").join(deprecatedValues)
1295                                 + "-->");
1296             }
1297             if (attributeUEscaped) {
1298                 b.append(COMMENT_PREFIX + "<!--@ALLOWS_UESC-->");
1299             }
1300         }
1301         if (current.children.size() > 0) {
1302             for (Element e : current.children.keySet()) {
1303                 toString(e, b, seen);
1304             }
1305         }
1306     }
1307 
showComments(StringBuilder b, Set<String> comments, boolean separate)1308     private void showComments(StringBuilder b, Set<String> comments, boolean separate) {
1309         if (comments == null) {
1310             return;
1311         }
1312         if (separate && b.length() != 0) {
1313             b.append(System.lineSeparator());
1314         }
1315         for (String c : comments) {
1316             boolean deprecatedComment = false; // the following served its purpose...
1317             // c.toLowerCase(Locale.ENGLISH).contains("deprecat");
1318             if (!deprecatedComment) {
1319                 if (separate) {
1320                     // special handling for very first comment
1321                     if (b.length() == 0) {
1322                         b.append("<!--")
1323                                 .append(System.lineSeparator())
1324                                 .append(c)
1325                                 .append(System.lineSeparator())
1326                                 .append("-->");
1327                         continue;
1328                     }
1329                     b.append(System.lineSeparator());
1330                 } else {
1331                     b.append(COMMENT_PREFIX);
1332                 }
1333                 b.append("<!-- ").append(c).append(" -->");
1334             }
1335         }
1336     }
1337 
removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1338     public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) {
1339         for (Iterator<T> it = elements.iterator(); it.hasNext(); ) {
1340             T item = it.next();
1341             if (matcher.transform(item) == Boolean.TRUE) {
1342                 it.remove();
1343                 return item;
1344             }
1345         }
1346         return null;
1347     }
1348 
    /** Returns a new, caller-owned set holding all elements in nameToElement's value order. */
    public Set<Element> getElements() {
        return new LinkedHashSet<>(nameToElement.values());
    }
1352 
    /** Returns a new, caller-owned set holding all attributes in nameToAttributes. */
    public Set<Attribute> getAttributes() {
        return new LinkedHashSet<>(nameToAttributes.values());
    }
1356 
isDistinguishing(String elementName, String attribute)1357     public boolean isDistinguishing(String elementName, String attribute) {
1358         return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished;
1359     }
1360 
    /** Attribute names that are always annotated as {@code @METADATA} when the DTD is rendered. */
    static final Set<String> METADATA =
            new HashSet<>(Arrays.asList("references", "standard", "draft"));
1363 
addUnmodifiable(Set<String> comment, String addition)1364     static final Set<String> addUnmodifiable(Set<String> comment, String addition) {
1365         if (comment == null) {
1366             return Collections.singleton(addition);
1367         } else {
1368             comment = new LinkedHashSet<>(comment);
1369             comment.add(addition);
1370             return Collections.unmodifiableSet(comment);
1371         }
1372     }
1373 
1374     public class IllegalByDtdException extends RuntimeException {
1375         private static final long serialVersionUID = 1L;
1376         public final String elementName;
1377         public final String attributeName;
1378         public final String attributeValue;
1379 
IllegalByDtdException( String elementName, String attributeName, String attributeValue)1380         public IllegalByDtdException(
1381                 String elementName, String attributeName, String attributeValue) {
1382             this.elementName = elementName;
1383             this.attributeName = attributeName;
1384             this.attributeValue = attributeValue;
1385         }
1386 
1387         @Override
getMessage()1388         public String getMessage() {
1389             return "Dtd "
1390                     + dtdType
1391                     + " doesn’t allow "
1392                     + "element="
1393                     + elementName
1394                     + (attributeName == null ? "" : ", attribute: " + attributeName)
1395                     + (attributeValue == null ? "" : ", attributeValue: " + attributeValue);
1396         }
1397     }
1398 
1399     // @SuppressWarnings("unused")
isDeprecated(String elementName, String attributeName, String attributeValue)1400     public boolean isDeprecated(String elementName, String attributeName, String attributeValue) {
1401         Element element = getElementThrowingIfNull(elementName, null, null);
1402         if (element.isDeprecatedElement) {
1403             return true;
1404         }
1405         if ("*".equals(attributeName) || "_q".equals(attributeName)) {
1406             return false;
1407         }
1408         Attribute attribute = element.getAttributeNamed(attributeName);
1409         if (attribute == null) {
1410             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1411         } else if (attribute.isDeprecatedAttribute) {
1412             return true;
1413         }
1414         return attribute.deprecatedValues.contains(
1415                 attributeValue); // don't need special test for "*"
1416     }
1417 
allowsUEscape(String elementName, String attributeName, String attributeValue)1418     public boolean allowsUEscape(String elementName, String attributeName, String attributeValue) {
1419         Element element = getElementThrowingIfNull(elementName, null, null);
1420         Attribute attribute = element.getAttributeNamed(attributeName);
1421         if (attribute == null) {
1422             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1423         } else if (attribute.allowsUEscape()) {
1424             return true;
1425         }
1426         return false;
1427     }
1428 
1429     /**
1430      * Returns whether an element (specified by its full name) is ordered. This method understands
1431      * all elements in the DTDs used (including the ICU extensions), but will throw
1432      * IllegalByDtdException for unknown elements. See CLDR-8614 for more background.
1433      */
isOrdered(String elementName)1434     public boolean isOrdered(String elementName) {
1435         Element element = getElementThrowingIfNull(elementName, null, null);
1436         return element.isOrdered();
1437     }
1438 
getElementThrowingIfNull( String elementName, String attributeName, String value)1439     public Element getElementThrowingIfNull(
1440             String elementName, String attributeName, String value) {
1441         Element element = nameToElement.get(elementName);
1442         if (element == null) {
1443             throw new IllegalByDtdException(elementName, attributeName, value);
1444         }
1445         return element;
1446     }
1447 
1448     /**
1449      * Returns whether an element (specified by its full name) is a tech preview. This method
1450      * understands all elements in the DTDs used (including the ICU extensions), but will throw
1451      * IllegalByDtdException for unknown elements. See CLDR-8614 for more background.
1452      */
isTechPreview(String elementName)1453     public boolean isTechPreview(String elementName) {
1454         Element element = getElementThrowingIfNull(elementName, null, null);
1455         return element.isTechPreview();
1456     }
1457 
getAttributeStatus(String elementName, String attributeName)1458     public AttributeStatus getAttributeStatus(String elementName, String attributeName) {
1459         if ("_q".equals(attributeName)) {
1460             return AttributeStatus.distinguished; // special case
1461         }
1462         Element element = nameToElement.get(elementName);
1463         if (element == null) {
1464             if (elementName.startsWith("icu:")) {
1465                 return AttributeStatus.distinguished;
1466             }
1467             throw new IllegalByDtdException(elementName, attributeName, null);
1468         }
1469         Attribute attribute = element.getAttributeNamed(attributeName);
1470         if (attribute == null) {
1471             if (elementName.startsWith("icu:")) {
1472                 return AttributeStatus.distinguished;
1473             }
1474             throw new IllegalByDtdException(elementName, attributeName, null);
1475         }
1476         return attribute.attributeStatus;
1477     }
1478 
1479     // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1480     private static MapComparator<String> valueOrdering =
1481             new MapComparator<String>().setErrorOnMissing(false).freeze();
1482 
1483     static MapComparator<String> dayValueOrder =
1484             new MapComparator<String>()
1485                     .add("sun", "mon", "tue", "wed", "thu", "fri", "sat")
1486                     .freeze();
1487     static MapComparator<String> dayPeriodOrder =
1488             new MapComparator<String>()
1489                     .add(
1490                             "midnight",
1491                             "am",
1492                             "noon",
1493                             "pm",
1494                             "morning1",
1495                             "morning2",
1496                             "afternoon1",
1497                             "afternoon2",
1498                             "evening1",
1499                             "evening2",
1500                             "night1",
1501                             "night2",
1502                             // The ones on the following line are no longer used actively. Can be
1503                             // removed later?
1504                             "earlyMorning",
1505                             "morning",
1506                             "midDay",
1507                             "afternoon",
1508                             "evening",
1509                             "night",
1510                             "weeHours")
1511                     .freeze();
1512     static MapComparator<String> dateTimeFormatOrder =
1513             new MapComparator<String>().add("standard", "atTime").freeze();
1514     static MapComparator<String> listPatternOrder =
1515             new MapComparator<String>().add("start", "middle", "end", "2", "3").freeze();
1516     static MapComparator<String> widthOrder =
1517             new MapComparator<String>()
1518                     .add("abbreviated", "narrow", "short", "wide", "all")
1519                     .freeze();
1520     static MapComparator<String> lengthOrder =
1521             new MapComparator<String>().add("full", "long", "medium", "short").freeze();
1522     static MapComparator<String> dateFieldOrder =
1523             new MapComparator<String>()
1524                     .add(
1525                             "era",
1526                             "era-short",
1527                             "era-narrow",
1528                             "year",
1529                             "year-short",
1530                             "year-narrow",
1531                             "quarter",
1532                             "quarter-short",
1533                             "quarter-narrow",
1534                             "month",
1535                             "month-short",
1536                             "month-narrow",
1537                             "week",
1538                             "week-short",
1539                             "week-narrow",
1540                             "weekOfMonth",
1541                             "weekOfMonth-short",
1542                             "weekOfMonth-narrow",
1543                             "day",
1544                             "day-short",
1545                             "day-narrow",
1546                             "dayOfYear",
1547                             "dayOfYear-short",
1548                             "dayOfYear-narrow",
1549                             "weekday",
1550                             "weekday-short",
1551                             "weekday-narrow",
1552                             "weekdayOfMonth",
1553                             "weekdayOfMonth-short",
1554                             "weekdayOfMonth-narrow",
1555                             "sun",
1556                             "sun-short",
1557                             "sun-narrow",
1558                             "mon",
1559                             "mon-short",
1560                             "mon-narrow",
1561                             "tue",
1562                             "tue-short",
1563                             "tue-narrow",
1564                             "wed",
1565                             "wed-short",
1566                             "wed-narrow",
1567                             "thu",
1568                             "thu-short",
1569                             "thu-narrow",
1570                             "fri",
1571                             "fri-short",
1572                             "fri-narrow",
1573                             "sat",
1574                             "sat-short",
1575                             "sat-narrow",
1576                             "dayperiod-short",
1577                             "dayperiod",
1578                             "dayperiod-narrow",
1579                             "hour",
1580                             "hour-short",
1581                             "hour-narrow",
1582                             "minute",
1583                             "minute-short",
1584                             "minute-narrow",
1585                             "second",
1586                             "second-short",
1587                             "second-narrow",
1588                             "zone",
1589                             "zone-short",
1590                             "zone-narrow")
1591                     .freeze();
1592     static MapComparator<String> nameFieldOrder =
1593             new MapComparator<String>().add(PersonNameFormatter.ModifiedField.ALL_SAMPLES).freeze();
1594     static MapComparator<String> orderValueOrder =
1595             new MapComparator<String>()
1596                     .add(PersonNameFormatter.Order.ALL, Object::toString)
1597                     .freeze();
1598     static MapComparator<String> lengthValueOrder =
1599             new MapComparator<String>()
1600                     .add(PersonNameFormatter.Length.ALL, Object::toString)
1601                     .freeze();
1602     static MapComparator<String> usageValueOrder =
1603             new MapComparator<String>()
1604                     .add(PersonNameFormatter.Usage.ALL, Object::toString)
1605                     .freeze();
1606     static MapComparator<String> formalityValueOrder =
1607             new MapComparator<String>()
1608                     .add(PersonNameFormatter.Formality.ALL, Object::toString)
1609                     .freeze();
1610     static MapComparator<String> sampleNameItemOrder =
1611             new MapComparator<String>()
1612                     .add(PersonNameFormatter.SampleType.ALL, Object::toString)
1613                     .freeze();
1614 
1615     // TODO We could build most of the above from the dtd data for literal values. That way they
1616     // would always be
1617     // in sync.
1618 
getUnitOrder()1619     public static MapComparator<String> getUnitOrder() {
1620         return UnitOrderHolder.INSTANCE;
1621     }
1622 
1623     private static final class UnitOrderHolder {
1624         private static final MapComparator<String> INSTANCE =
1625                 //                new MapComparator<String>()
1626                 //
1627                 // .add(Validity.getInstance().getCodeToStatus(LstrType.unit).keySet())
1628                 //                        .freeze();
1629                 //    }
1630                 new MapComparator<>(
1631                                 Arrays.asList(
1632                                         "acceleration-g-force",
1633                                         "acceleration-meter-per-square-second",
1634                                         "acceleration-meter-per-second-squared", // deprecated
1635                                         "angle-revolution",
1636                                         "angle-radian",
1637                                         "angle-degree",
1638                                         "angle-arc-minute",
1639                                         "angle-arc-second",
1640                                         "area-square-kilometer",
1641                                         "area-hectare",
1642                                         "area-square-meter",
1643                                         "area-square-centimeter",
1644                                         "area-square-mile",
1645                                         "area-acre",
1646                                         "area-square-yard",
1647                                         "area-square-foot",
1648                                         "area-square-inch",
1649                                         "area-dunam",
1650                                         "concentr-karat",
1651                                         "proportion-karat", // deprecated
1652                                         "concentr-milligram-ofglucose-per-deciliter",
1653                                         "concentr-milligram-per-deciliter",
1654                                         "concentr-millimole-per-liter",
1655                                         "concentr-item",
1656                                         "concentr-portion",
1657                                         "concentr-permillion",
1658                                         "concentr-part-per-million", // deprecated
1659                                         "concentr-percent",
1660                                         "concentr-permille",
1661                                         "concentr-permyriad",
1662                                         "concentr-mole",
1663                                         "concentr-ofglucose",
1664                                         "consumption-liter-per-kilometer",
1665                                         "consumption-liter-per-100-kilometer",
1666                                         "consumption-liter-per-100kilometers", // deprecated
1667                                         "consumption-mile-per-gallon",
1668                                         "consumption-mile-per-gallon-imperial",
1669                                         "digital-petabyte",
1670                                         "digital-terabyte",
1671                                         "digital-terabit",
1672                                         "digital-gigabyte",
1673                                         "digital-gigabit",
1674                                         "digital-megabyte",
1675                                         "digital-megabit",
1676                                         "digital-kilobyte",
1677                                         "digital-kilobit",
1678                                         "digital-byte",
1679                                         "digital-bit",
1680                                         "duration-century",
1681                                         "duration-decade",
1682                                         "duration-year",
1683                                         "duration-year-person",
1684                                         "duration-quarter",
1685                                         "duration-month",
1686                                         "duration-month-person",
1687                                         "duration-week",
1688                                         "duration-week-person",
1689                                         "duration-day",
1690                                         "duration-day-person",
1691                                         "duration-hour",
1692                                         "duration-minute",
1693                                         "duration-second",
1694                                         "duration-millisecond",
1695                                         "duration-microsecond",
1696                                         "duration-nanosecond",
1697                                         "electric-ampere",
1698                                         "electric-milliampere",
1699                                         "electric-ohm",
1700                                         "electric-volt",
1701                                         "energy-kilocalorie",
1702                                         "energy-calorie",
1703                                         "energy-foodcalorie",
1704                                         "energy-kilojoule",
1705                                         "energy-joule",
1706                                         "energy-kilowatt-hour",
1707                                         "energy-electronvolt",
1708                                         "energy-british-thermal-unit",
1709                                         "energy-therm-us",
1710                                         "force-pound-force",
1711                                         "force-newton",
1712                                         "force-kilowatt-hour-per-100-kilometer",
1713                                         "frequency-gigahertz",
1714                                         "frequency-megahertz",
1715                                         "frequency-kilohertz",
1716                                         "frequency-hertz",
1717                                         "graphics-em",
1718                                         "graphics-pixel",
1719                                         "graphics-megapixel",
1720                                         "graphics-pixel-per-centimeter",
1721                                         "graphics-pixel-per-inch",
1722                                         "graphics-dot-per-centimeter",
1723                                         "graphics-dot-per-inch",
1724                                         "graphics-dot",
1725                                         "length-earth-radius",
1726                                         "length-100-kilometer",
1727                                         "length-kilometer",
1728                                         "length-meter",
1729                                         "length-decimeter",
1730                                         "length-centimeter",
1731                                         "length-millimeter",
1732                                         "length-micrometer",
1733                                         "length-nanometer",
1734                                         "length-picometer",
1735                                         "length-mile",
1736                                         "length-yard",
1737                                         "length-foot",
1738                                         "length-inch",
1739                                         "length-parsec",
1740                                         "length-light-year",
1741                                         "length-astronomical-unit",
1742                                         "length-furlong",
1743                                         "length-fathom",
1744                                         "length-nautical-mile",
1745                                         "length-mile-scandinavian",
1746                                         "length-point",
1747                                         "length-solar-radius",
1748                                         "light-lux",
1749                                         "light-candela",
1750                                         "light-lumen",
1751                                         "light-solar-luminosity",
1752                                         "mass-tonne",
1753                                         "mass-metric-ton",
1754                                         "mass-kilogram",
1755                                         "mass-gram",
1756                                         "mass-milligram",
1757                                         "mass-microgram",
1758                                         "mass-ton",
1759                                         "mass-stone",
1760                                         "mass-pound",
1761                                         "mass-ounce",
1762                                         "mass-ounce-troy",
1763                                         "mass-carat",
1764                                         "mass-dalton",
1765                                         "mass-earth-mass",
1766                                         "mass-solar-mass",
1767                                         "mass-grain",
1768                                         "power-gigawatt",
1769                                         "power-megawatt",
1770                                         "power-kilowatt",
1771                                         "power-watt",
1772                                         "power-milliwatt",
1773                                         "power-horsepower",
1774                                         "pressure-millimeter-ofhg",
1775                                         "pressure-millimeter-of-mercury", // deprecated
1776                                         "pressure-ofhg",
1777                                         "pressure-pound-force-per-square-inch",
1778                                         "pressure-pound-per-square-inch", // deprecated
1779                                         "pressure-inch-ofhg",
1780                                         "pressure-inch-hg", // deprecated
1781                                         "pressure-bar",
1782                                         "pressure-millibar",
1783                                         "pressure-atmosphere",
1784                                         "pressure-pascal",
1785                                         "pressure-hectopascal",
1786                                         "pressure-kilopascal",
1787                                         "pressure-megapascal",
1788                                         "speed-kilometer-per-hour",
1789                                         "speed-meter-per-second",
1790                                         "speed-mile-per-hour",
1791                                         "speed-knot",
1792                                         "speed-beaufort",
1793                                         "temperature-generic",
1794                                         "temperature-celsius",
1795                                         "temperature-fahrenheit",
1796                                         "temperature-kelvin",
1797                                         "torque-pound-force-foot",
1798                                         "torque-pound-foot", // deprecated
1799                                         "torque-newton-meter",
1800                                         "volume-cubic-kilometer",
1801                                         "volume-cubic-meter",
1802                                         "volume-cubic-centimeter",
1803                                         "volume-cubic-mile",
1804                                         "volume-cubic-yard",
1805                                         "volume-cubic-foot",
1806                                         "volume-cubic-inch",
1807                                         "volume-megaliter",
1808                                         "volume-hectoliter",
1809                                         "volume-liter",
1810                                         "volume-deciliter",
1811                                         "volume-centiliter",
1812                                         "volume-milliliter",
1813                                         "volume-pint-metric",
1814                                         "volume-cup-metric",
1815                                         "volume-acre-foot",
1816                                         "volume-bushel",
1817                                         "volume-gallon",
1818                                         "volume-gallon-imperial",
1819                                         "volume-quart",
1820                                         "volume-pint",
1821                                         "volume-pint-imperial",
1822                                         "volume-cup",
1823                                         "volume-fluid-ounce",
1824                                         "volume-fluid-ounce-imperial",
1825                                         "volume-tablespoon",
1826                                         "volume-teaspoon",
1827                                         "volume-barrel",
1828                                         "volume-dessert-spoon",
1829                                         "volume-dessert-spoon-imperial",
1830                                         "volume-drop",
1831                                         "volume-dram",
1832                                         "volume-jigger",
1833                                         "volume-pinch",
1834                                         "volume-quart-imperial",
1835                                         "angle-steradian",
1836                                         "concentr-katal",
1837                                         "electric-coulomb",
1838                                         "electric-farad",
1839                                         "electric-henry",
1840                                         "electric-siemens",
1841                                         "energy-calorie-it",
1842                                         "energy-british-thermal-unit-it",
1843                                         "energy-becquerel",
1844                                         "energy-sievert",
1845                                         "energy-gray",
1846                                         "force-kilogram-force",
1847                                         "length-rod",
1848                                         "length-chain",
1849                                         "magnetic-tesla",
1850                                         "magnetic-weber",
1851                                         "temperature-rankine",
1852                                         "duration-fortnight",
1853                                         "mass-slug",
1854                                         "pressure-gasoline-energy-density",
1855                                         "length-rin",
1856                                         "length-sun",
1857                                         "length-shaku-length",
1858                                         "length-shaku-cloth",
1859                                         "length-ken",
1860                                         "length-jo-jp",
1861                                         "length-ri-jp",
1862                                         "area-bu-jp",
1863                                         "area-se-jp",
1864                                         "area-cho",
1865                                         "volume-kosaji",
1866                                         "volume-osaji",
1867                                         "volume-cup-jp",
1868                                         "volume-shaku",
1869                                         "volume-sai",
1870                                         "volume-to-jp",
1871                                         "volume-koku",
1872                                         "speed-light-speed",
1873                                         "mass-fun",
1874                                         "concentr-portion-per-1e9",
1875                                         "duration-night"))
1876                         .freeze();
1877     }
1878 
1879     static MapComparator<String> countValueOrder =
1880             new MapComparator<String>()
1881                     .add("0", "1", "zero", "one", "two", "few", "many", "other")
1882                     .freeze();
1883     static MapComparator<String> unitLengthOrder =
1884             new MapComparator<String>().add("long", "short", "narrow").freeze();
1885     static MapComparator<String> currencyFormatOrder =
1886             new MapComparator<String>().add("standard", "accounting").freeze();
1887     static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator();
1888 
1889     static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator();
1890 
1891     // Hack for US
1892     static final Comparator<String> UNICODE_SET_COMPARATOR =
1893             new Comparator<>() {
1894                 @Override
1895                 public int compare(String o1, String o2) {
1896                     if (o1.contains("{")) {
1897                         o1 = o1.replace("{", "");
1898                     }
1899                     if (o2.contains("{")) {
1900                         o2 = o2.replace("{", "");
1901                     }
1902                     return COMP.compare(o1, o2);
1903                 }
1904             };
1905 
getAttributeValueComparator(String element, String attribute)1906     public static Comparator<String> getAttributeValueComparator(String element, String attribute) {
1907         return getAttributeValueComparator(DtdType.ldml, element, attribute);
1908     }
1909 
getAttributeValueComparator( DtdType type, String element, String attribute)1910     static Comparator<String> getAttributeValueComparator(
1911             DtdType type, String element, String attribute) {
1912         // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1913         Comparator<String> comp = valueOrdering;
1914         if (type != DtdType.ldml && type != DtdType.ldmlICU) {
1915             return comp;
1916         }
1917         if (attribute.equals("day")) { // && (element.startsWith("weekend")
1918             comp = dayValueOrder;
1919         } else if (attribute.equals("type")) {
1920             if (element.endsWith("FormatLength")) {
1921                 comp = lengthOrder;
1922             } else if (element.endsWith("Width")) {
1923                 comp = widthOrder;
1924             } else if (element.equals("day")) {
1925                 comp = dayValueOrder;
1926             } else if (element.equals("field")) {
1927                 comp = dateFieldOrder;
1928             } else if (element.equals("zone")) {
1929                 comp = zoneOrder;
1930             } else if (element.equals("listPatternPart")) {
1931                 comp = listPatternOrder;
1932             } else if (element.equals("currencyFormat")) {
1933                 comp = currencyFormatOrder;
1934             } else if (element.equals("unitLength")) {
1935                 comp = unitLengthOrder;
1936             } else if (element.equals("unit")) {
1937                 comp = getUnitOrder();
1938             } else if (element.equals("dayPeriod")) {
1939                 comp = dayPeriodOrder;
1940             } else if (element.equals("dateTimeFormat")) {
1941                 comp = dateTimeFormatOrder;
1942             } else if (element.equals("nameField")) {
1943                 comp = nameFieldOrder;
1944             }
1945         } else if (attribute.equals("order") && element.equals("personName")) {
1946             comp = orderValueOrder;
1947         } else if (attribute.equals("length") && element.equals("personName")) {
1948             comp = lengthValueOrder;
1949         } else if (attribute.equals("usage") && element.equals("personName")) {
1950             comp = usageValueOrder;
1951         } else if (attribute.equals("formality")) {
1952             comp = formalityValueOrder;
1953         } else if (attribute.equals("item") && element.equals("sampleName")) {
1954             comp = sampleNameItemOrder;
1955         } else if (attribute.equals("count") && !element.equals("minDays")) {
1956             comp = countValueOrder;
1957         } else if (attribute.equals("cp") && element.equals("annotation")) {
1958             comp = UNICODE_SET_COMPARATOR;
1959         }
1960         return comp;
1961     }
1962 
1963     /** Comparator for attributes in CLDR files */
1964     private static AttributeValueComparator ldmlAvc =
1965             new AttributeValueComparator() {
1966                 @Override
1967                 public int compare(String element, String attribute, String value1, String value2) {
1968                     Comparator<String> comp = getAttributeValueComparator(element, attribute);
1969                     return comp.compare(value1, value2);
1970                 }
1971             };
1972 
hasValue(String elementName)1973     public boolean hasValue(String elementName) {
1974         return nameToElement.get(elementName).type == ElementType.PCDATA;
1975     }
1976 
isMetadata(XPathParts pathPlain)1977     public boolean isMetadata(XPathParts pathPlain) {
1978         for (String s : pathPlain.getElements()) {
1979             Element e = getElementFromName().get(s);
1980             if (e.elementStatus == ElementStatus.metadata) {
1981                 return true;
1982             }
1983         }
1984         return false;
1985     }
1986 
isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1987     public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) {
1988         // TODO Don't use hard-coded list; instead add to DTD annotations
1989         final String element1 = pathPlain.getElement(1);
1990         final String element2 = pathPlain.getElement(2);
1991         final String elementN = pathPlain.getElement(-1);
1992         switch (dtdType2) {
1993             case ldml:
1994                 switch (element1) {
1995                     case "generation":
1996                     case "metadata":
1997                         return true;
1998                 }
1999                 break;
2000             case ldmlBCP47:
2001                 switch (element1) {
2002                     case "generation":
2003                     case "version":
2004                         return true;
2005                 }
2006                 break;
2007                 ////
2008                 // supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment
2009             case supplementalData:
2010                 // these are NOT under /metadata/ but are actually metadata
2011                 switch (element1) {
2012                     case "generation":
2013                     case "version":
2014                     case "validity":
2015                     case "references":
2016                     case "coverageLevels":
2017                         return true;
2018                     case "transforms":
2019                         return elementN.equals("comment");
2020                     case "metadata":
2021                         // these ARE under /metadata/, but many others under /metadata/ are NOT
2022                         // actually metadata.
2023                         switch (element2) {
2024                             case "validity":
2025                             case "serialElements":
2026                             case "suppress":
2027                             case "distinguishing":
2028                             case "blocking":
2029                             case "casingData":
2030                                 return true;
2031                         }
2032                         break;
2033                 }
2034                 break;
2035             default:
2036         }
2037         return false;
2038     }
2039 
isDeprecated(XPathParts pathPlain)2040     public boolean isDeprecated(XPathParts pathPlain) {
2041         for (int i = 0; i < pathPlain.size(); ++i) {
2042             String elementName = pathPlain.getElement(i);
2043             if (isDeprecated(elementName, "*", null)) {
2044                 return true;
2045             }
2046             for (String attribute : pathPlain.getAttributeKeys(i)) {
2047                 String attributeValue = pathPlain.getAttributeValue(i, attribute);
2048                 if (isDeprecated(elementName, attribute, attributeValue)) {
2049                     return true;
2050                 }
2051             }
2052         }
2053         return false;
2054     }
2055 
    /** Splits on any whitespace character, trimming each piece and dropping empty pieces. */
    public static final Splitter SPACE_SPLITTER =
            Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings();
    /** Splits on the vertical bar character, trimming each piece and dropping empty pieces. */
    public static final Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings();
    /** Splits on line feed or carriage return, trimming each piece and dropping empty pieces. */
    public static final Splitter CR_SPLITTER =
            Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings();
2061 
2062     private static class XPathPartsSet {
2063         private final Set<XPathParts> list = new LinkedHashSet<>();
2064 
addElement(String element)2065         private void addElement(String element) {
2066             if (list.isEmpty()) {
2067                 list.add(new XPathParts().addElement(element));
2068             } else {
2069                 for (XPathParts item : list) {
2070                     item.addElement(element);
2071                 }
2072             }
2073         }
2074 
addAttribute(String attribute, String attributeValue)2075         private void addAttribute(String attribute, String attributeValue) {
2076             for (XPathParts item : list) {
2077                 item.addAttribute(attribute, attributeValue);
2078             }
2079         }
2080 
setElement(int i, String string)2081         private void setElement(int i, String string) {
2082             for (XPathParts item : list) {
2083                 item.setElement(i, string);
2084             }
2085         }
2086 
addAttributes(String attribute, List<String> attributeValues)2087         private void addAttributes(String attribute, List<String> attributeValues) {
2088             if (attributeValues.size() == 1) {
2089                 addAttribute(attribute, attributeValues.iterator().next());
2090             } else {
2091                 // duplicate all the items in the list with the given values
2092                 Set<XPathParts> newList = new LinkedHashSet<>();
2093                 for (XPathParts item : list) {
2094                     for (String attributeValue : attributeValues) {
2095                         XPathParts newItem = item.cloneAsThawed();
2096                         newItem.addAttribute(attribute, attributeValue);
2097                         newList.add(newItem);
2098                     }
2099                 }
2100                 list.clear();
2101                 list.addAll(newList);
2102             }
2103         }
2104 
toStrings()2105         private ImmutableSet<String> toStrings() {
2106             Builder<String> result = new ImmutableSet.Builder<>();
2107 
2108             for (XPathParts item : list) {
2109                 result.add(item.toString());
2110             }
2111             return result.build();
2112         }
2113 
2114         @Override
toString()2115         public String toString() {
2116             return list.toString();
2117         }
2118     }
2119 
getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)2120     public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) {
2121         extras.clear();
2122         Map<String, String> valueAttributes = new HashMap<>();
2123         XPathPartsSet pathResult = new XPathPartsSet();
2124         String element = null;
2125         for (int i = 0; i < pathPlain.size(); ++i) {
2126             element = pathPlain.getElement(i);
2127             pathResult.addElement(element);
2128             valueAttributes.clear();
2129             for (String attribute : pathPlain.getAttributeKeys(i)) {
2130                 AttributeStatus status = getAttributeStatus(element, attribute);
2131                 final String attributeValue = pathPlain.getAttributeValue(i, attribute);
2132                 switch (status) {
2133                     case distinguished:
2134                         AttributeType attrType = getAttributeType(element, attribute);
2135                         if (attrType == AttributeType.NMTOKENS) {
2136                             pathResult.addAttributes(
2137                                     attribute, SPACE_SPLITTER.splitToList(attributeValue));
2138                         } else {
2139                             pathResult.addAttribute(attribute, attributeValue);
2140                         }
2141                         break;
2142                     case value:
2143                         valueAttributes.put(attribute, attributeValue);
2144                         break;
2145                     case metadata:
2146                         break;
2147                 }
2148             }
2149             if (!valueAttributes.isEmpty()) {
2150                 boolean hasValue = hasValue(element);
2151                 // if it doesn't have a value, we construct new child elements, with _ prefix
2152                 // if it does have a value, we have to play a further trick, since
2153                 // we can't have a value and child elements at the same level.
2154                 // So we use a _ suffix on the element.
2155                 if (hasValue) {
2156                     pathResult.setElement(i, element + "_");
2157                 } else {
2158                     int debug = 0;
2159                 }
2160                 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) {
2161                     final String attribute = attributeAndValue.getKey();
2162                     final String attributeValue = attributeAndValue.getValue();
2163 
2164                     Set<String> pathsShort = pathResult.toStrings();
2165                     AttributeType attrType = getAttributeType(element, attribute);
2166                     for (String pathShort : pathsShort) {
2167                         pathShort += "/_" + attribute;
2168                         if (attrType == AttributeType.NMTOKENS) {
2169                             for (String valuePart : SPACE_SPLITTER.split(attributeValue)) {
2170                                 extras.put(pathShort, valuePart);
2171                             }
2172                         } else {
2173                             extras.put(pathShort, attributeValue);
2174                         }
2175                     }
2176                 }
2177                 if (hasValue) {
2178                     pathResult.setElement(i, element); // restore
2179                 }
2180             }
2181         }
2182         // Only add the path if it could have a value, looking at the last element
2183         if (!hasValue(element)) {
2184             return null;
2185         }
2186         return pathResult.toStrings();
2187     }
2188 
getAttributeType(String elementName, String attributeName)2189     public AttributeType getAttributeType(String elementName, String attributeName) {
2190         Attribute attr = getAttribute(elementName, attributeName);
2191         return (attr != null) ? attr.type : null;
2192     }
2193 
getAttribute(String elementName, String attributeName)2194     public Attribute getAttribute(String elementName, String attributeName) {
2195         Element element = nameToElement.get(elementName);
2196         return (element != null) ? element.getAttributeNamed(attributeName) : null;
2197     }
2198 
2199     // TODO: add support for following to DTD annotations, and rework API
2200 
    /**
     * Element names whose presence anywhere in a path makes {@code getValueSplitter} return the
     * whitespace splitter.
     */
    static final Set<String> SPACED_VALUES = ImmutableSet.of("idValidity", "languageGroup");
2202 
getValueSplitter(XPathParts pathPlain)2203     public static Splitter getValueSplitter(XPathParts pathPlain) {
2204         if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) {
2205             return SPACE_SPLITTER;
2206         } else if (pathPlain.getElement(-1).equals("annotation")
2207                 && !pathPlain.getAttributeKeys(-1).contains("tts")) {
2208             return BAR_SPLITTER;
2209         }
2210         return CR_SPLITTER;
2211     }
2212 
isComment(XPathParts pathPlain, String line)2213     public static boolean isComment(XPathParts pathPlain, String line) {
2214         if (pathPlain.contains("transform")) {
2215             if (line.startsWith("#")) {
2216                 return true;
2217             }
2218         }
2219         return false;
2220     }
2221 
isExtraSplit(String extraPath)2222     public static boolean isExtraSplit(String extraPath) {
2223         if (extraPath.endsWith("/_type")
2224                 && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) {
2225             return true;
2226         }
2227         return false;
2228     }
2229 
2230     /** Return the value status for an EAV */
getValueStatus(String elementName, String attributeName, String value)2231     public ValueStatus getValueStatus(String elementName, String attributeName, String value) {
2232         Element element = nameToElement.get(elementName);
2233         if (element == null) {
2234             return ValueStatus.invalid;
2235         }
2236         Attribute attr = element.getAttributeNamed(attributeName);
2237         if (attr == null) {
2238             return ValueStatus.invalid;
2239         }
2240         return attr.getValueStatus(value);
2241     }
2242 
2243     /** Return element-attribute pairs with non-enumerated values, for quick checks. */
getNonEnumerated(Map<String, String> matchValues)2244     public Multimap<String, String> getNonEnumerated(Map<String, String> matchValues) {
2245         Multimap<String, String> nonEnumeratedElementToAttribute =
2246                 TreeMultimap.create(); // make tree for ease of debugging
2247         for (Entry<String, Element> entry : nameToElement.entrySet()) {
2248             Element element = entry.getValue();
2249             for (Attribute attribute : element.attributes.keySet()) {
2250                 if (attribute.type != AttributeType.ENUMERATED_TYPE) {
2251                     String elementName = element.getName();
2252                     String attrName = attribute.getName();
2253                     nonEnumeratedElementToAttribute.put(elementName, attrName);
2254                     if (attribute.matchValue != null) {
2255                         matchValues.put(
2256                                 elementName + "\t" + attrName, attribute.matchValue.getName());
2257                     }
2258                 }
2259             }
2260         }
2261         return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute);
2262     }
2263 
2264     /** Get the value constraint on the last element in a path */
getValueConstraint(String xpath)2265     public static ValueConstraint getValueConstraint(String xpath) {
2266         return getElement(xpath, -1).getValueConstraint();
2267     }
2268 
2269     /** Get an element from a path and element index. */
getElement(String xpath, int elementIndex)2270     public static Element getElement(String xpath, int elementIndex) {
2271         XPathParts parts = XPathParts.getFrozenInstance(xpath);
2272         return DtdData.getInstance(DtdType.valueOf(parts.getElement(0)))
2273                 .getElementFromName()
2274                 .get(parts.getElement(elementIndex));
2275     }
2276 
2277     public static class DtdGuide {
2278         public interface DtdVisitor {
2279             /** Return false if all children should be skipped */
visit( DtdType dtdType, Stack<Element> ancestors, Element child, Attribute attribute)2280             public boolean visit(
2281                     DtdType dtdType, Stack<Element> ancestors, Element child, Attribute attribute);
2282         }
2283 
2284         private Set<Element> seenElements = new HashSet<>();
2285         private DtdVisitor dtdVisitor;
2286         private DtdType dtdType;
2287         private Stack<Element> ancestors = new Stack<>();
2288         private boolean skipDeprecated;
2289 
DtdGuide(boolean skipDeprecated, DtdVisitor dtdVisitor)2290         public DtdGuide(boolean skipDeprecated, DtdVisitor dtdVisitor) {
2291             this.dtdVisitor = dtdVisitor;
2292             this.skipDeprecated = skipDeprecated;
2293             process(DtdType.values());
2294         }
2295 
process(DtdType... dtdTypes)2296         public void process(DtdType... dtdTypes) {
2297             for (DtdType dt : dtdTypes.length != 0 ? dtdTypes : DtdType.values()) {
2298                 dtdType = dt;
2299                 process(getInstance(dtdType).ROOT);
2300             }
2301         }
2302 
process(Element element)2303         private void process(Element element) {
2304             if (seenElements.contains(element) || !skipDeprecated && element.isDeprecated()) {
2305                 return;
2306             }
2307             seenElements.add(element);
2308             for (Attribute attribute : element.getAttributes().keySet()) {
2309                 if (!skipDeprecated && attribute.isDeprecated()) {
2310                     continue;
2311                 }
2312                 if (!dtdVisitor.visit(dtdType, ancestors, element, attribute)) {
2313                     return;
2314                 }
2315             }
2316             ancestors.push(element);
2317             for (Element child : element.getChildren().keySet()) {
2318                 process(child);
2319             }
2320             ancestors.pop();
2321         }
2322     }
2323 }
2324