• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.io.File;
4 import java.io.StringReader;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.Comparator;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.LinkedHashMap;
14 import java.util.LinkedHashSet;
15 import java.util.List;
16 import java.util.Locale;
17 import java.util.Map;
18 import java.util.Map.Entry;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.concurrent.ConcurrentHashMap;
22 import java.util.concurrent.ConcurrentMap;
23 import java.util.regex.Pattern;
24 
25 import com.google.common.base.CharMatcher;
26 import com.google.common.base.Joiner;
27 import com.google.common.base.Splitter;
28 import com.google.common.collect.ImmutableSet;
29 import com.google.common.collect.ImmutableSet.Builder;
30 import com.google.common.collect.ImmutableSetMultimap;
31 import com.google.common.collect.Multimap;
32 import com.google.common.collect.TreeMultimap;
33 import com.ibm.icu.impl.Relation;
34 import com.ibm.icu.text.Transform;
35 
36 /**
37  * An immutable object that contains the structure of a DTD.
38  * @author markdavis
39  */
40 public class DtdData extends XMLFileReader.SimpleHandler {
    // Line separator followed by four spaces.
    private static final String COMMENT_PREFIX = System.lineSeparator() + "    ";
    // Read from the CLDR property "show_all" (default false); when true the handle* callbacks
    // echo each declaration and comment to System.out.
    private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false);
    private static final boolean USE_SYNTHESIZED = false;

    // Compile-time switch for the diagnostic System.out output in this class.
    private static final boolean DEBUG = false;
    // Matches any single character that is NOT a letter, digit, '-', '#', '_' or ':';
    // used to split content models and attribute type lists into name tokens.
    private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]");

    // Attribute name -> attributes with that name (sorted by name; insertion order within a name).
    private final Relation<String, Attribute> nameToAttributes = Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class);
    // Element name -> element; swapped for an unmodifiable view by freeze().
    private Map<String, Element> nameToElement = new HashMap<>();
    // Both comparators are only built by freeze() when version == null.
    private MapComparator<String> elementComparator;
    private MapComparator<String> attributeComparator;

    // Element named after dtdType.rootType; assigned in the constructor.
    public final Element ROOT;
    // NOTE: these two initialisers call elementFrom(), so nameToElement must be declared above them.
    public final Element PCDATA = elementFrom("#PCDATA");
    public final Element ANY = elementFrom("ANY");
    public final DtdType dtdType;
    public final String version;
    // The most recently declared element / attribute; exactly one of them is non-null after a
    // declaration, and it receives any comments that follow (see addComment).
    private Element lastElement;
    private Attribute lastAttribute;
    // Comments collected ahead of the next declaration; handed to it and then reset to null.
    private Set<String> preCommentCache;
    // Created by finish().
    private DtdComparator dtdComparator;
62 
63     public enum AttributeStatus {
64         distinguished ("§d"),
65         value ("§v"),
66         metadata ("§m︎");
67         public final String shortName;
AttributeStatus(String shortName)68         AttributeStatus(String shortName) {
69             this.shortName = shortName;
70         }
getShortName(AttributeStatus status)71         public static String getShortName(AttributeStatus status) {
72             return status == null ? "" : status.shortName;
73         }
74     }
75 
76     public enum Mode {
77         REQUIRED("#REQUIRED"), OPTIONAL("#IMPLIED"), FIXED("#FIXED"), NULL("null");
78 
79         public final String source;
80 
Mode(String s)81         Mode(String s) {
82             source = s;
83         }
84 
forString(String mode)85         public static Mode forString(String mode) {
86             for (Mode value : Mode.values()) {
87                 if (value.source.equals(mode)) {
88                     return value;
89                 }
90             }
91             if (mode == null) {
92                 return NULL;
93             }
94             throw new IllegalArgumentException(mode);
95         }
96     }
97 
98     public enum AttributeType {
99         CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, ENUMERATED_TYPE
100     }
101 
    // Names of elements on which a "draft" attribute is NOT flagged as deprecated even though the
    // element has children (consulted by the Attribute constructor).
    static final Set<String> DRAFT_ON_NON_LEAF_ALLOWED = ImmutableSet.of("collation", "transform", "unitPreferenceData", "rulesetGrouping");
103 
104     public static class Attribute implements Named {
105         private static final Joiner JOINER_COMMA_SPACE = Joiner.on(", ");
106         public static final String AUG_TRAIL = "⟫";
107         public static final String AUG_LEAD = "⟪";
108         public static final String ENUM_TRAIL = "⟩";
109         public static final String ENUM_LEAD = "⟨";
110         public static final Pattern LEAD_TRAIL = Pattern.compile("(.*[" + AUG_LEAD + ENUM_LEAD + "])(.*)([" + AUG_TRAIL + ENUM_TRAIL + "].*)");
111         public final String name;
112         public final Element element;
113         public final Mode mode;
114         public final String defaultValue;
115         public final AttributeType type;
116         public final Map<String, Integer> values;
117         private final Set<String> commentsPre;
118         private Set<String> commentsPost;
119         private boolean isDeprecatedAttribute;
120         public AttributeStatus attributeStatus = AttributeStatus.distinguished; // default unless reset by annotations
121         private Set<String> deprecatedValues = Collections.emptySet();
122         public MatchValue matchValue;
123         private final Comparator<String> attributeValueComparator;
124 
Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)125         private Attribute(DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment) {
126             commentsPre = firstComment;
127             element = element2;
128             name = aName.intern();
129             if (name.equals("draft") // normally never permitted on elements with children, but special cases...
130                 && !DRAFT_ON_NON_LEAF_ALLOWED.contains(element.getName())) {
131                 int elementChildrenCount = element.getChildren().size();
132                 if (elementChildrenCount > 1
133                     || elementChildrenCount == 1 && !element.getChildren().keySet().iterator().next().getName().equals("cp")) {
134                     isDeprecatedAttribute = true;
135                     if (DEBUG) {
136                         System.out.println(element.getName() + ":" + element.getChildren());
137                     }
138                 }
139             }
140             mode = mode2;
141             defaultValue = value2 == null ? null
142                 : value2.intern();
143             AttributeType _type = AttributeType.ENUMERATED_TYPE;
144             Map<String, Integer> _values = Collections.emptyMap();
145             if (split.length == 1) {
146                 try {
147                     _type = AttributeType.valueOf(split[0]);
148                 } catch (Exception e) {
149                 }
150             }
151             type = _type;
152 
153             if (_type == AttributeType.ENUMERATED_TYPE) {
154                 LinkedHashMap<String, Integer> temp = new LinkedHashMap<>();
155                 for (String part : split) {
156                     if (part.length() != 0) {
157                         temp.put(part.intern(), temp.size());
158                     }
159                 }
160                 _values = Collections.unmodifiableMap(temp);
161             }
162             values = _values;
163             attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name);
164         }
165 
166         @Override
toString()167         public String toString() {
168             return element.name + ":" + name;
169         }
170 
getSampleValue()171         public String getSampleValue() {
172             return type == AttributeType.ENUMERATED_TYPE  ? (values.containsKey("year") ? "year" : values.keySet().iterator().next())
173                 : matchValue != null ? matchValue.getSample()
174                     : MatchValue.DEFAULT_SAMPLE;
175         }
176 
appendDtdString(StringBuilder b)177         public StringBuilder appendDtdString(StringBuilder b) {
178             Attribute a = this;
179             b.append("<!ATTLIST " + element.name + " " + a.name);
180             boolean first;
181             if (a.type == AttributeType.ENUMERATED_TYPE) {
182                 b.append(" (");
183                 first = true;
184                 for (String s : a.values.keySet()) {
185                     if (deprecatedValues.contains(s)) {
186                         continue;
187                     }
188                     if (first) {
189                         first = false;
190                     } else {
191                         b.append(" | ");
192                     }
193                     b.append(s);
194                 }
195                 b.append(")");
196             } else {
197                 b.append(' ').append(a.type);
198             }
199             if (a.mode != Mode.NULL) {
200                 b.append(" ").append(a.mode.source);
201             }
202             if (a.defaultValue != null) {
203                 b.append(" \"").append(a.defaultValue).append('"');
204             }
205             b.append(" >");
206             return b;
207         }
208 
features()209         public String features() {
210             return (type == AttributeType.ENUMERATED_TYPE ? values.keySet().toString() : type.toString())
211                 + (mode == Mode.NULL ? "" : ", mode=" + mode)
212                 + (defaultValue == null ? "" : ", default=" + defaultValue);
213         }
214 
215         @Override
getName()216         public String getName() {
217             return name;
218         }
219 
220         private static Splitter COMMA = Splitter.on(',').trimResults();
221 
addComment(String commentIn)222         public void addComment(String commentIn) {
223             if (commentIn.startsWith("@")) {
224                 // there are exactly 2 cases: deprecated and ordered
225                 switch (commentIn) {
226                 case "@METADATA":
227                     attributeStatus = AttributeStatus.metadata;
228                     break;
229                 case "@VALUE":
230                     attributeStatus = AttributeStatus.value;
231                     break;
232                 case "@DEPRECATED":
233                     isDeprecatedAttribute = true;
234                     break;
235                 default:
236                     int colonPos = commentIn.indexOf(':');
237                     if (colonPos < 0) {
238                         throw new IllegalArgumentException("Unrecognized annotation: " + commentIn);
239                     }
240                     String command = commentIn.substring(0, colonPos);
241                     String argument = commentIn.substring(colonPos + 1);
242                     switch(command) {
243                     case "@DEPRECATED":
244                         deprecatedValues = Collections.unmodifiableSet(new HashSet<>(COMMA.splitToList(argument)));
245                         break;
246                     case "@MATCH":
247                         if (matchValue != null) {
248                             throw new IllegalArgumentException("Conflicting @MATCH: " + matchValue.getName() + " & " + argument);
249                         }
250                         matchValue = MatchValue.of(argument);
251                         break;
252                     default:
253                         throw new IllegalArgumentException("Unrecognized annotation: " + commentIn);
254                     }
255                 }
256                 return;
257             }
258             commentsPost = addUnmodifiable(commentsPost, commentIn.trim());
259         }
260 
261         /**
262          * Special version of identity; only considers name and name of element
263          */
264         @Override
equals(Object obj)265         public boolean equals(Object obj) {
266             if (!(obj instanceof Attribute)) {
267                 return false;
268             }
269             Attribute that = (Attribute) obj;
270             return name.equals(that.name)
271                 && element.name.equals(that.element.name) // don't use plain element: circularity
272                 // not relevant to identity
273                 //                && Objects.equals(comment, that.comment)
274                 //                && mode.equals(that.mode)
275                 //                && Objects.equals(defaultValue, that.defaultValue)
276                 //                && type.equals(that.type)
277                 //                && values.equals(that.values)
278                 ;
279         }
280 
281         /**
282          * Special version of identity; only considers name and name of element
283          */
284         @Override
hashCode()285         public int hashCode() {
286             return name.hashCode() * 37
287                 + element.name.hashCode() // don't use plain element: circularity
288                 // not relevant to identity
289                 //                ) * 37 + Objects.hashCode(comment)) * 37
290                 //                + mode.hashCode()) * 37
291                 //                + Objects.hashCode(defaultValue)) * 37
292                 //                + type.hashCode()) * 37
293                 //                + values.hashCode()
294                 ;
295         }
296 
isDeprecated()297         public boolean isDeprecated() {
298             return isDeprecatedAttribute;
299         }
300 
isDeprecatedValue(String value)301         public boolean isDeprecatedValue(String value) {
302             return deprecatedValues.contains(value);
303         }
304 
getStatus()305         public AttributeStatus getStatus() {
306             return attributeStatus;
307         }
308 
getValueStatus(String value)309         public ValueStatus getValueStatus(String value) {
310             return deprecatedValues.contains(value) ? ValueStatus.invalid
311                 : type == AttributeType.ENUMERATED_TYPE  ? (values.containsKey(value) ? ValueStatus.valid  : ValueStatus.invalid)
312                     : matchValue == null ? ValueStatus.unknown
313                         : matchValue.is(value) ? ValueStatus.valid
314                             : ValueStatus.invalid;
315         }
316 
getMatchString()317         public String getMatchString() {
318             return type == AttributeType.ENUMERATED_TYPE ? ENUM_LEAD + JOINER_COMMA_SPACE.join(values.keySet()) + ENUM_TRAIL
319                 : matchValue != null ? AUG_LEAD + matchValue.toString() + AUG_TRAIL
320                     : "";
321         }
322 
getMatchingName(Map<Attribute, Integer> attributes)323         public Attribute getMatchingName(Map<Attribute, Integer> attributes) {
324             for (Attribute attribute : attributes.keySet()) {
325                 if (name.equals(attribute.getName())) {
326                     return attribute;
327                 }
328             }
329             return null;
330         }
331 
332     }
333 
334     public enum ValueStatus {invalid, unknown, valid}
335 
DtdData(DtdType type, String version)336     private DtdData(DtdType type, String version) {
337         this.dtdType = type;
338         this.ROOT = elementFrom(type.rootType.toString());
339         this.version = version;
340     }
341 
addAttribute(String eName, String aName, String type, String mode, String value)342     private void addAttribute(String eName, String aName, String type, String mode, String value) {
343         Attribute a = new Attribute(dtdType, nameToElement.get(eName), aName, Mode.forString(mode), FILLER.split(type), value, preCommentCache);
344         preCommentCache = null;
345         getAttributesFromName().put(aName, a);
346         CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size());
347         lastElement = null;
348         lastAttribute = a;
349     }
350 
351     public enum ElementType {
352         EMPTY, ANY, PCDATA("(#PCDATA)"), CHILDREN;
353         public final String source;
354 
ElementType(String s)355         private ElementType(String s) {
356             source = s;
357         }
358 
ElementType()359         private ElementType() {
360             source = name();
361         }
362     }
363 
364     interface Named {
getName()365         String getName();
366     }
367 
368     public enum ElementStatus {
369         regular, metadata
370     }
371 
372     public static class Element implements Named {
373         public final String name;
374         private String rawModel;
375         private ElementType type;
376         private final Map<Element, Integer> children = new LinkedHashMap<>();
377         private final Map<Attribute, Integer> attributes = new LinkedHashMap<>();
378         private Set<String> commentsPre;
379         private Set<String> commentsPost;
380         private String model;
381         private boolean isOrderedElement;
382         private boolean isDeprecatedElement;
383         private ElementStatus elementStatus = ElementStatus.regular;
384 
Element(String name2)385         private Element(String name2) {
386             name = name2.intern();
387         }
388 
setChildren(DtdData dtdData, String model, Set<String> precomments)389         private void setChildren(DtdData dtdData, String model, Set<String> precomments) {
390             this.commentsPre = precomments;
391             rawModel = model;
392             this.model = clean(model);
393             if (model.equals("EMPTY")) {
394                 type = ElementType.EMPTY;
395                 return;
396             }
397             type = ElementType.CHILDREN;
398             for (String part : FILLER.split(model)) {
399                 if (part.length() != 0) {
400                     if (part.equals("#PCDATA")) {
401                         type = ElementType.PCDATA;
402                     } else if (part.equals("ANY")) {
403                         type = ElementType.ANY;
404                     } else {
405                         CldrUtility.putNew(children, dtdData.elementFrom(part), children.size());
406                     }
407                 }
408             }
409             if ((type == ElementType.CHILDREN) == (children.size() == 0)
410                 && !model.startsWith("(#PCDATA|cp")) {
411                 throw new IllegalArgumentException("CLDR does not permit Mixed content. " + name + ":" + model);
412             }
413         }
414 
415         static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)");
416         static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])");
417 
clean(String model2)418         private String clean(String model2) {
419             // (x) -> ( x );
420             // x,y -> x, y
421             // x|y -> x | y
422             String result = CLEANER1.matcher(model2).replaceAll("$1 ");
423             result = CLEANER2.matcher(result).replaceAll(" $1");
424             return result.equals(model2)
425                 ? model2
426                     : result; // for debugging
427         }
428 
containsAttribute(String string)429         public boolean containsAttribute(String string) {
430             for (Attribute a : attributes.keySet()) {
431                 if (a.name.equals(string)) {
432                     return true;
433                 }
434             }
435             return false;
436         }
437 
438         @Override
toString()439         public String toString() {
440             return name;
441         }
442 
toDtdString()443         public String toDtdString() {
444             return "<!ELEMENT " + name + " " + getRawModel() + " >";
445         }
446 
getType()447         public ElementType getType() {
448             return type;
449         }
450 
getChildren()451         public Map<Element, Integer> getChildren() {
452             return Collections.unmodifiableMap(children);
453         }
454 
getAttributes()455         public Map<Attribute, Integer> getAttributes() {
456             return Collections.unmodifiableMap(attributes);
457         }
458 
459         @Override
getName()460         public String getName() {
461             return name;
462         }
463 
getChildNamed(String string)464         public Element getChildNamed(String string) {
465             for (Element e : children.keySet()) {
466                 if (e.name.equals(string)) {
467                     return e;
468                 }
469             }
470             return null;
471         }
472 
getAttributeNamed(String string)473         public Attribute getAttributeNamed(String string) {
474             for (Attribute a : attributes.keySet()) {
475                 if (a.name.equals(string)) {
476                     return a;
477                 }
478             }
479             return null;
480         }
481 
addComment(String addition)482         public void addComment(String addition) {
483             if (addition.startsWith("@")) {
484                 // there are exactly 3 cases: deprecated, ordered, and metadata
485                 switch (addition) {
486                 case "@ORDERED":
487                     isOrderedElement = true;
488                     break;
489                 case "@DEPRECATED":
490                     isDeprecatedElement = true;
491                     break;
492                 case "@METADATA":
493                     elementStatus = ElementStatus.metadata;
494                     break;
495                 default:
496                     throw new IllegalArgumentException("Unrecognized annotation: " + addition);
497                 }
498                 return;
499             }
500             commentsPost = addUnmodifiable(commentsPost, addition.trim());
501         }
502 
503         /**
504          * Special version of equals. Only the name is considered in the identity.
505          */
506         @Override
equals(Object obj)507         public boolean equals(Object obj) {
508             if (!(obj instanceof Element)) {
509                 return false;
510             }
511             Element that = (Element) obj;
512             return name.equals(that.name)
513                 // not relevant to the identity of the object
514                 //                && Objects.equals(comment, that.comment)
515                 //                && type == that.type
516                 //                && attributes.equals(that.attributes)
517                 //                && children.equals(that.children)
518                 ;
519         }
520 
521         /**
522          * Special version of hashcode. Only the name is considered in the identity.
523          */
524         @Override
hashCode()525         public int hashCode() {
526             return name.hashCode()
527                 // not relevant to the identity of the object
528                 // * 37 + Objects.hashCode(comment)
529                 //) * 37 + Objects.hashCode(type)
530                 //                ) * 37 + attributes.hashCode()
531                 //                ) * 37 + children.hashCode()
532                 ;
533         }
534 
isDeprecated()535         public boolean isDeprecated() {
536             return isDeprecatedElement;
537         }
538 
isOrdered()539         public boolean isOrdered() {
540             return isOrderedElement;
541         }
542 
getElementStatus()543         public ElementStatus getElementStatus() {
544             return elementStatus;
545         }
546 
547         /**
548          * @return the rawModel
549          */
getRawModel()550         public String getRawModel() {
551             return rawModel;
552         }
553     }
554 
elementFrom(String name)555     private Element elementFrom(String name) {
556         Element result = nameToElement.get(name);
557         if (result == null) {
558             nameToElement.put(name, result = new Element(name));
559         }
560         return result;
561     }
562 
addElement(String name2, String model)563     private void addElement(String name2, String model) {
564         Element element = elementFrom(name2);
565         element.setChildren(this, model, preCommentCache);
566         preCommentCache = null;
567         lastElement = element;
568         lastAttribute = null;
569     }
570 
addComment(String comment)571     private void addComment(String comment) {
572         comment = comment.trim();
573         if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky"
574             if (comment.startsWith("@")) {
575                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
576             }
577             preCommentCache = addUnmodifiable(preCommentCache, comment);
578         } else if (lastElement != null) {
579             lastElement.addComment(comment);
580         } else if (lastAttribute != null) {
581             lastAttribute.addComment(comment);
582         } else {
583             if (comment.startsWith("@")) {
584                 throw new IllegalArgumentException("@ annotation comment must follow element or attribute, without intervening # comment");
585             }
586             preCommentCache = addUnmodifiable(preCommentCache, comment);
587         }
588     }
589 
590     // TODO hide this
591     /**
592      * @deprecated
593      */
594     @Deprecated
595     @Override
handleElementDecl(String name, String model)596     public void handleElementDecl(String name, String model) {
597         if (SHOW_ALL) {
598             // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, references?, special*))) >
599             System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >");
600         }
601         addElement(name, model);
602     }
603 
604     // TODO hide this
605     /**
606      * @deprecated
607      */
608     @Deprecated
609     @Override
handleStartDtd(String name, String publicId, String systemId)610     public void handleStartDtd(String name, String publicId, String systemId) {
611         DtdType explicitDtdType = DtdType.valueOf(name);
612         if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) {
613             throw new IllegalArgumentException("Mismatch in dtdTypes");
614         }
615     }
616 
617     /**
618      * @deprecated
619      */
620     @Deprecated
621     @Override
handleAttributeDecl(String eName, String aName, String type, String mode, String value)622     public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
623         if (SHOW_ALL) {
624             // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED >
625             // <!ATTLIST version number CDATA #REQUIRED >
626             // <!ATTLIST version cldrVersion CDATA #FIXED "27" >
627 
628             System.out.println("<!ATTLIST " + eName
629                 + " " + aName
630                 + " " + type
631                 + " " + mode
632                 + (value == null ? "" : " \"" + value + "\"")
633                 + " >");
634         }
635         // HACK for 1.1.1
636         if (eName.equals("draft")) {
637             eName = "week";
638         }
639         addAttribute(eName, aName, type, mode, value);
640     }
641 
642     /**
643      * @deprecated
644      */
645     @Deprecated
646     @Override
handleComment(String path, String comment)647     public void handleComment(String path, String comment) {
648         if (comment.contains("Copyright")) {
649             // Zap the copyright comment, replace it with the current one.
650             comment = CldrUtility.getCopyrightString();
651         }
652         if (SHOW_ALL) {
653             // <!-- true and false are deprecated. -->
654             System.out.println("<!-- " + comment.trim() + " -->");
655         }
656         addComment(comment);
657     }
658 
659     // TODO hide this
660     /**
661      * @deprecated
662      */
663     @Deprecated
664     @Override
handleEndDtd()665     public void handleEndDtd() {
666         throw new XMLFileReader.AbortException();
667     }
668 
669     /**
670      * Note that it always gets the trunk version
671      * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead
672      */
673     @Deprecated
getInstance(DtdType type)674     public static DtdData getInstance(DtdType type) {
675         return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory());
676     }
677 
678     /**
679      * Special form using version, used only by tests, etc.
680      */
getInstance(DtdType type, String version)681     public static DtdData getInstance(DtdType type, String version) {
682         File directory = version == null ? CLDRConfig.getInstance().getCldrBaseDirectory()
683             : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version);
684 
685         return getInstance(type, version, directory);
686     }
687 
688     private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE = new ConcurrentHashMap<>();
689 
690     /**
691      * Normal version of DtdData
692      * Get a DtdData, given the CLDR root directory.
693      * @param type which DtdType to return
694      * @param directory the CLDR Root directory, which contains the "common" directory.
695      * @return
696      */
getInstance(DtdType type, File directory)697     public static DtdData getInstance(DtdType type, File directory) {
698         Pair<DtdType, File> key = new Pair<>(type, directory);
699         DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory));
700         return data;
701     }
702 
getInstance(DtdType type, String version, File directory)703     private static DtdData getInstance(DtdType type, String version, File directory) {
704         DtdData simpleHandler = new DtdData(type, version);
705         XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler);
706         if (type != type.rootType) {
707             // read the real first, then add onto it.
708             readFile(type.rootType, xfr, directory);
709         }
710         readFile(type, xfr, directory);
711         // HACK
712         if (type == DtdType.ldmlICU) {
713             Element special = simpleHandler.nameToElement.get("special");
714             for (String extraElementName : Arrays.asList(
715                 "icu:breakIteratorData",
716                 "icu:UCARules",
717                 "icu:scripts",
718                 "icu:transforms",
719                 "icu:ruleBasedNumberFormats",
720                 "icu:isLeapMonth",
721                 "icu:version",
722                 "icu:breakDictionaryData",
723                 "icu:depends")) {
724                 Element extraElement = simpleHandler.nameToElement.get(extraElementName);
725                 special.children.put(extraElement, special.children.size());
726             }
727         }
728         if (simpleHandler.ROOT.children.size() == 0) {
729             throw new IllegalArgumentException(); // should never happen
730         }
731         simpleHandler.finish();
732         simpleHandler.freeze();
733         return simpleHandler;
734     }
735 
finish()736     private void finish() {
737         dtdComparator = new DtdComparator();
738     }
739 
readFile(DtdType type, XMLFileReader xfr, File directory)740     public static void readFile(DtdType type, XMLFileReader xfr, File directory) {
741         File file = new File(directory, type.dtdPath);
742         StringReader s = new StringReader("<?xml version='1.0' encoding='UTF-8' ?>"
743             + "<!DOCTYPE " + type
744             + " SYSTEM '" + file.getAbsolutePath() + "'>");
745         xfr.read(type.toString(), s, -1, true); //  DTD_TYPE_TO_FILE.get(type)
746     }
747 
freeze()748     private void freeze() {
749         if (version == null) { // only generate for new versions
750             MergeLists<String> elementMergeList = new MergeLists<>();
751             elementMergeList.add(dtdType.toString());
752             MergeLists<String> attributeMergeList = new MergeLists<>();
753             attributeMergeList.add("_q");
754 
755             for (Element element : nameToElement.values()) {
756                 if (element.children.size() > 0) {
757                     Collection<String> names = getNames(element.children.keySet());
758                     elementMergeList.add(names);
759                     if (DEBUG) {
760                         System.out.println(element.getName() + "\t→\t" + names);
761                     }
762                 }
763                 if (element.attributes.size() > 0) {
764                     Collection<String> names = getNames(element.attributes.keySet());
765                     attributeMergeList.add(names);
766                     if (DEBUG) {
767                         System.out.println(element.getName() + "\t→\t@" + names);
768                     }
769                 }
770             }
771             List<String> elementList = elementMergeList.merge();
772             List<String> attributeList = attributeMergeList.merge();
773             if (DEBUG) {
774                 System.out.println("Element Ordering:\t" + elementList);
775                 System.out.println("Attribute Ordering:\t" + attributeList);
776             }
777             elementComparator = new MapComparator<>(elementList).setErrorOnMissing(true).freeze();
778             attributeComparator = new MapComparator<>(attributeList).setErrorOnMissing(true).freeze();
779         }
780         nameToAttributes.freeze();
781         nameToElement = Collections.unmodifiableMap(nameToElement);
782     }
783 
getNames(Collection<? extends Named> keySet)784     private Collection<String> getNames(Collection<? extends Named> keySet) {
785         List<String> result = new ArrayList<>();
786         for (Named e : keySet) {
787             result.add(e.getName());
788         }
789         return result;
790     }
791 
792     public enum DtdItem {
793         ELEMENT, ATTRIBUTE, ATTRIBUTE_VALUE
794     }
795 
796     public interface AttributeValueComparator {
compare(String element, String attribute, String value1, String value2)797         public int compare(String element, String attribute, String value1, String value2);
798     }
799 
getDtdComparator(AttributeValueComparator avc)800     public Comparator<String> getDtdComparator(AttributeValueComparator avc) {
801         return dtdComparator;
802     }
803 
getDtdComparator()804     public DtdComparator getDtdComparator() {
805         return dtdComparator;
806     }
807 
808     public class DtdComparator implements Comparator<String> {
809         @Override
compare(String path1, String path2)810         public int compare(String path1, String path2) {
811             XPathParts a = XPathParts.getFrozenInstance(path1);
812             XPathParts b = XPathParts.getFrozenInstance(path2);
813             return xpathComparator(a, b);
814         }
815 
xpathComparator(XPathParts a, XPathParts b)816         public int xpathComparator(XPathParts a, XPathParts b) {
817             // there must always be at least one element
818             String baseA = a.getElement(0);
819             String baseB = b.getElement(0);
820             if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) {
821                 throw new IllegalArgumentException("Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB);
822             }
823             int min = Math.min(a.size(), b.size());
824             Element parent = ROOT;
825             Element elementA;
826             for (int i = 1; i < min; ++i, parent = elementA) {
827                 // add extra test for "fake" elements, used in diffing. they always start with _
828                 String elementRawA = a.getElement(i);
829                 String elementRawB = b.getElement(i);
830                 if (elementRawA.startsWith("_")) {
831                     return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1;
832                 } else if (elementRawB.startsWith("_")) {
833                     return 1;
834                 }
835                 //
836                 elementA = nameToElement.get(elementRawA);
837                 Element elementB = nameToElement.get(elementRawB);
838                 if (elementA != elementB) {
839                     int aa = parent.children.get(elementA);
840                     int bb = parent.children.get(elementB);
841                     return aa - bb;
842                 }
843                 int countA = a.getAttributeCount(i);
844                 int countB = b.getAttributeCount(i);
845                 if (countA == 0 && countB == 0) {
846                     continue;
847                 }
848                 // we have two ways to compare the attributes. One based on the dtd,
849                 // and one based on explicit comparators
850 
851                 // at this point the elements are the same and correspond to elementA
852                 // in the dtd
853 
854                 // Handle the special added elements
855                 String aqValue = a.getAttributeValue(i, "_q");
856                 if (aqValue != null) {
857                     String bqValue = b.getAttributeValue(i, "_q");
858                     if (!aqValue.equals(bqValue)) {
859                         int aValue = Integer.parseInt(aqValue);
860                         int bValue = Integer.parseInt(bqValue);
861                         return aValue - bValue;
862                     }
863                     --countA;
864                     --countB;
865                 }
866 
867                 attributes: for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) {
868                     Attribute main = attr.getKey();
869                     String valueA = a.getAttributeValue(i, main.name);
870                     String valueB = b.getAttributeValue(i, main.name);
871                     if (valueA == null) {
872                         if (valueB != null) {
873                             return -1;
874                         }
875                     } else if (valueB == null) {
876                         return 1;
877                     } else if (valueA.equals(valueB)) {
878                         --countA;
879                         --countB;
880                         if (countA == 0 && countB == 0) {
881                             break attributes;
882                         }
883                         continue; // TODO
884                     } else if (main.attributeValueComparator != null) {
885                         return main.attributeValueComparator.compare(valueA, valueB);
886                     } else if (main.values.size() != 0) {
887                         int aa = main.values.get(valueA);
888                         int bb = main.values.get(valueB);
889                         return aa - bb;
890                     } else {
891                         return valueA.compareTo(valueB);
892                     }
893                 }
894                 if (countA != 0 || countB != 0) {
895                     throw new IllegalArgumentException();
896                 }
897             }
898             return a.size() - b.size();
899         }
900     }
901 
getAttributeComparator()902     public MapComparator<String> getAttributeComparator() {
903         return attributeComparator;
904     }
905 
906 
getElementComparator()907     public MapComparator<String> getElementComparator() {
908         return elementComparator;
909     }
910 
getAttributesFromName()911     public Relation<String, Attribute> getAttributesFromName() {
912         return nameToAttributes;
913     }
914 
getElementFromName()915     public Map<String, Element> getElementFromName() {
916         return nameToElement;
917     }
918 
919     @Override
toString()920     public String toString() {
921         StringBuilder b = new StringBuilder();
922         // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, special*))) >
923         // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false ) #IMPLIED > <!-- true and false are deprecated. -->
924         Seen seen = new Seen(dtdType);
925         seen.seenElements.add(ANY);
926         seen.seenElements.add(PCDATA);
927         toString(ROOT, b, seen);
928 
929         // Hack for ldmlIcu: catch the items that are not mentioned in the original
930         int currentEnd = b.length();
931         for (Element e : nameToElement.values()) {
932             toString(e, b, seen);
933         }
934         if (currentEnd != b.length()) {
935             b.insert(currentEnd,
936                 System.lineSeparator() + System.lineSeparator()
937                 + "<!-- Elements not reachable from root! -->"
938                 + System.lineSeparator());
939         }
940         return b.toString();
941     }
942 
943     static final class Seen {
944         Set<Element> seenElements = new HashSet<>();
945         Set<Attribute> seenAttributes = new HashSet<>();
946 
Seen(DtdType dtdType)947         public Seen(DtdType dtdType) {
948             if (dtdType.rootType == dtdType) {
949                 return;
950             }
951             DtdData otherData = DtdData.getInstance(dtdType.rootType);
952             walk(otherData, otherData.ROOT);
953             seenElements.remove(otherData.nameToElement.get("special"));
954         }
955 
walk(DtdData otherData, Element current)956         private void walk(DtdData otherData, Element current) {
957             seenElements.add(current);
958             seenAttributes.addAll(current.attributes.keySet());
959             for (Element e : current.children.keySet()) {
960                 walk(otherData, e);
961             }
962         }
963     }
964 
getDescendents(Element start, Set<Element> toAddTo)965     public Set<Element> getDescendents(Element start, Set<Element> toAddTo) {
966         if (!toAddTo.contains(start)) {
967             toAddTo.add(start);
968             for (Element e : start.children.keySet()) {
969                 getDescendents(e, toAddTo);
970             }
971         }
972         return toAddTo;
973     }
974 
    /**
     * Appends the declaration of {@code current}, its attribute lists, and then (recursively)
     * its children to {@code b}. Elements and attributes already recorded in {@code seen} are
     * skipped, and everything emitted is recorded there.
     *
     * @param current the element to print
     * @param b the buffer to append to
     * @param seen the elements/attributes printed so far; updated by this call
     */
    private void toString(Element current, StringBuilder b, Seen seen) {
        // 'first' is reused as the separator flag by both list-building loops below
        boolean first = true;
        if (seen.seenElements.contains(current)) {
            return;
        }
        seen.seenElements.add(current);
        boolean elementDeprecated = isDeprecated(current.name, "*", "*");

        showComments(b, current.commentsPre, true);
        b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >");
        // Alternative rendering that rebuilds the content model from the child map;
        // only active when the USE_SYNTHESIZED constant is true.
        if (USE_SYNTHESIZED) {
            Element aliasElement = getElementFromName().get("alias");
            //b.append(current.rawChildren);
            if (!current.children.isEmpty()) {
                LinkedHashSet<Element> elements = new LinkedHashSet<>(current.children.keySet());
                boolean hasAlias = aliasElement != null && elements.remove(aliasElement);
                //boolean hasSpecial = specialElement != null && elements.remove(specialElement);
                if (hasAlias) {
                    b.append("(alias |");
                }
                b.append("(");
                // <!ELEMENT transformNames ( alias | (transformName | special)* ) >
                // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) >

                for (Element e : elements) {
                    if (first) {
                        first = false;
                    } else {
                        b.append(", ");
                    }
                    b.append(e.name);
                    if (e.type != ElementType.PCDATA) {
                        b.append("*");
                    }
                }
                if (hasAlias) {
                    b.append(")");
                }
                b.append(")");
            } else {
                b.append(current.type == null ? "???" : current.type.source);
            }
            b.append(">");
        }
        showComments(b, current.commentsPost, false);
        // Element-level annotation comments
        if (isOrdered(current.name)) {
            b.append(COMMENT_PREFIX + "<!--@ORDERED-->");
        }
        if (current.getElementStatus() != ElementStatus.regular) {
            b.append(COMMENT_PREFIX + "<!--@"
                + current.getElementStatus().toString().toUpperCase(Locale.ROOT)
                + "-->");
        }
        if (elementDeprecated) {
            b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
        }

        // Reused for each attribute; collects enumerated values that are individually deprecated
        LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>();

        for (Attribute a : current.attributes.keySet()) {
            if (seen.seenAttributes.contains(a)) {
                continue;
            }
            seen.seenAttributes.add(a);
            boolean attributeDeprecated = elementDeprecated || isDeprecated(current.name, a.name, "*");

            deprecatedValues.clear();

            showComments(b, a.commentsPre, true);
            b.append("\n<!ATTLIST " + current.name + " " + a.name);
            if (a.type == AttributeType.ENUMERATED_TYPE) {
                // enumerated type: print the values separated by " | "
                b.append(" (");
                first = true;
                for (String s : a.values.keySet()) {
                    if (first) {
                        first = false;
                    } else {
                        b.append(" | ");
                    }
                    b.append(s);
                    // per-value deprecation is only reported when the attribute as a whole is not deprecated
                    if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) {
                        deprecatedValues.add(s);
                    }
                }
                b.append(")");
            } else {
                b.append(' ').append(a.type);
            }
            if (a.mode != Mode.NULL) {
                b.append(" ").append(a.mode.source);
            }
            if (a.defaultValue != null) {
                b.append(" \"").append(a.defaultValue).append('"');
            }
            b.append(" >");
            showComments(b, a.commentsPost, false);
//            if (attributeDeprecated != deprecatedComment) {
//                System.out.println("*** BAD DEPRECATION ***" + a);
//            }
            // Attribute-level annotation comments
            if (a.matchValue != null) {
                b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->");
            }
            if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) {
                b.append(COMMENT_PREFIX + "<!--@METADATA-->");
            } else if (!isDistinguishing(current.name, a.name)) {
                b.append(COMMENT_PREFIX + "<!--@VALUE-->");
            }
            if (attributeDeprecated) {
                b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
            } else if (!deprecatedValues.isEmpty()) {
                b.append(COMMENT_PREFIX + "<!--@DEPRECATED:" + Joiner.on(", ")
                    .join(deprecatedValues) + "-->");
            }
        }
        // Finally print the children, depth-first
        if (current.children.size() > 0) {
            for (Element e : current.children.keySet()) {
                toString(e, b, seen);
            }
        }
    }
1095 
showComments(StringBuilder b, Set<String> comments, boolean separate)1096     private void showComments(StringBuilder b, Set<String> comments, boolean separate) {
1097         if (comments == null) {
1098             return;
1099         }
1100         if (separate && b.length() != 0) {
1101             b.append(System.lineSeparator());
1102         }
1103         for (String c : comments) {
1104             boolean deprecatedComment = false; // the following served its purpose... c.toLowerCase(Locale.ENGLISH).contains("deprecat");
1105             if (!deprecatedComment) {
1106                 if (separate) {
1107                     // special handling for very first comment
1108                     if (b.length() == 0) {
1109                         b.append("<!--")
1110                         .append(System.lineSeparator())
1111                         .append(c)
1112                         .append(System.lineSeparator())
1113                         .append("-->");
1114                         continue;
1115                     }
1116                     b.append(System.lineSeparator());
1117                 } else {
1118                     b.append(COMMENT_PREFIX);
1119                 }
1120                 b.append("<!-- ").append(c).append(" -->");
1121             }
1122         }
1123     }
1124 
removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1125     public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) {
1126         for (Iterator<T> it = elements.iterator(); it.hasNext();) {
1127             T item = it.next();
1128             if (matcher.transform(item) == Boolean.TRUE) {
1129                 it.remove();
1130                 return item;
1131             }
1132         }
1133         return null;
1134     }
1135 
getElements()1136     public Set<Element> getElements() {
1137         return new LinkedHashSet<>(nameToElement.values());
1138     }
1139 
getAttributes()1140     public Set<Attribute> getAttributes() {
1141         return new LinkedHashSet<>(nameToAttributes.values());
1142     }
1143 
isDistinguishing(String elementName, String attribute)1144     public boolean isDistinguishing(String elementName, String attribute) {
1145         return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished;
1146     }
1147 
    /** Attribute names that {@code toString} always annotates with {@code @METADATA}. */
    static final Set<String> METADATA = new HashSet<>(Arrays.asList("references", "standard", "draft"));
1149 
addUnmodifiable(Set<String> comment, String addition)1150     static final Set<String> addUnmodifiable(Set<String> comment, String addition) {
1151         if (comment == null) {
1152             return Collections.singleton(addition);
1153         } else {
1154             comment = new LinkedHashSet<>(comment);
1155             comment.add(addition);
1156             return Collections.unmodifiableSet(comment);
1157         }
1158     }
1159 
1160     public class IllegalByDtdException extends RuntimeException {
1161         private static final long serialVersionUID = 1L;
1162         public final String elementName;
1163         public final String attributeName;
1164         public final String attributeValue;
1165 
IllegalByDtdException(String elementName, String attributeName, String attributeValue)1166         public IllegalByDtdException(String elementName, String attributeName, String attributeValue) {
1167             this.elementName = elementName;
1168             this.attributeName = attributeName;
1169             this.attributeValue = attributeValue;
1170         }
1171 
1172         @Override
getMessage()1173         public String getMessage() {
1174             return "Dtd " + dtdType
1175                 + " doesn’t allow "
1176                 + "element=" + elementName
1177                 + (attributeName == null ? "" : ", attribute: " + attributeName)
1178                 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue);
1179         }
1180     }
1181 
1182     //@SuppressWarnings("unused")
isDeprecated(String elementName, String attributeName, String attributeValue)1183     public boolean isDeprecated(String elementName, String attributeName, String attributeValue) {
1184         Element element = nameToElement.get(elementName);
1185         if (element == null) {
1186             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1187         } else if (element.isDeprecatedElement) {
1188             return true;
1189         }
1190         if ("*".equals(attributeName) || "_q".equals(attributeName)) {
1191             return false;
1192         }
1193         Attribute attribute = element.getAttributeNamed(attributeName);
1194         if (attribute == null) {
1195             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1196         } else if (attribute.isDeprecatedAttribute) {
1197             return true;
1198         }
1199         return attribute.deprecatedValues.contains(attributeValue); // don't need special test for "*"
1200     }
1201 
isOrdered(String elementName)1202     public boolean isOrdered(String elementName) {
1203         Element element = nameToElement.get(elementName);
1204         if (element == null) {
1205             if (elementName.startsWith("icu:")) {
1206                 return false;
1207             }
1208             throw new IllegalByDtdException(elementName, null, null);
1209         }
1210         return element.isOrderedElement;
1211     }
1212 
getAttributeStatus(String elementName, String attributeName)1213     public AttributeStatus getAttributeStatus(String elementName, String attributeName) {
1214         if ("_q".equals(attributeName)) {
1215             return AttributeStatus.distinguished; // special case
1216         }
1217         Element element = nameToElement.get(elementName);
1218         if (element == null) {
1219             if (elementName.startsWith("icu:")) {
1220                 return AttributeStatus.distinguished;
1221             }
1222             throw new IllegalByDtdException(elementName, attributeName, null);
1223         }
1224         Attribute attribute = element.getAttributeNamed(attributeName);
1225         if (attribute == null) {
1226             if (elementName.startsWith("icu:")) {
1227                 return AttributeStatus.distinguished;
1228             }
1229             throw new IllegalByDtdException(elementName, attributeName, null);
1230         }
1231         return attribute.attributeStatus;
1232     }
1233 
    // The default is a map comparator, which compares numbers as numbers, and strings with UCA
    private static MapComparator<String> valueOrdering = new MapComparator<String>().setErrorOnMissing(false).freeze();

    /** Weekday ordering; used for the "day" attribute and for the "type" attribute of "day" elements. */
    static MapComparator<String> dayValueOrder = new MapComparator<String>().add(
        "sun", "mon", "tue", "wed", "thu", "fri", "sat").freeze();
    /** Ordering of day-period identifiers. */
    static MapComparator<String> dayPeriodOrder = new MapComparator<String>().add(
        "midnight", "am", "noon", "pm",
        "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2",
        // The ones on the following line are no longer used actively. Can be removed later?
        "earlyMorning", "morning", "midDay", "afternoon", "evening", "night", "weeHours").freeze();
    /** Ordering for the "type" attribute of "listPatternPart" elements. */
    static MapComparator<String> listPatternOrder = new MapComparator<String>().add(
        "start", "middle", "end", "2", "3").freeze();
    /** Ordering for the "type" attribute of elements whose name ends in "Width". */
    static MapComparator<String> widthOrder = new MapComparator<String>().add(
        "abbreviated", "narrow", "short", "wide", "all").freeze();
    /** Ordering for the "type" attribute of elements whose name ends in "FormatLength". */
    static MapComparator<String> lengthOrder = new MapComparator<String>().add(
        "full", "long", "medium", "short").freeze();
    /** Ordering for the "type" attribute of "field" elements. */
    static MapComparator<String> dateFieldOrder = new MapComparator<String>().add(
        "era", "era-short", "era-narrow",
        "year", "year-short", "year-narrow",
        "quarter", "quarter-short", "quarter-narrow",
        "month", "month-short", "month-narrow",
        "week", "week-short", "week-narrow",
        "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow",
        "day", "day-short", "day-narrow",
        "dayOfYear", "dayOfYear-short", "dayOfYear-narrow",
        "weekday", "weekday-short", "weekday-narrow",
        "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow",
        "sun", "sun-short", "sun-narrow",
        "mon", "mon-short", "mon-narrow",
        "tue", "tue-short", "tue-narrow",
        "wed", "wed-short", "wed-narrow",
        "thu", "thu-short", "thu-narrow",
        "fri", "fri-short", "fri-narrow",
        "sat", "sat-short", "sat-narrow",
        // NOTE(review): unlike every other row, "-short" comes first here; confirm this is intended
        "dayperiod-short", "dayperiod", "dayperiod-narrow",
        "hour", "hour-short", "hour-narrow",
        "minute", "minute-short", "minute-narrow",
        "second", "second-short", "second-narrow",
        "zone", "zone-short", "zone-narrow").freeze();
1273 
    /* TODO: change this to be data-file driven. Can do with new Unit preferences info; also put them in a more meaningful order (metric vs other; size) */

    /**
     * Fixed ordering of unit identifiers, grouped by their category prefix.
     * Entries marked "deprecated" are older identifiers that are still listed.
     */
    public static final MapComparator<String> unitOrder = new MapComparator<String>().add(
        "acceleration-g-force", "acceleration-meter-per-square-second",
        "acceleration-meter-per-second-squared", // deprecated
        "angle-revolution", "angle-radian", "angle-degree", "angle-arc-minute", "angle-arc-second",
        "area-square-kilometer", "area-hectare", "area-square-meter", "area-square-centimeter",
        "area-square-mile", "area-acre", "area-square-yard", "area-square-foot", "area-square-inch",
        "area-dunam",
        "concentr-karat",
        "proportion-karat",  // deprecated
        "concentr-milligram-ofglucose-per-deciliter",
        "concentr-milligram-per-deciliter",
        "concentr-millimole-per-liter",
        "concentr-item",
        "concentr-portion",
        "concentr-permillion",
        "concentr-part-per-million",  // deprecated
        "concentr-percent", "concentr-permille", "concentr-permyriad",
        "concentr-mole",
        "concentr-ofglucose",
        "consumption-liter-per-kilometer", "consumption-liter-per-100-kilometer",
        "consumption-liter-per-100kilometers", // deprecated
        "consumption-mile-per-gallon", "consumption-mile-per-gallon-imperial",
        "digital-petabyte", "digital-terabyte", "digital-terabit", "digital-gigabyte", "digital-gigabit",
        "digital-megabyte", "digital-megabit", "digital-kilobyte", "digital-kilobit",
        "digital-byte", "digital-bit",
        "duration-century", "duration-decade",
        "duration-year", "duration-year-person",
        "duration-month", "duration-month-person",
        "duration-week", "duration-week-person",
        "duration-day", "duration-day-person",
        "duration-hour", "duration-minute", "duration-second",
        "duration-millisecond", "duration-microsecond", "duration-nanosecond",
        "electric-ampere", "electric-milliampere", "electric-ohm", "electric-volt",
        "energy-kilocalorie", "energy-calorie", "energy-foodcalorie", "energy-kilojoule", "energy-joule", "energy-kilowatt-hour",
        "energy-electronvolt",
        "energy-british-thermal-unit",
        "energy-therm-us",
        "force-pound-force",
        "force-newton",
        "force-kilowatt-hour-per-100-kilometer",
        "frequency-gigahertz", "frequency-megahertz", "frequency-kilohertz", "frequency-hertz",
        "graphics-em", "graphics-pixel", "graphics-megapixel",
        "graphics-pixel-per-centimeter", "graphics-pixel-per-inch",
        "graphics-dot-per-centimeter", "graphics-dot-per-inch",
        "graphics-dot",
        "length-earth-radius",
        "length-100-kilometer",
        "length-kilometer", "length-meter", "length-decimeter", "length-centimeter",
        "length-millimeter", "length-micrometer", "length-nanometer", "length-picometer",
        "length-mile", "length-yard", "length-foot", "length-inch",
        "length-parsec", "length-light-year", "length-astronomical-unit",
        "length-furlong", "length-fathom",
        "length-nautical-mile", "length-mile-scandinavian",
        "length-point",
        "length-solar-radius",
        "light-lux",
        "light-candela",
        "light-lumen",
        "light-solar-luminosity",
        "mass-metric-ton", "mass-kilogram", "mass-gram", "mass-milligram", "mass-microgram",
        "mass-ton", "mass-stone", "mass-pound", "mass-ounce",
        "mass-ounce-troy", "mass-carat",
        "mass-dalton",
        "mass-earth-mass",
        "mass-solar-mass",

        "mass-grain",

        "power-gigawatt", "power-megawatt", "power-kilowatt", "power-watt", "power-milliwatt",
        "power-horsepower",
        "pressure-millimeter-ofhg",
        "pressure-millimeter-of-mercury", // deprecated
        "pressure-ofhg",
        "pressure-pound-force-per-square-inch",
        "pressure-pound-per-square-inch", // deprecated
        "pressure-inch-ofhg",
        "pressure-inch-hg",  // deprecated
        "pressure-bar", "pressure-millibar", "pressure-atmosphere",
        "pressure-pascal",
        "pressure-hectopascal",
        "pressure-kilopascal",
        "pressure-megapascal",
        "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour", "speed-knot",
        "temperature-generic", "temperature-celsius", "temperature-fahrenheit", "temperature-kelvin",
        "torque-pound-force-foot",
        "torque-pound-foot", // deprecated
        "torque-newton-meter",
        "volume-cubic-kilometer", "volume-cubic-meter", "volume-cubic-centimeter",
        "volume-cubic-mile", "volume-cubic-yard", "volume-cubic-foot", "volume-cubic-inch",
        "volume-megaliter", "volume-hectoliter", "volume-liter", "volume-deciliter", "volume-centiliter", "volume-milliliter",
        "volume-pint-metric", "volume-cup-metric",
        "volume-acre-foot",
        "volume-bushel", "volume-gallon", "volume-gallon-imperial", "volume-quart", "volume-pint", "volume-cup",
        "volume-fluid-ounce", "volume-fluid-ounce-imperial", "volume-tablespoon", "volume-teaspoon",
        "volume-barrel",

        "volume-dessert-spoon",
        "volume-dessert-spoon-imperial",
        "volume-drop",
        "volume-dram",
        "volume-jigger",
        "volume-pinch",
        "volume-quart-imperial"
       // "volume-pint-imperial"
        ).freeze();
1381 
    /** Ordering of count values: explicit "0"/"1" first, then the named categories. */
    static MapComparator<String> countValueOrder = new MapComparator<String>().add(
        "0", "1", "zero", "one", "two", "few", "many", "other").freeze();
    /** Ordering of unit length identifiers. */
    static MapComparator<String> unitLengthOrder = new MapComparator<String>().add(
        "long", "short", "narrow").freeze();
    /** Ordering of currency format types. */
    static MapComparator<String> currencyFormatOrder = new MapComparator<String>().add(
        "standard", "accounting").freeze();
    /** Time zone ID ordering obtained from {@code StandardCodes}; used for the "type" attribute of "zone" elements. */
    static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator();

    // The configured collator, viewed as a String comparator through a raw (unchecked) cast.
    static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator();
1391 
1392     // Hack for US
1393     static final Comparator<String> UNICODE_SET_COMPARATOR = new Comparator<String>() {
1394         @Override
1395         public int compare(String o1, String o2) {
1396             if (o1.contains("{")) {
1397                 o1 = o1.replace("{", "");
1398             }
1399             if (o2.contains("{")) {
1400                 o2 = o2.replace("{", "");
1401             }
1402             return COMP.compare(o1, o2);
1403         }
1404 
1405     };
1406 
getAttributeValueComparator(String element, String attribute)1407     public static Comparator<String> getAttributeValueComparator(String element, String attribute) {
1408         return getAttributeValueComparator(DtdType.ldml, element, attribute);
1409     }
1410 
getAttributeValueComparator(DtdType type, String element, String attribute)1411     static Comparator<String> getAttributeValueComparator(DtdType type, String element, String attribute) {
1412         // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1413         Comparator<String> comp = valueOrdering;
1414         if (type != DtdType.ldml && type != DtdType.ldmlICU) {
1415             return comp;
1416         }
1417         if (attribute.equals("day")) { // && (element.startsWith("weekend")
1418             comp = dayValueOrder;
1419         } else if (attribute.equals("type")) {
1420             if (element.endsWith("FormatLength")) {
1421                 comp = lengthOrder;
1422             } else if (element.endsWith("Width")) {
1423                 comp = widthOrder;
1424             } else if (element.equals("day")) {
1425                 comp = dayValueOrder;
1426             } else if (element.equals("field")) {
1427                 comp = dateFieldOrder;
1428             } else if (element.equals("zone")) {
1429                 comp = zoneOrder;
1430             } else if (element.equals("listPatternPart")) {
1431                 comp = listPatternOrder;
1432             } else if (element.equals("currencyFormat")) {
1433                 comp = currencyFormatOrder;
1434             } else if (element.equals("unitLength")) {
1435                 comp = unitLengthOrder;
1436             } else if (element.equals("unit")) {
1437                 comp = unitOrder;
1438             } else if (element.equals("dayPeriod")) {
1439                 comp = dayPeriodOrder;
1440             }
1441         } else if (attribute.equals("count") && !element.equals("minDays")) {
1442             comp = countValueOrder;
1443         } else if (attribute.equals("cp") && element.equals("annotation")) {
1444             comp = UNICODE_SET_COMPARATOR;
1445         }
1446         return comp;
1447     }
1448 
1449     /**
1450      * Comparator for attributes in CLDR files
1451      */
1452     private static AttributeValueComparator ldmlAvc = new AttributeValueComparator() {
1453         @Override
1454         public int compare(String element, String attribute, String value1, String value2) {
1455             Comparator<String> comp = getAttributeValueComparator(element, attribute);
1456             return comp.compare(value1, value2);
1457         }
1458     };
1459 
hasValue(String elementName)1460     public boolean hasValue(String elementName) {
1461         return nameToElement.get(elementName).type == ElementType.PCDATA;
1462     }
1463 
isMetadata(XPathParts pathPlain)1464     public boolean isMetadata(XPathParts pathPlain) {
1465         for (String s : pathPlain.getElements()) {
1466             Element e = getElementFromName().get(s);
1467             if (e.elementStatus == ElementStatus.metadata) {
1468                 return true;
1469             }
1470         }
1471         return false;
1472     }
1473 
isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1474     public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) {
1475         // TODO Don't use hard-coded list; instead add to DTD annotations
1476         final String element1 = pathPlain.getElement(1);
1477         final String element2 = pathPlain.getElement(2);
1478         final String elementN = pathPlain.getElement(-1);
1479         switch (dtdType2) {
1480         case ldml:
1481             switch (element1) {
1482             case "generation":
1483             case "metadata":
1484                 return true;
1485             }
1486             break;
1487         case ldmlBCP47:
1488             switch (element1) {
1489             case "generation":
1490             case "version":
1491                 return true;
1492             }
1493             break;
1494             ////supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment
1495         case supplementalData:
1496             // these are NOT under /metadata/ but are actually metadata
1497             switch (element1) {
1498             case "generation":
1499             case "version":
1500             case "validity":
1501             case "references":
1502             case "coverageLevels":
1503                 return true;
1504             case "transforms":
1505                 return elementN.equals("comment");
1506             case "metadata":
1507                 // these ARE under /metadata/, but many others under /metadata/ are NOT actually metadata.
1508                 switch (element2) {
1509                 case "validity":
1510                 case "serialElements":
1511                 case "suppress":
1512                 case "distinguishing":
1513                 case "blocking":
1514                 case "casingData":
1515                     return true;
1516                 }
1517                 break;
1518             }
1519             break;
1520         default:
1521         }
1522         return false;
1523     }
1524 
isDeprecated(XPathParts pathPlain)1525     public boolean isDeprecated(XPathParts pathPlain) {
1526         for (int i = 0; i < pathPlain.size(); ++i) {
1527             String elementName = pathPlain.getElement(i);
1528             if (isDeprecated(elementName, "*", null)) {
1529                 return true;
1530             }
1531             for (String attribute : pathPlain.getAttributeKeys(i)) {
1532                 String attributeValue = pathPlain.getAttributeValue(i, attribute);
1533                 if (isDeprecated(elementName, attribute, attributeValue)) {
1534                     return true;
1535                 }
1536             }
1537         }
1538         return false;
1539     }
1540 
1541     public final static Splitter SPACE_SPLITTER = Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings();
1542     public final static Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings();
1543     public final static Splitter CR_SPLITTER = Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings();
1544 
1545     private static class XPathPartsSet {
1546         private final Set<XPathParts> list = new LinkedHashSet<>();
1547 
addElement(String element)1548         private void addElement(String element) {
1549             if (list.isEmpty()) {
1550                 list.add(new XPathParts().addElement(element));
1551             } else {
1552                 for (XPathParts item : list) {
1553                     item.addElement(element);
1554                 }
1555             }
1556         }
1557 
addAttribute(String attribute, String attributeValue)1558         private void addAttribute(String attribute, String attributeValue) {
1559             for (XPathParts item : list) {
1560                 item.addAttribute(attribute, attributeValue);
1561             }
1562         }
1563 
setElement(int i, String string)1564         private void setElement(int i, String string) {
1565             for (XPathParts item : list) {
1566                 item.setElement(i, string);
1567             }
1568         }
1569 
addAttributes(String attribute, List<String> attributeValues)1570         private void addAttributes(String attribute, List<String> attributeValues) {
1571             if (attributeValues.size() == 1) {
1572                 addAttribute(attribute, attributeValues.iterator().next());
1573             } else {
1574                 // duplicate all the items in the list with the given values
1575                 Set<XPathParts> newList = new LinkedHashSet<>();
1576                 for (XPathParts item : list) {
1577                     for (String attributeValue : attributeValues) {
1578                         XPathParts newItem = item.cloneAsThawed();
1579                         newItem.addAttribute(attribute, attributeValue);
1580                         newList.add(newItem);
1581                     }
1582                 }
1583                 list.clear();
1584                 list.addAll(newList);
1585             }
1586         }
1587 
toStrings()1588         private ImmutableSet<String> toStrings() {
1589             Builder<String> result = new ImmutableSet.Builder<>();
1590 
1591             for (XPathParts item : list) {
1592                 result.add(item.toString());
1593             }
1594             return result.build();
1595         }
1596 
1597         @Override
toString()1598         public String toString() {
1599             return list.toString();
1600         }
1601     }
1602 
getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)1603     public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) {
1604         extras.clear();
1605         Map<String, String> valueAttributes = new HashMap<>();
1606         XPathPartsSet pathResult = new XPathPartsSet();
1607         String element = null;
1608         for (int i = 0; i < pathPlain.size(); ++i) {
1609             element = pathPlain.getElement(i);
1610             pathResult.addElement(element);
1611             valueAttributes.clear();
1612             for (String attribute : pathPlain.getAttributeKeys(i)) {
1613                 AttributeStatus status = getAttributeStatus(element, attribute);
1614                 final String attributeValue = pathPlain.getAttributeValue(i, attribute);
1615                 switch (status) {
1616                 case distinguished:
1617                     AttributeType attrType = getAttributeType(element, attribute);
1618                     if (attrType == AttributeType.NMTOKENS) {
1619                         pathResult.addAttributes(attribute, SPACE_SPLITTER.splitToList(attributeValue));
1620                     } else {
1621                         pathResult.addAttribute(attribute, attributeValue);
1622                     }
1623                     break;
1624                 case value:
1625                     valueAttributes.put(attribute, attributeValue);
1626                     break;
1627                 case metadata:
1628                     break;
1629                 }
1630             }
1631             if (!valueAttributes.isEmpty()) {
1632                 boolean hasValue = hasValue(element);
1633                 // if it doesn't have a value, we construct new child elements, with _ prefix
1634                 // if it does have a value, we have to play a further trick, since
1635                 // we can't have a value and child elements at the same level.
1636                 // So we use a _ suffix on the element.
1637                 if (hasValue) {
1638                     pathResult.setElement(i, element + "_");
1639                 } else {
1640                     int debug = 0;
1641                 }
1642                 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) {
1643                     final String attribute = attributeAndValue.getKey();
1644                     final String attributeValue = attributeAndValue.getValue();
1645 
1646                     Set<String> pathsShort = pathResult.toStrings();
1647                     AttributeType attrType = getAttributeType(element, attribute);
1648                     for (String pathShort : pathsShort) {
1649                         pathShort += "/_" + attribute;
1650                         if (attrType == AttributeType.NMTOKENS) {
1651                             for (String valuePart : SPACE_SPLITTER.split(attributeValue)) {
1652                                 extras.put(pathShort, valuePart);
1653                             }
1654                         } else {
1655                             extras.put(pathShort, attributeValue);
1656                         }
1657                     }
1658                 }
1659                 if (hasValue) {
1660                     pathResult.setElement(i, element); // restore
1661                 }
1662             }
1663         }
1664         // Only add the path if it could have a value, looking at the last element
1665         if (!hasValue(element)) {
1666             return null;
1667         }
1668         return pathResult.toStrings();
1669     }
1670 
getAttributeType(String elementName, String attributeName)1671     public AttributeType getAttributeType(String elementName, String attributeName) {
1672         Attribute attr = getAttribute(elementName, attributeName);
1673         return (attr != null) ? attr.type : null;
1674     }
1675 
getAttribute(String elementName, String attributeName)1676     public Attribute getAttribute(String elementName, String attributeName) {
1677         Element element = nameToElement.get(elementName);
1678         return (element != null) ? element.getAttributeNamed(attributeName) : null;
1679     }
1680 
1681     // TODO: add support for following to DTD annotations, and rework API
1682 
1683     static final Set<String> SPACED_VALUES = ImmutableSet.of(
1684         "idValidity",
1685         "languageGroup");
1686 
getValueSplitter(XPathParts pathPlain)1687     public static Splitter getValueSplitter(XPathParts pathPlain) {
1688         if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) {
1689             return SPACE_SPLITTER;
1690         } else if (pathPlain.getElement(-1).equals("annotation")
1691             && !pathPlain.getAttributeKeys(-1).contains("tts")) {
1692             return BAR_SPLITTER;
1693         }
1694         return CR_SPLITTER;
1695     }
1696 
isComment(XPathParts pathPlain, String line)1697     public static boolean isComment(XPathParts pathPlain, String line) {
1698         if (pathPlain.contains("transform")) {
1699             if (line.startsWith("#")) {
1700                 return true;
1701             }
1702         }
1703         return false;
1704     }
1705 
isExtraSplit(String extraPath)1706     public static boolean isExtraSplit(String extraPath) {
1707         if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) {
1708             return true;
1709         }
1710         return false;
1711     }
1712 
1713     /**
1714      * Return the value status for an EAV
1715      */
getValueStatus(String elementName, String attributeName, String value)1716     public ValueStatus getValueStatus(String elementName, String attributeName, String value) {
1717         Element element = nameToElement.get(elementName);
1718         if (element == null) {
1719             return ValueStatus.invalid;
1720         }
1721         Attribute attr = element.getAttributeNamed(attributeName);
1722         if (attr == null) {
1723             return ValueStatus.invalid;
1724         }
1725         return attr.getValueStatus(value);
1726     }
1727 
1728     /**
1729      * Return element-attribute pairs with non-enumerated values, for quick checks.
1730      */
getNonEnumerated(Map<String,String> matchValues)1731     public Multimap<String, String> getNonEnumerated(Map<String,String> matchValues) {
1732         Multimap<String,String> nonEnumeratedElementToAttribute = TreeMultimap.create(); // make tree for ease of debugging
1733         for (Entry<String, Element> entry : nameToElement.entrySet()) {
1734             Element element = entry.getValue();
1735             for (Attribute attribute : element.attributes.keySet()) {
1736                 if (attribute.type != AttributeType.ENUMERATED_TYPE) {
1737                     String elementName = element.getName();
1738                     String attrName = attribute.getName();
1739                     nonEnumeratedElementToAttribute.put(elementName, attrName);
1740                     if (attribute.matchValue != null) {
1741                         matchValues.put(elementName + "\t" + attrName, attribute.matchValue.getName());
1742                     }
1743                 }
1744             }
1745         }
1746         return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute);
1747     }
1748 }
1749