1 package org.unicode.cldr.api; 2 3 import static com.google.common.base.Preconditions.checkArgument; 4 import static com.google.common.collect.ImmutableMap.toImmutableMap; 5 import static java.util.function.Function.identity; 6 import static org.unicode.cldr.util.DtdData.AttributeStatus.distinguished; 7 import static org.unicode.cldr.util.DtdData.AttributeStatus.value; 8 import static org.unicode.cldr.util.DtdData.Mode.OPTIONAL; 9 10 import java.nio.file.Path; 11 import java.nio.file.Paths; 12 import java.util.Arrays; 13 import java.util.Comparator; 14 import java.util.function.Predicate; 15 import java.util.stream.Stream; 16 17 import org.unicode.cldr.util.DtdData; 18 import org.unicode.cldr.util.DtdData.Attribute; 19 import org.unicode.cldr.util.DtdData.Element; 20 import org.unicode.cldr.util.DtdType; 21 22 import com.google.common.collect.ImmutableList; 23 import com.google.common.collect.ImmutableMap; 24 25 /** 26 * Data types for non-locale based CLDR data. For the canonical specification for LDML data can 27 * be found at <a href="https://unicode.org/reports/tr35">Unicode Locale Data Markup Language<\a>. 28 * 29 * <p>This enum is largely a wrapper for functionality found in the underlying CLDR classes, but 30 * repackaged for convenience and to minimize surface area (and to avoid anyone needing to import 31 * classes from outside the "api" package). 32 */ 33 public enum CldrDataType { 34 /** 35 * Non-locale based BCP47 data, typically associated with international identifiers such as 36 * currency symbols, timezone identifiers etc. 37 */ 38 BCP47(DtdType.ldmlBCP47), 39 /** 40 * Non-locale based supplemental data, typically associated with character tables (e.g. for 41 * break iterator). 42 */ 43 SUPPLEMENTAL(DtdType.supplementalData), 44 /** 45 * Locale based LDML data consisting of internationalization information and translations on a 46 * per locale basis. LDML data for one locale may be inherited from other locales. 47 */ 48 LDML(DtdType.ldml, DtdType.ldmlICU); 49 50 private static final ImmutableMap<String, CldrDataType> NAME_MAP = 51 Arrays.stream(values()).collect(toImmutableMap(t -> t.mainType.name(), identity())); 52 53 /** 54 * Returns a CLDR data type given its XML name (the root element name in a CLDR path). 55 * 56 * @param name the XML path root (e.g. "ldml" or "supplementalData"). 57 * @return the associated data type instance. 58 */ forXmlName(String name)59 public static CldrDataType forXmlName(String name) { 60 CldrDataType type = NAME_MAP.get(name); 61 checkArgument(type != null, "unsupported DTD type: %s", name); 62 return type; 63 } 64 forRawType(DtdType rawType)65 static CldrDataType forRawType(DtdType rawType) { 66 return forXmlName(rawType.name()); 67 } 68 69 private final DtdType mainType; 70 private final ImmutableList<DtdType> extraTypes; 71 private final Comparator<String> elementComparator; 72 private final Comparator<String> attributeComparator; 73 CldrDataType(DtdType mainType, DtdType... extraTypes)74 CldrDataType(DtdType mainType, DtdType... extraTypes) { 75 this.mainType = mainType; 76 this.extraTypes = ImmutableList.copyOf(extraTypes); 77 // There's no need to cache the DtdData instance since getInstance() already does that. 78 DtdData dtd = DtdData.getInstance(mainType); 79 // Note that the function passed in to the wrapped comparators needs to be fast, since it's 80 // called for each comparison. We assume getElementFromName() and getAttributesFromName() 81 // are efficient, and if not we'll need to cache. 82 this.elementComparator = 83 wrapToHandleUnknownNames( 84 dtd.getElementComparator(), 85 dtd.getElementFromName()::containsKey); 86 this.attributeComparator = 87 wrapToHandleUnknownNames( 88 dtd.getAttributeComparator(), 89 dtd.getAttributesFromName()::containsKey); 90 } 91 getLdmlName()92 String getLdmlName() { 93 return mainType.name(); 94 } 95 getSourceDirectories()96 Stream<Path> getSourceDirectories() { 97 return mainType.directories.stream().map(Paths::get); 98 } 99 100 /** 101 * Returns all elements known for this DTD type in undefined order. This can include elements 102 * in external namespaces (e.g. "icu:xxx"). 103 */ getElements()104 Stream<Element> getElements() { 105 Stream<Element> elements = elementsFrom(mainType); 106 if (!extraTypes.isEmpty()) { 107 elements = 108 Stream.concat(elements, extraTypes.stream().flatMap(CldrDataType::elementsFrom)); 109 } 110 return elements; 111 } 112 elementsFrom(DtdType dataType)113 private static Stream<Element> elementsFrom(DtdType dataType) { 114 // NOTE: DO NOT call getElements() here because it makes a new set every time!! 115 return DtdData.getInstance(dataType).getElementFromName().values().stream(); 116 } 117 getAttribute(String elementName, String attributeName)118 Attribute getAttribute(String elementName, String attributeName) { 119 Attribute attr = DtdData.getInstance(mainType).getAttribute(elementName, attributeName); 120 if (attr == null) { 121 for (DtdType t : extraTypes) { 122 attr = DtdData.getInstance(t).getAttribute(elementName, attributeName); 123 if (attr != null) { 124 break; 125 } 126 } 127 } 128 return attr; 129 } 130 getElementComparator()131 Comparator<String> getElementComparator() { 132 return elementComparator; 133 } 134 getAttributeComparator()135 Comparator<String> getAttributeComparator() { 136 return attributeComparator; 137 } 138 139 // Unknown elements outside the DTD (such as "//ldml/special" icu:xxx elements) are not 140 // handled properly by the underlying element/attribute name comparators (they throw an 141 // exception) so we have to detect these cases first and handle them manually (even though 142 // they are very rare). Assume that: 143 // * known DTD elements come before any unknown ones, and 144 // * unknown element names can be sorted lexicographically using their qualified name. wrapToHandleUnknownNames( Comparator<String> compare, Predicate<String> isKnown)145 private static Comparator<String> wrapToHandleUnknownNames( 146 Comparator<String> compare, Predicate<String> isKnown) { 147 // This code should only return "signum" values for ordering (i.e. {-1, 0, 1}). 148 return (lname, rname) -> { 149 if (isKnown.test(lname)) { 150 return isKnown.test(rname) ? compare.compare(lname, rname) : -1; 151 } else { 152 return isKnown.test(rname) ? 1 : lname.compareTo(rname); 153 } 154 }; 155 } 156 157 // We shouldn't need to check special cases (e.g. "_q") here because this should only be being 158 // called _after_ those have been filtered out. 159 // The only time that both these methods return false should be for known attributes that are 160 // either marked as deprecated or as metatadata attributes. 161 boolean isDistinguishingAttribute(String elementName, String attributeName) { 162 Attribute attribute = getAttribute(elementName, attributeName); 163 if (attribute != null) { 164 return attribute.attributeStatus == distinguished && !attribute.isDeprecated(); 165 } 166 // This can happen if attribute keys are speculatively generated, which sometimes happens 167 // in transformation logic. Ideally this would end up being an error. 168 return false; 169 } 170 171 /** Returns whether the specified attribute is a "value" attribute. */ 172 boolean isValueAttribute(String elementName, String attributeName) { 173 Attribute attribute = getAttribute(elementName, attributeName); 174 if (attribute != null) { 175 return attribute.attributeStatus == value && !attribute.isDeprecated(); 176 } 177 return true; 178 } 179 180 /** Returns whether the specified attribute is a "value" attribute. */ 181 boolean isValueAttribute(AttributeKey key) { 182 return isValueAttribute(key.getElementName(), key.getAttributeName()); 183 } 184 185 /** 186 * Returns whether the specified attribute is optional. Attributes unknown to the DTD are also 187 * considered optional, which can happen if attribute keys are speculatively generated, which 188 * sometimes happens in transformation logic. 189 */ 190 boolean isOptionalAttribute(AttributeKey key) { 191 Attribute attribute = getAttribute(key.getElementName(), key.getAttributeName()); 192 return attribute == null || attribute.mode == OPTIONAL; 193 } 194 } 195