// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu.mapper;
4 
5 import static com.google.common.base.Ascii.toLowerCase;
6 import static com.google.common.base.Preconditions.checkState;
7 import static org.unicode.cldr.api.AttributeKey.keyOf;
8 import static org.unicode.cldr.api.CldrData.PathOrder.DTD;
9 import static org.unicode.cldr.api.CldrDataType.BCP47;
10 
11 import java.util.LinkedHashMap;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Optional;
16 import java.util.Set;
17 
18 import org.unicode.cldr.api.AttributeKey;
19 import org.unicode.cldr.api.CldrData;
20 import org.unicode.cldr.api.CldrDataSupplier;
21 import org.unicode.cldr.api.CldrDataType;
22 import org.unicode.cldr.api.CldrPath;
23 import org.unicode.cldr.api.CldrValue;
24 import org.unicode.icu.tool.cldrtoicu.IcuData;
25 import org.unicode.icu.tool.cldrtoicu.RbPath;
26 import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;
27 
28 import com.google.common.annotations.VisibleForTesting;
29 import com.google.common.base.Ascii;
30 import com.google.common.collect.ImmutableList;
31 import com.google.common.collect.ImmutableMap;
32 import com.google.common.collect.Sets;
33 
34 /**
35  * A mapper to collect BCP-47 data from {@link CldrDataType#BCP47 BCP47} data under paths
36  * matching:
37  * <pre>{@code
38  *   //ldmlBCP47/keyword/key[@name=*]/type[@name=*]
39  * }</pre>
40  */
41 public final class Bcp47Mapper {
42     // Other attributes (e.g. "alias") are value attributes and don't need to be matched here.
43     private static final AttributeKey KEY_NAME = keyOf("key", "name");
44     private static final AttributeKey KEY_ALIAS = keyOf("key", "alias");
45     private static final AttributeKey KEY_VALUE_TYPE = keyOf("key", "valueType");
46 
47     private static final AttributeKey TYPE_NAME = keyOf("type", "name");
48     private static final AttributeKey TYPE_ALIASES = keyOf("type", "alias");
49     private static final AttributeKey PREFERRED_TYPE_NAME = keyOf("type", "preferred");
50 
51     // Deprecation of the data is not the same as deprecation of attributes themselves. This
52     // deprecation relates to identifying data which exists, but is not longer the right way to
53     // represent things (which means it can be important for clients to know about).
54     private static final AttributeKey KEY_DEPRECATED = keyOf("key", "deprecated");
55     private static final AttributeKey TYPE_DEPRECATED = keyOf("type", "deprecated");
56 
57     // Attributes that can be emitted under the /keyInfo or /typeInfo paths for auxiliary
58     // information in the ICU data. If the value is equal to the declared default, it is ignored.
59     // NOTE: The need for hard-coded default values is a hack because there's not nice way (yet)
60     // to determine the default for implicit values via the DTD. Ideally this would be automatic
61     // and the AttributeKey class would be able to have a method like "isDefault(String value)".
62     private static final ImmutableMap<AttributeKey, String> INFO_ATTRIBUTES =
63         ImmutableMap.of(KEY_VALUE_TYPE, "", KEY_DEPRECATED, "false", TYPE_DEPRECATED, "false");
64 
65     private static final RbPath RB_KEYMAP = RbPath.of("keyMap");
66     private static final RbPath RB_TYPE_ALIAS = RbPath.of("typeAlias", "timezone:alias");
67     private static final RbPath RB_MAP_ALIAS = RbPath.of("typeMap", "timezone:alias");
68     private static final RbPath RB_BCP_ALIAS = RbPath.of("bcpTypeAlias", "tz:alias");
69 
70     private static final CldrDataProcessor<Bcp47Mapper> BCP47_PROCESSOR;
71     static {
72         CldrDataProcessor.Builder<Bcp47Mapper> processor = CldrDataProcessor.builder();
73         processor
74             .addAction("//ldmlBCP47/keyword/key[@name=*]", (m, p) -> m.new ValueCollector(p))
75             .addValueAction("type[@name=*]", ValueCollector::collect);
76         BCP47_PROCESSOR = processor.build();
77     }
78 
79     /**
80      * Processes data from the given supplier to generate Timezone and BCP-47 ICU data.
81      *
82      * @param src the CLDR data supplier to process.
83      * @return A list of IcuData instances containing BCP-47 data to be written to files.
84      */
process(CldrDataSupplier src)85     public static ImmutableList<IcuData> process(CldrDataSupplier src) {
86         return process(src.getDataForType(BCP47));
87     }
88 
89     @VisibleForTesting // It's easier to supply a fake data instance than a fake supplier.
process(CldrData cldrData)90     static ImmutableList<IcuData> process(CldrData cldrData) {
91         Bcp47Mapper mapper = BCP47_PROCESSOR.process(cldrData, new Bcp47Mapper(), DTD);
92         mapper.addKeyMapValues();
93         return ImmutableList.of(mapper.keyTypeData, mapper.tzData);
94     }
95 
96     // Outer visitor which handles "key" paths by installing sub-visitor methods to process
97     // each child "type" element. Depending on the key name, values are stored in different
98     // IcuData instances.
99     private final IcuData tzData = new IcuData("timezoneTypes", false);
100     private final IcuData keyTypeData = new IcuData("keyTypeData", false);
101     // A map collecting each key and values as they are visited.
102     // TODO: Convert this to a Map<RbPath, String> which involves removing the '@' prefix hack.
103     private Map<String, String> keyMap = new LinkedHashMap<>();
104 
Bcp47Mapper()105     private Bcp47Mapper() { }
106 
107     // Post processing to add additional captured attribute values and some special cases.
addKeyMapValues()108     private void addKeyMapValues() {
109         IcuData keyData = keyTypeData;
110         // Add all the keyMap values into the IcuData file.
111         for (Entry<String, String> kmData : keyMap.entrySet()) {
112             String bcpKey = kmData.getKey();
113             String key = kmData.getValue();
114             if (bcpKey.startsWith("@")) {
115                 // Undoing the weird hack in addInfoAttributes(). This can be done better.
116                 // We use "parse()" because these are full paths, and not single elements.
117                 keyData.add(RbPath.parse(bcpKey.substring(1)), key);
118                 continue;
119             }
120             if (bcpKey.equals(key)) {
121                 // An empty value indicates that the BCP47 key is same as the legacy key.
122                 bcpKey = "";
123             }
124             keyData.add(RB_KEYMAP.extendBy(key), bcpKey);
125         }
126         // Add aliases for timezone data.
127         keyData.add(RB_TYPE_ALIAS, "/ICUDATA/timezoneTypes/typeAlias/timezone");
128         keyData.add(RB_MAP_ALIAS, "/ICUDATA/timezoneTypes/typeMap/timezone");
129         keyData.add(RB_BCP_ALIAS, "/ICUDATA/timezoneTypes/bcpTypeAlias/tz");
130     }
131 
132     private final class ValueCollector {
133         private final String keyName;
134         // Mutable data to be written into (differs depending on the key name).
135         private final IcuData icuData;
136 
ValueCollector(CldrPath prefix)137         ValueCollector(CldrPath prefix) {
138             this.keyName = Ascii.toLowerCase(KEY_NAME.valueFrom(prefix));
139             this.icuData = keyName.equals("tz") ? tzData : keyTypeData;
140         }
141 
collect(CldrValue value)142         private void collect(CldrValue value) {
143             String typeName = TYPE_NAME.valueFrom(value);
144             // Note that if a "preferred" type exists, we treat the value specially and add
145             // it only as an alias. We expected values with a preferred replacement to
146             // always be explicitly deprecated.
147             Optional<String> prefName = PREFERRED_TYPE_NAME.optionalValueFrom(value);
148             if (prefName.isPresent()) {
149                 checkState(KEY_DEPRECATED.booleanValueFrom(value, false)
150                         || TYPE_DEPRECATED.booleanValueFrom(value, false),
151                     "unexpected 'preferred' attribute for non-deprecated value: %s", value);
152                 icuData.add(RbPath.of("bcpTypeAlias", keyName, typeName), prefName.get());
153                 return;
154             }
155             // Note: There are some deprecated values which don't have a preferred
156             // replacement and these will be processed below (in particular we need to emit
157             // the fact that they are deprecated).
158 
159             // Not all key elements have an alias. E.g. in calendar.xml:
160             //     <key name="fw" description="First day of week" since="28">
161             // But we still add it as a alias to itself (which is later turned into a path with
162             // an empty value).
163             String keyAlias = toLowerCase(KEY_ALIAS.valueFrom(value, keyName));
164 
165             keyMap.put(keyName, keyAlias);
166             RbPath typeMapPrefix = RbPath.of("typeMap", keyAlias);
167 
168             List<String> typeAliases = TYPE_ALIASES.listOfValuesFrom(value);
169             if (typeAliases.isEmpty()) {
170                 // Generate type map entry using empty value (an empty value indicates same
171                 // type name is used for both BCP47 and legacy type).
172                 icuData.add(typeMapPrefix.extendBy(typeName), "");
173             } else {
174                 String mainAlias = typeAliases.get(0);
175                 icuData.add(typeMapPrefix.extendBy(quoteAlias(mainAlias)), typeName);
176                 // Put additional aliases as secondary aliases referencing the main alias.
177                 RbPath typeAliasPrefix = RbPath.of("typeAlias", keyAlias);
178                 typeAliases.stream()
179                     .skip(1)
180                     .map(Bcp47Mapper::quoteAlias)
181                     .forEach(a -> icuData.add(typeAliasPrefix.extendBy(a), mainAlias));
182             }
183             addInfoAttributes(keyName, typeName, value.getValueAttributes());
184         }
185 
186         // Add any additional attributes present to the attribute map. Note that this code was
187         // copied from largely undocumented code, and the precise reasoning for why this is
188         // needed or why it's done this way is not completely clear. It is very likely that it
189         // can be simplified.
190         //
191         // The '@' symbol added here is just a magic token that gets stripped off again in the
192         // addKeyMapValues() method, it appears to just be a way to distinguish keys added via
193         // this method vs during the collect method. A better approach might just be to have two
194         // maps.
195         // TODO: Remove the use of '@' and simplify the logic for "info" attributes (infoMap?).
addInfoAttributes( String keyName, String typeName, ImmutableMap<AttributeKey, String> attributes)196         private void addInfoAttributes(
197             String keyName, String typeName, ImmutableMap<AttributeKey, String> attributes) {
198             // Only emit deprecation for the "key" level, even if all types below that are also
199             // marked as deprecated. Only do this for a subset of attributes (INFO_ATTRIBUTES).
200             Set<AttributeKey> keys =
201                 Sets.intersection(attributes.keySet(), INFO_ATTRIBUTES.keySet());
202             for (AttributeKey a : keys) {
203                 String value = attributes.get(a);
204                 // Skip empty or default values in attributes.
205                 if (value.isEmpty() || INFO_ATTRIBUTES.get(a).equals(value)) {
206                     continue;
207                 }
208                 // The ID for the xxxInfo paths in ICU is the path fragment at which the
209                 // attribute exists. Since we only process complete paths here, we must do a
210                 // bit of reconstruction based on the element name of the attribute we are
211                 // processing. This relies on explicit knowledge that the paths are "<key>" or
212                 // "<key>/<type>". This all gets less messy if we switch to RbPath.
213                 String id =
214                     a.getElementName().equals("key") ? keyName : keyName + "/" + typeName;
215                 keyMap.put(
216                     "@" + a.getElementName() + "Info/" + a.getAttributeName() + "/" + id,
217                     value);
218             }
219         }
220     }
221 
222     /**
223      * Escapes alias values containing '/' so they can appear in resource bundle paths. This
224      * function replaces '/' with ':' and quotes the result (e.g. foo/bar -> "foo:bar").
225      *
226      * <p>This is needed for timezone "metazone" ID strings which are of the form 'Foo/Bar'
227      * in the CLDR data.
228      */
229     // TODO: Switch to RbPath and do quoting automatically when ICU data is written out.
quoteAlias(String str)230     private static String quoteAlias(String str) {
231         return str.indexOf('/') == -1 ? str : '"' + str.replace('/', ':') + '"';
232     }
233 }
234