• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.icu;
2 
3 import java.io.File;
4 import java.util.HashMap;
5 import java.util.Map;
6 import java.util.Map.Entry;
7 import java.util.Set;
8 
9 import org.xml.sax.Attributes;
10 import org.xml.sax.SAXException;
11 
12 import com.google.common.collect.ImmutableSet;
13 
14 /**
15  * A mapper that converts BCP 47 data from CLDR to the ICU data structure.
16  *
17  * @author jchye
18  */
19 public class Bcp47Mapper {
20 //    private static final String[] KEYTYPE_FILES = {
21 //        "calendar", "collation", "currency", "number", "variant"
22 //    };
23     private String sourceDir;
24 
Bcp47Mapper(String bcp47Dir)25     public Bcp47Mapper(String bcp47Dir) {
26         sourceDir = bcp47Dir;
27     }
28 
29     /**
30      * Fills an IcuData object with data of the given type.
31      */
fillFromCldr()32     public IcuData[] fillFromCldr() {
33         IcuData timezoneData = new IcuData("common/bcp47/timezone.xml", "timezoneTypes", false);
34         Map<String, String> keyMap = new HashMap<String, String>();
35         // Timezone data is put in a different file.
36         fillFromFile("timezone", timezoneData, keyMap);
37 
38         // Process the rest of the data.
39         IcuData keyTypeData = new IcuData("common/bcp47/*.xml", "keyTypeData", false);
40         for (File file : new File(sourceDir).listFiles()) {
41             final String filenameXml = file.getName();
42             if (filenameXml.endsWith(".xml") && !filenameXml.equals("timezone.xml")) {
43                 fillFromFile(filenameXml.substring(0, filenameXml.length() - 4),
44                     keyTypeData, keyMap);
45             }
46         }
47         // Add all the keyMap values into the IcuData file.
48         for (Entry<String, String> kmData : keyMap.entrySet()) {
49             String bcpKey = kmData.getKey();
50             String key = kmData.getValue();
51             if (bcpKey.startsWith("@")) {
52                 keyTypeData.add("/" + bcpKey.substring(1), key);
53                 continue;
54             }
55             if (bcpKey.equals(key)) {
56                 // empty value to indicate the BCP47 key is same with the legacy key
57                 bcpKey = "";
58             }
59             keyTypeData.add("/keyMap/" + key, bcpKey);
60         }
61         // Add aliases for timezone data.
62         keyTypeData.add("/typeAlias/timezone:alias", "/ICUDATA/timezoneTypes/typeAlias/timezone");
63         keyTypeData.add("/typeMap/timezone:alias", "/ICUDATA/timezoneTypes/typeMap/timezone");
64         keyTypeData.add("/bcpTypeAlias/tz:alias", "/ICUDATA/timezoneTypes/bcpTypeAlias/tz");
65         return new IcuData[] { timezoneData, keyTypeData };
66     }
67 
fillFromFile(String filename, IcuData icuData, Map<String, String> keyMap)68     private void fillFromFile(String filename, IcuData icuData, Map<String, String> keyMap) {
69         KeywordHandler handler = new KeywordHandler(icuData, keyMap);
70         MapperUtils.parseFile(new File(sourceDir, filename + ".xml"), handler);
71     }
72 
73     static final Set<String> SKIP_KEY_ATTRIBUTES = ImmutableSet.of(
74         "name", "alias", "description", "since", "extension");
75     static final Set<String> SKIP_TYPE_ATTRIBUTES = ImmutableSet.of(
76         "name", "alias", "description", "since", "preferred");
77 
78     /**
79      * XML parser for BCP47 data.
80      */
81     private class KeywordHandler extends MapperUtils.EmptyHandler {
82         private String typeAliasPrefix;
83         private String typeMapPrefix;
84         private String bcpTypeAliasPrefix;
85         private String lastKeyName;
86         private IcuData icuData;
87         private Map<String, String> keyMap;
88 
89         /**
90          * KeywordHandler constructor.
91          *
92          * @param icuData
93          *            the IcuData object to store the parsed data
94          * @param keyMap
95          *            a mapping of keys to their aliases. These values will
96          *            not be added to icuData by the handler
97          */
KeywordHandler(IcuData icuData, Map<String, String> keyMap)98         public KeywordHandler(IcuData icuData, Map<String, String> keyMap) {
99             this.icuData = icuData;
100             this.keyMap = keyMap;
101         }
102 
103         @Override
startElement(String uri, String localName, String qName, Attributes attr)104         public void startElement(String uri, String localName, String qName, Attributes attr) throws SAXException {
105             // Format of BCP47 file:
106             // <key name="tz" alias="timezone" description="Time zone key">
107             // <type name="adalv" alias="Europe/Andorra" description="Andorra"/>
108             // ...
109             if (attr == null) {
110                 return;
111             }
112 
113             if (qName.equals("key")) {
114                 String keyName = attr.getValue("name");
115                 if (keyName == null) {
116                     return;
117                 }
118 
119                 String keyAlias = attr.getValue("alias");
120                 if (keyAlias == null) {
121                     keyAlias = keyName;
122 //                    System.err.println(Bcp47Mapper.class.getSimpleName() + " Info: BCP47 key " + keyName
123 //                        + " didn't have the optional alias= value, mapping " + keyName + "->" + keyName);
124                 }
125 
126                 keyName = keyName.toLowerCase();
127                 keyAlias = keyAlias.toLowerCase();
128 
129                 typeAliasPrefix = "/typeAlias/" + keyAlias + '/';
130                 typeMapPrefix = "/typeMap/" + keyAlias + '/';
131                 keyMap.put(keyName, keyAlias);
132                 bcpTypeAliasPrefix = "/bcpTypeAlias/" + keyName + '/';
133                 lastKeyName = keyName;
134                 addOtherInfo(qName, attr, keyName, SKIP_KEY_ATTRIBUTES);
135             } else if (qName.equals("type")) {
136                 String typeName = attr.getValue("name");
137                 if (typeName == null) {
138                     return;
139                 }
140 
141                 // BCP47 type alias (maps deprecated type to preferred type)
142                 String preferredTypeName = attr.getValue("preferred");
143                 if (preferredTypeName != null) {
144                     icuData.add(bcpTypeAliasPrefix + typeName, preferredTypeName);
145                     return;
146                 }
147 
148                 String alias = attr.getValue("alias");
149                 if (alias == null) {
150                     // Generate type map entry using empty value
151                     // (an empty value indicates same type name
152                     // is used for both BCP47 and legacy type.
153                     icuData.add(typeMapPrefix + typeName, "");
154                 } else {
155                     String[] aliases = alias.split("\\s+");
156                     String mainAlias = aliases[0];
157                     icuData.add(typeMapPrefix + formatName(mainAlias), typeName);
158                     for (int i = 1; i < aliases.length; i++) {
159                         icuData.add(typeAliasPrefix + formatName(aliases[i]), mainAlias);
160                     }
161                 }
162                 addOtherInfo(qName, attr, lastKeyName + "/" + typeName, SKIP_KEY_ATTRIBUTES);
163             }
164         }
165 
addOtherInfo(String qName, Attributes attr, String typeName, Set<String> excludedAttributes)166         private void addOtherInfo(String qName, Attributes attr, String typeName, Set<String> excludedAttributes) {
167             for (int i = 0; i < attr.getLength(); ++i) {
168                 String name = attr.getQName(i);
169                 String value = attr.getValue(i);
170                 if (excludedAttributes.contains(name)
171                     || name.equals("deprecated") && value.equals("false")) {
172                     continue;
173                 }
174                 keyMap.put("@" + qName + "Info/" + name + "/" + typeName, value);
175             }
176         }
177 
formatName(String str)178         private String formatName(String str) {
179             if (str.indexOf('/') > -1) {
180                 str = '"' + str.replace('/', ':') + '"';
181             }
182             return str;
183         }
184     }
185 }
186