• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.util.Enumeration;
4 import java.util.HashMap;
5 import java.util.HashSet;
6 import java.util.LinkedHashSet;
7 import java.util.Locale;
8 import java.util.Map;
9 import java.util.Map.Entry;
10 import java.util.Set;
11 import java.util.TreeMap;
12 import java.util.TreeSet;
13 
14 import org.unicode.cldr.util.CLDRConfig;
15 import org.unicode.cldr.util.CLDRFile;
16 import org.unicode.cldr.util.CLDRTransforms;
17 import org.unicode.cldr.util.CLDRTransforms.Direction;
18 import org.unicode.cldr.util.CLDRTransforms.ParsedTransformID;
19 import org.unicode.cldr.util.CLDRTransforms.Visibility;
20 import org.unicode.cldr.util.LanguageTagParser;
21 import org.unicode.cldr.util.StandardCodes.CodeType;
22 import org.unicode.cldr.util.With;
23 
24 import com.ibm.icu.impl.Relation;
25 import com.ibm.icu.lang.UScript;
26 import com.ibm.icu.text.Transliterator;
27 
28 public class FixTransformNames {
29     CLDRConfig testInfo = ToolConfig.getToolInstance();
30 
main(String[] args)31     public static void main(String[] args) {
32         new FixTransformNames().run(args);
33     }
34 
35     Map<String, String> fieldToCode = new HashMap<String, String>();
36     Map<String, String> oldToNewVariant = new HashMap<String, String>();
37     Map<String, String> fieldToVariant = new HashMap<String, String>();
38     Map<String, String> targetToCode = new HashMap<String, String>();
39 
40     Set<String> languageCodes = new HashSet<String>();
41 
run(String[] args)42     private void run(String[] args) {
43         CLDRFile file = testInfo.getEnglish();
44         for (String lang : testInfo.getStandardCodes().getAvailableCodes(CodeType.language)) {
45             String name = file.getName(lang);
46             if (!name.equals(lang)) {
47                 fieldToCode.put(name, lang);
48                 languageCodes.add(lang);
49             }
50         }
51         fieldToCode.put("Maldivian", "dv");
52         fieldToCode.put("JapaneseKana", "und_Kana");
53         fieldToCode.put("Kirghiz", "ky");
54         fieldToCode.put("ASCII", "und-Qaaa");
55         fieldToCode.put("zh_Latn_PINYIN", "zh_Latn");
56         fieldToCode.put("zh_Latn_PINYIN", "zh_Latn");
57         fieldToCode.put("IPA", "und-fonipa");
58         fieldToCode.put("XSampa", "und-fonxsamp");
59         fieldToCode.put("Simplified", "und-Hans");
60         fieldToCode.put("Traditional", "und-Hant");
61         fieldToCode.put("ConjoiningJamo", "und-Qaaj");
62         oldToNewVariant.put("UNGEGN", "-m0-ungegn");
63         oldToNewVariant.put("BGN", "-m0-bgn");
64         addX(oldToNewVariant, "-x0-", "hex", "C Java Perl, Plain Unicode XML XML10");
65         addX(fieldToVariant, "-x0-", "", "CaseFold Lower Title Upper");
66         addX(fieldToVariant, "-x0-", "", "NFC NFD NFKC NFKD FCC FCD FullWidth Halfwidth");
67         addX(fieldToVariant, "-x0-", "", "Null Remove");
68         addX(fieldToVariant, "-x0-", "", "Accents Publishing Name");
69         //exceptions.put("Latin-ConjoiningJamo", "und-t-und-Latn-m0-conjamo"); // Conjoining Jamo - internal
70         /*
71             <transformName type="BGN">BGN</transformName>
72             <transformName type="Numeric">Numeric</transformName>
73             <transformName type="Tone">Tone</transformName>
74             <transformName type="UNGEGN">UNGEGN</transformName>
75             <transformName type="x-Accents">Accents</transformName>
76             <transformName type="x-Fullwidth">Fullwidth</transformName>
77             <transformName type="x-Halfwidth">Halfwidth</transformName>
78             <transformName type="x-Jamo">Jamo</transformName>
79             <transformName type="x-Pinyin">Pinyin</transformName>
80             <transformName type="x-Publishing">Publishing</transformName>
81 
82         ??Accents   [Any-Accents]
83         ??ConjoiningJamo    [Latin-ConjoiningJamo]
84         ??Fullwidth [Fullwidth-Halfwidth]
85         ??Halfwidth [Fullwidth-Halfwidth]
86         ??InterIndic    [Bengali-InterIndic, Devanagari-InterIndic, Gujarati-InterIndic, Gurmukhi-InterIndic, InterIndic-Bengali, InterIndic-Devanagari, InterIndic-Gujarati, InterIndic-Gurmukhi, InterIndic-Kannada, InterIndic-Latin, InterIndic-Malayalam, InterIndic-Oriya, InterIndic-Tamil, InterIndic-Telugu, Kannada-InterIndic, Latin-InterIndic, Malayalam-InterIndic, Oriya-InterIndic, Tamil-InterIndic, Telugu-InterIndic]
87         ??Jamo  [Jamo-Latin, Latin-Jamo]
88         ??Latin-Names   [Han-Latin-Names]
89         ??Lower [az-Lower, el-Lower, lt-Lower, tr-Lower]
90         ??NumericPinyin [Latin-NumericPinyin, Pinyin-NumericPinyin]
91         ??Publishing    [Any-Publishing]
92         ??Simplified    [Simplified-Traditional]
93         ??Spacedhan [Han-Spacedhan]
94         ??ThaiLogical   [Thai-ThaiLogical, ThaiLogical-Latin]
95         ??ThaiSemi  [Thai-ThaiSemi]
96         ??Title [az-Title, el-Title, lt-Title, nl-Title, tr-Title]
97         ??Traditional   [Simplified-Traditional]
98         ??Upper [az-Upper, el-Upper, lt-Upper, tr-Upper]
99 
100          */
101 
102         //CLDRTransforms transforms = CLDRTransforms.getInstance();
103         Relation<String, String> missing = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
104         Set<String> found = new TreeSet<String>();
105         Map<String, String> allFields = new TreeMap<String, String>();
106         Map<String, String> specialFields = new TreeMap<String, String>();
107         Map<String, String> allVariants = new TreeMap<String, String>();
108 
109         Set<String> internal = new TreeSet<String>();
110         Set<String> cldrIds = getCldrIds(internal);
111 
112         for (String id : CLDRTransforms.getAvailableIds()) {
113             if (id.endsWith(".xml")) {
114                 id = id.substring(0, id.length() - 4);
115             }
116             int first = id.indexOf('-');
117             int second = id.indexOf('-', first + 1);
118             String id2 = second < 0 ? id : id.substring(0, second) + "/" + id.substring(second + 1);
119             if (internal.contains(id2)) {
120                 System.out.println("*Internal:\t" + id);
121             } else if (!cldrIds.contains(id2)) {
122                 System.out.println("*Missing:\t" + id);
123             }
124         }
125         Set<String> icuOnlyIds = new TreeSet<String>();
126         for (Enumeration<String> x = Transliterator.getAvailableIDs(); x.hasMoreElements();) {
127             String icuId = x.nextElement();
128             if (!cldrIds.contains(icuId)) {
129                 icuOnlyIds.add(icuId);
130             }
131         }
132 
133         for (String id : With.in(cldrIds, icuOnlyIds)) {
134             String original = id;
135 
136             ParsedTransformID ptd = new ParsedTransformID().set(id);
137             if (!id.equals(ptd.toString())) {
138                 missing.put("ERROR\t" + id, ptd.toString());
139                 continue;
140             }
141             // und-Latn-t-und-cyrl
142             // und-Hebr-t-und-latn-m0-ungegn-1977
143 
144             String variantSource = ptd.variant;
145             String variant = getFixedVariant(variantSource);
146             if (variant.contains("?")) {
147                 missing.put(variantSource, id);
148             } else {
149                 allVariants.put(variant, variantSource);
150             }
151 
152             String source = getFixedName(ptd.source);
153             if (source.contains("?")) {
154                 if (variantSource == null) {
155                     String temp = fieldToVariant.get(ptd.source);
156                     if (temp != null) {
157                         source = "";
158                         variant = temp;
159                         specialFields.put(source + "/" + variant, ptd.source);
160                     } else {
161                         missing.put(ptd.source, id);
162                     }
163                 } else {
164                     missing.put(ptd.source, id);
165                 }
166             } else {
167                 allFields.put(source, ptd.source);
168             }
169             String target = getFixedName(ptd.target);
170             if (target.contains("?")) {
171                 if (variantSource == null) {
172                     String temp = fieldToVariant.get(ptd.target);
173                     if (temp != null) {
174                         target = "und";
175                         variant = temp;
176                         specialFields.put(target + "/" + variant, ptd.target);
177                     } else {
178                         missing.put(ptd.target, id);
179                     }
180                 } else {
181                     missing.put(ptd.target, id);
182                 }
183             } else {
184                 allFields.put(target, ptd.target);
185             }
186             String bcp47 = target + "-t" + (source.isEmpty() ? "" : "-" + source) + variant;
187 
188             if (bcp47.contains("?")) {
189                 continue;
190             }
191             found.add(bcp47 + "\t" + getName(target) + "\t" + getName(source) + "\t" + variant + "\t" + original);
192         }
193 
194         System.out.println("\nAll Fields");
195         for (Entry<String, String> s : allFields.entrySet()) {
196             System.out.println(s.getKey() + "\t" + getName(s.getKey()) + "\t" + s.getValue());
197         }
198         System.out.println("\nSpecial Fields");
199         for (Entry<String, String> s : specialFields.entrySet()) {
200             System.out.println(s.getKey() + "\t" + s.getValue());
201         }
202         System.out.println("\nAll Variants");
203         for (Entry<String, String> s : allVariants.entrySet()) {
204             System.out.println(s.getKey() + "\t" + s.getValue());
205         }
206         System.out.println("\nFound IDs");
207         for (String s : found) {
208             System.out.println(s);
209         }
210         System.out.println("\nUnconverted");
211         for (Entry<String, Set<String>> s : missing.keyValuesSet()) {
212             System.out.println(s.getKey() + "\t" + s.getValue());
213         }
214     }
215 
216     private void addX(Map<String, String> oldToNewVariant2, String type, String prefix, String items) {
217         for (String part : items.split("\\s+")) {
218             String target = prefix + part.toLowerCase(Locale.ENGLISH);
219             if (target.length() > 8) {
220                 target = target.substring(0, 8);
221             }
222             oldToNewVariant2.put(part, type + target);
223         }
224     }
225 
226     LanguageTagParser ltp = new LanguageTagParser();
227     CLDRFile english = testInfo.getEnglish();
228 
getName(String target)229     private String getName(String target) {
230         if (target.equals("und")) {
231             return "Any";
232         }
233         ltp.set(target);
234         if (ltp.getLanguage().equals("und")) {
235             String result = "";
236             result = add(result, CLDRFile.SCRIPT_NAME, ltp.getScript());
237             result = add(result, CLDRFile.TERRITORY_NAME, ltp.getRegion());
238             for (String v : ltp.getVariants()) {
239                 result = add(result, CLDRFile.VARIANT_NAME, v);
240             }
241             return result;
242         }
243         return english.getName(target.replace('-', '_'));
244     }
245 
add(String result, int type, String code)246     private String add(String result, int type, String code) {
247         if (code.isEmpty()) {
248             return result;
249         }
250         if (result.length() != 0) {
251             result += ", ";
252         }
253         String temp = english.getName(type, code);
254         if (type == CLDRFile.SCRIPT_NAME && fieldToCode.containsKey(temp)) {
255             temp += "*";
256         }
257         return result + (temp == null ? code : temp);
258     }
259 
getFixedVariant(String variant)260     private String getFixedVariant(String variant) {
261         if (variant == null || variant.isEmpty()) {
262             return "";
263         }
264         String fixedVariant = oldToNewVariant.get(variant);
265         if (fixedVariant != null) {
266             return fixedVariant;
267         }
268         return "??" + variant;
269     }
270 
getCldrIds(Set<String> internal)271     private Set<String> getCldrIds(Set<String> internal) {
272         Set<String> result = new LinkedHashSet<String>();
273         for (String s : CLDRTransforms.getAvailableIds()) {
274             //String dir;
275             ParsedTransformID directionInfo = new ParsedTransformID();
276             //String rules = CLDRTransforms.getIcuRulesFromXmlFile(CLDRTransforms.TRANSFORM_DIR, s, directionInfo);
277             Set<String> store = directionInfo.getVisibility() == Visibility.external ? result : internal;
278             if (directionInfo.getDirection() != Direction.backward) {
279                 store.add(directionInfo.getId());
280             }
281             if (directionInfo.getDirection() != Direction.forward) {
282                 store.add(directionInfo.getBackwardId());
283             }
284         }
285         return result;
286     }
287 
getFixedName(String field)288     private String getFixedName(String field) {
289         String variant = "";
290         if (field.equals("Any")) {
291             return "und";
292         }
293         if (field.contains("_FONIPA")) {
294             field = field.replace("_FONIPA", "");
295             variant = "-fonipa";
296         }
297         if (field.equals("es_419")
298             || field.equals("ja_Latn")
299             || field.equals("zh_Latn")
300             || field.equals("und-Latn")) {
301             return field.replace("_", "-");
302         }
303         int source = UScript.getCodeFromName(field);
304         if (languageCodes.contains(field)) {
305             return field + variant;
306         }
307         String name;
308         try {
309             name = UScript.getShortName(source);
310             return "und-" + name + variant;
311         } catch (Exception e) {
312             name = fieldToCode.get(field);
313             if (name != null) {
314                 return name + variant;
315             }
316         }
317         return "??" + field;
318     }
319 
320 }
321