package org.unicode.cldr.tool;

import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRTransforms;
import org.unicode.cldr.util.CLDRTransforms.Direction;
import org.unicode.cldr.util.CLDRTransforms.ParsedTransformID;
import org.unicode.cldr.util.CLDRTransforms.Visibility;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.StandardCodes.CodeType;
import org.unicode.cldr.util.With;

import com.ibm.icu.impl.Relation;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Transliterator;

/**
 * Command-line tool that walks the known transform IDs (those derived from CLDR plus those
 * only registered with ICU's {@link Transliterator}) and prints, to standard output, a
 * proposed {@code <target>-t-<source><variant>} identifier for each one, together with the
 * fields, variants and IDs that could not be converted.
 */
public class FixTransformNames {
    // Declared first: the initializer of the "english" field below reads it.
    CLDRConfig testInfo = ToolConfig.getToolInstance();

    /** Maps an old source/target field name (English language name or special name) to its code. */
    Map<String, String> fieldToCode = new HashMap<String, String>();
    /** Maps an old variant name (the part after '/') to its new "-m0-..." / "-x0-..." suffix. */
    Map<String, String> oldToNewVariant = new HashMap<String, String>();
    /** Maps a field name that is really an operation (e.g. "Lower", "NFC") to a variant suffix. */
    Map<String, String> fieldToVariant = new HashMap<String, String>();
    Map<String, String> targetToCode = new HashMap<String, String>();

    /** Language codes whose English name differs from the code itself; filled in by run(). */
    Set<String> languageCodes = new HashSet<String>();

    LanguageTagParser ltp = new LanguageTagParser();
    CLDRFile english = testInfo.getEnglish();

    /**
     * Entry point; creates an instance and runs the report.
     *
     * @param args command-line arguments (passed through to run, which does not read them)
     */
    public static void main(String[] args) {
        new FixTransformNames().run(args);
    }

    /**
     * Builds the lookup tables, converts every transform ID, and prints the report sections
     * ("All Fields", "Special Fields", "All Variants", "Found IDs", "Unconverted").
     *
     * @param args unused
     */
    private void run(String[] args) {
        CLDRFile file = testInfo.getEnglish();
        for (String lang : testInfo.getStandardCodes().getAvailableCodes(CodeType.language)) {
            String name = file.getName(lang);
            // Only codes whose name differs from the code itself are recorded.
            if (!name.equals(lang)) {
                fieldToCode.put(name, lang);
                languageCodes.add(lang);
            }
        }
        fieldToCode.put("Maldivian", "dv");
        fieldToCode.put("JapaneseKana", "und_Kana");
        fieldToCode.put("Kirghiz", "ky");
        fieldToCode.put("ASCII", "und-Qaaa");
        fieldToCode.put("zh_Latn_PINYIN", "zh_Latn");
        fieldToCode.put("IPA", "und-fonipa");
        fieldToCode.put("XSampa", "und-fonxsamp");
        fieldToCode.put("Simplified", "und-Hans");
        fieldToCode.put("Traditional", "und-Hant");
        fieldToCode.put("ConjoiningJamo", "und-Qaaj");
        oldToNewVariant.put("UNGEGN", "-m0-ungegn");
        oldToNewVariant.put("BGN", "-m0-bgn");
        // The item list previously read "Perl," (with a comma), which registered the key
        // "Perl," instead of "Perl" and leaked the comma into the generated value.
        addX(oldToNewVariant, "-x0-", "hex", "C Java Perl Plain Unicode XML XML10");
        addX(fieldToVariant, "-x0-", "", "CaseFold Lower Title Upper");
        // NOTE(review): the key is spelled "FullWidth" here, while the ID listed in the comment
        // below is "Fullwidth-Halfwidth"; map lookups are case-sensitive, so confirm which
        // spelling is intended.
        addX(fieldToVariant, "-x0-", "", "NFC NFD NFKC NFKD FCC FCD FullWidth Halfwidth");
        addX(fieldToVariant, "-x0-", "", "Null Remove");
        addX(fieldToVariant, "-x0-", "", "Accents Publishing Name");
        //exceptions.put("Latin-ConjoiningJamo", "und-t-und-Latn-m0-conjamo"); // Conjoining Jamo - internal
        /*
        <transformName type="BGN">BGN</transformName>
        <transformName type="Numeric">Numeric</transformName>
        <transformName type="Tone">Tone</transformName>
        <transformName type="UNGEGN">UNGEGN</transformName>
        <transformName type="x-Accents">Accents</transformName>
        <transformName type="x-Fullwidth">Fullwidth</transformName>
        <transformName type="x-Halfwidth">Halfwidth</transformName>
        <transformName type="x-Jamo">Jamo</transformName>
        <transformName type="x-Pinyin">Pinyin</transformName>
        <transformName type="x-Publishing">Publishing</transformName>

        ??Accents   [Any-Accents]
        ??ConjoiningJamo    [Latin-ConjoiningJamo]
        ??Fullwidth [Fullwidth-Halfwidth]
        ??Halfwidth [Fullwidth-Halfwidth]
        ??InterIndic    [Bengali-InterIndic, Devanagari-InterIndic, Gujarati-InterIndic, Gurmukhi-InterIndic, InterIndic-Bengali, InterIndic-Devanagari, InterIndic-Gujarati, InterIndic-Gurmukhi, InterIndic-Kannada, InterIndic-Latin, InterIndic-Malayalam, InterIndic-Oriya, InterIndic-Tamil, InterIndic-Telugu, Kannada-InterIndic, Latin-InterIndic, Malayalam-InterIndic, Oriya-InterIndic, Tamil-InterIndic, Telugu-InterIndic]
        ??Jamo  [Jamo-Latin, Latin-Jamo]
        ??Latin-Names   [Han-Latin-Names]
        ??Lower [az-Lower, el-Lower, lt-Lower, tr-Lower]
        ??NumericPinyin [Latin-NumericPinyin, Pinyin-NumericPinyin]
        ??Publishing    [Any-Publishing]
        ??Simplified    [Simplified-Traditional]
        ??Spacedhan [Han-Spacedhan]
        ??ThaiLogical   [Thai-ThaiLogical, ThaiLogical-Latin]
        ??ThaiSemi  [Thai-ThaiSemi]
        ??Title [az-Title, el-Title, lt-Title, nl-Title, tr-Title]
        ??Traditional   [Simplified-Traditional]
        ??Upper [az-Upper, el-Upper, lt-Upper, tr-Upper]

         */

        //CLDRTransforms transforms = CLDRTransforms.getInstance();
        // "missing" groups the IDs that failed under the field/variant that could not be mapped.
        Relation<String, String> missing = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
        Set<String> found = new TreeSet<String>();
        Map<String, String> allFields = new TreeMap<String, String>();
        Map<String, String> specialFields = new TreeMap<String, String>();
        Map<String, String> allVariants = new TreeMap<String, String>();

        Set<String> internal = new TreeSet<String>();
        Set<String> cldrIds = getCldrIds(internal);

        // Cross-check the raw available IDs against the sets computed by getCldrIds.
        for (String id : CLDRTransforms.getAvailableIds()) {
            if (id.endsWith(".xml")) {
                id = id.substring(0, id.length() - 4);
            }
            // Rewrite "A-B-C" as "A-B/C": the part after the second hyphen becomes the variant.
            int first = id.indexOf('-');
            int second = id.indexOf('-', first + 1);
            String id2 = second < 0 ? id : id.substring(0, second) + "/" + id.substring(second + 1);
            if (internal.contains(id2)) {
                System.out.println("*Internal:\t" + id);
            } else if (!cldrIds.contains(id2)) {
                System.out.println("*Missing:\t" + id);
            }
        }

        // IDs registered with ICU's Transliterator that are not in the CLDR-derived set.
        Set<String> icuOnlyIds = new TreeSet<String>();
        for (Enumeration<String> x = Transliterator.getAvailableIDs(); x.hasMoreElements();) {
            String icuId = x.nextElement();
            if (!cldrIds.contains(icuId)) {
                icuOnlyIds.add(icuId);
            }
        }

        for (String id : With.in(cldrIds, icuOnlyIds)) {
            String original = id;

            ParsedTransformID ptd = new ParsedTransformID().set(id);
            // An ID that does not round-trip through the parser is reported as an error.
            if (!id.equals(ptd.toString())) {
                missing.put("ERROR\t" + id, ptd.toString());
                continue;
            }
            // und-Latn-t-und-cyrl
            // und-Hebr-t-und-latn-m0-ungegn-1977

            String variantSource = ptd.variant;
            String variant = getFixedVariant(variantSource);
            if (variant.contains("?")) {
                missing.put(variantSource, id);
            } else {
                allVariants.put(variant, variantSource);
            }

            String source = getFixedName(ptd.source);
            if (source.contains("?")) {
                if (variantSource == null) {
                    // The field may be an operation name (e.g. "Lower") rather than a locale or
                    // script; if so, it is expressed as a variant with an empty source.
                    String temp = fieldToVariant.get(ptd.source);
                    if (temp != null) {
                        source = "";
                        variant = temp;
                        specialFields.put(source + "/" + variant, ptd.source);
                    } else {
                        missing.put(ptd.source, id);
                    }
                } else {
                    missing.put(ptd.source, id);
                }
            } else {
                allFields.put(source, ptd.source);
            }

            String target = getFixedName(ptd.target);
            if (target.contains("?")) {
                if (variantSource == null) {
                    // Same fallback as for the source, but the target becomes "und".
                    String temp = fieldToVariant.get(ptd.target);
                    if (temp != null) {
                        target = "und";
                        variant = temp;
                        specialFields.put(target + "/" + variant, ptd.target);
                    } else {
                        missing.put(ptd.target, id);
                    }
                } else {
                    missing.put(ptd.target, id);
                }
            } else {
                allFields.put(target, ptd.target);
            }

            String bcp47 = target + "-t" + (source.isEmpty() ? "" : "-" + source) + variant;

            // Any remaining "?" means some component was not converted; it was already
            // recorded in "missing" above.
            if (bcp47.contains("?")) {
                continue;
            }
            found.add(bcp47 + "\t" + getName(target) + "\t" + getName(source) + "\t" + variant + "\t" + original);
        }

        System.out.println("\nAll Fields");
        for (Entry<String, String> s : allFields.entrySet()) {
            System.out.println(s.getKey() + "\t" + getName(s.getKey()) + "\t" + s.getValue());
        }
        System.out.println("\nSpecial Fields");
        for (Entry<String, String> s : specialFields.entrySet()) {
            System.out.println(s.getKey() + "\t" + s.getValue());
        }
        System.out.println("\nAll Variants");
        for (Entry<String, String> s : allVariants.entrySet()) {
            System.out.println(s.getKey() + "\t" + s.getValue());
        }
        System.out.println("\nFound IDs");
        for (String s : found) {
            System.out.println(s);
        }
        System.out.println("\nUnconverted");
        for (Entry<String, Set<String>> s : missing.keyValuesSet()) {
            System.out.println(s.getKey() + "\t" + s.getValue());
        }
    }

    /**
     * Registers each name in {@code items} in the given map, mapped to
     * {@code type + prefix + lowercased-name}, where {@code prefix + lowercased-name} is cut to
     * at most 8 characters (presumably the BCP 47 subtag length limit; confirm).
     *
     * @param oldToNewVariant2 map to add the entries to
     * @param type leading part of every generated value, e.g. "-x0-"
     * @param prefix text placed before the lowercased name; may be empty
     * @param items names separated by whitespace and/or commas
     */
    private void addX(Map<String, String> oldToNewVariant2, String type, String prefix, String items) {
        // Commas are treated as separators too, so a stray "Perl," cannot become a map key.
        for (String part : items.split("[\\s,]+")) {
            if (part.isEmpty()) {
                // split() yields a leading empty token when items starts with a separator.
                continue;
            }
            String target = prefix + part.toLowerCase(Locale.ENGLISH);
            if (target.length() > 8) {
                target = target.substring(0, 8);
            }
            oldToNewVariant2.put(part, type + target);
        }
    }

    /**
     * Returns a display name for a converted source/target code.
     *
     * @param target the code to name, e.g. "und", "und-Latn" or a language tag
     * @return "Any" for "und"; for other tags whose language is "und", the comma-separated
     *         names of the script, region and variants; otherwise the English name of the tag
     */
    private String getName(String target) {
        if (target.equals("und")) {
            return "Any";
        }
        ltp.set(target);
        if (ltp.getLanguage().equals("und")) {
            String result = "";
            result = add(result, CLDRFile.SCRIPT_NAME, ltp.getScript());
            result = add(result, CLDRFile.TERRITORY_NAME, ltp.getRegion());
            for (String v : ltp.getVariants()) {
                result = add(result, CLDRFile.VARIANT_NAME, v);
            }
            return result;
        }
        return english.getName(target.replace('-', '_'));
    }

    /**
     * Appends the English name of {@code code} to {@code result}, separated by ", " when
     * {@code result} is not empty.
     *
     * @param result the text accumulated so far
     * @param type the CLDRFile name type of {@code code} (script, territory or variant)
     * @param code the code to name; an empty code leaves {@code result} unchanged
     * @return the extended text; the raw code is used when no English name is available
     */
    private String add(String result, int type, String code) {
        if (code.isEmpty()) {
            return result;
        }
        if (result.length() != 0) {
            result += ", ";
        }
        String temp = english.getName(type, code);
        // A script name that is also a key of fieldToCode is flagged with "*".
        if (type == CLDRFile.SCRIPT_NAME && fieldToCode.containsKey(temp)) {
            temp += "*";
        }
        return result + (temp == null ? code : temp);
    }

    /**
     * Converts an old variant name to its new suffix.
     *
     * @param variant the old variant; may be null or empty
     * @return "" when there is no variant, the mapped suffix when one is registered in
     *         oldToNewVariant, otherwise "??" followed by the original variant
     */
    private String getFixedVariant(String variant) {
        if (variant == null || variant.isEmpty()) {
            return "";
        }
        String fixedVariant = oldToNewVariant.get(variant);
        if (fixedVariant != null) {
            return fixedVariant;
        }
        return "??" + variant;
    }

    /**
     * Collects transform IDs, once per entry of CLDRTransforms.getAvailableIds().
     *
     * @param internal receives the IDs whose visibility is not external
     * @return the IDs whose visibility is external, in insertion order
     */
    private Set<String> getCldrIds(Set<String> internal) {
        Set<String> result = new LinkedHashSet<String>();
        for (String s : CLDRTransforms.getAvailableIds()) {
            //String dir;
            // NOTE(review): directionInfo is never populated, because the call below that filled
            // it from the XML file is commented out; the loop variable s is therefore unused.
            // Confirm what an unset ParsedTransformID reports for visibility, direction and ID.
            ParsedTransformID directionInfo = new ParsedTransformID();
            //String rules = CLDRTransforms.getIcuRulesFromXmlFile(CLDRTransforms.TRANSFORM_DIR, s, directionInfo);
            Set<String> store = directionInfo.getVisibility() == Visibility.external ? result : internal;
            if (directionInfo.getDirection() != Direction.backward) {
                store.add(directionInfo.getId());
            }
            if (directionInfo.getDirection() != Direction.forward) {
                store.add(directionInfo.getBackwardId());
            }
        }
        return result;
    }

    /**
     * Converts an old source/target field to its new code.
     *
     * @param field the old field, e.g. "Any", "Latin", "es_419" or an English language name
     * @return the converted code, or "??" followed by the field when it cannot be converted
     */
    private String getFixedName(String field) {
        String variant = "";
        if (field.equals("Any")) {
            return "und";
        }
        if (field.contains("_FONIPA")) {
            field = field.replace("_FONIPA", "");
            variant = "-fonipa";
        }
        // These are already locale-like; only the separator needs changing.
        if (field.equals("es_419")
            || field.equals("ja_Latn")
            || field.equals("zh_Latn")
            || field.equals("und-Latn")) {
            return field.replace("_", "-");
        }
        int source = UScript.getCodeFromName(field);
        if (languageCodes.contains(field)) {
            return field + variant;
        }
        String name;
        try {
            name = UScript.getShortName(source);
            return "und-" + name + variant;
        } catch (Exception e) {
            // NOTE(review): relies on getShortName throwing for a code that is not a valid
            // script; in that case fall back to the explicit field-to-code table.
            name = fieldToCode.get(field);
            if (name != null) {
                return name + variant;
            }
        }
        return "??" + field;
    }

}