1 package org.unicode.cldr.tool; 2 3 import java.util.HashMap; 4 import java.util.HashSet; 5 import java.util.Iterator; 6 import java.util.LinkedHashSet; 7 import java.util.Map; 8 import java.util.Map.Entry; 9 import java.util.Set; 10 import java.util.TreeMap; 11 import java.util.TreeSet; 12 import java.util.regex.Matcher; 13 14 import org.unicode.cldr.util.CLDRConfig; 15 import org.unicode.cldr.util.CLDRFile; 16 import org.unicode.cldr.util.Factory; 17 import org.unicode.cldr.util.Pair; 18 import org.unicode.cldr.util.PatternCache; 19 import org.unicode.cldr.util.SupplementalDataInfo; 20 import org.unicode.cldr.util.Timer; 21 import org.unicode.cldr.util.XPathParts; 22 23 import com.google.common.base.Splitter; 24 import com.ibm.icu.text.UnicodeSet; 25 26 public class ListUnits { 27 private static final UnicodeSet BIDI_CONTROL = new UnicodeSet("[:bidi_control:]").freeze(); 28 private static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 29 private static final SupplementalDataInfo SUPP = CONFIG.getSupplementalDataInfo(); 30 private static final Task TASK = Task.listSimpleUnits; 31 32 private enum Task { 33 listUnits, listSimpleUnits, showDecimals, getDigits, 34 } 35 36 enum Type { 37 root, en, other; fromString(String type)38 static Type fromString(String type) { 39 return type.equals("en") ? en : type.equals("root") ? root : other; 40 } 41 } 42 main(String[] args)43 public static void main(String[] args) { 44 Factory cldrFactory = CONFIG.getCldrFactory(); 45 Set<String> defaultContent = SUPP.getDefaultContentLocales(); 46 Set<String> seen = new HashSet<>(); 47 48 LinkedHashSet<String> items = new LinkedHashSet<>(); 49 items.add("root"); 50 items.add("en"); 51 items.addAll(cldrFactory.getAvailableLanguages()); 52 Map<String, Data> rootMap = new HashMap<>(); 53 Map<String, Data> enMap = new HashMap<>(); 54 55 Timer timer = new Timer(); 56 int count = 0; 57 XPathParts parts = new XPathParts(); 58 Splitter SEMI = Splitter.on(";").trimResults(); 59 Matcher currencyMatcher = PatternCache.get("([^0#]*).*[0#]([^0#]*)").matcher(""); 60 61 for (String locale : items) { 62 Type type = Type.fromString(locale); 63 if (type == Type.root || type == Type.en || defaultContent.contains(locale)) { 64 continue; 65 } 66 CLDRFile cldrFile = cldrFactory.make(locale, true); 67 // DecimalFormat format = new DecimalFormat(currencyPattern); 68 // String prefix = format.getPositivePrefix(); 69 // String suffix = format.getPositiveSuffix(); 70 71 // ICUServiceBuilder builder = new ICUServiceBuilder().setCldrFile(cldrFile); 72 // DecimalFormat format = builder.getCurrencyFormat("XXX"); 73 // String prefix = format.getPositivePrefix().replace("XXX", "\u00a4"); 74 // String suffix = format.getPositiveSuffix().replace("XXX", "\u00a4"); 75 switch (TASK) { 76 case showDecimals: { 77 String compactPathPrefix = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]"; 78 String currencyPattern = cldrFile 79 .getStringValue( 80 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"); 81 String firstPart = SEMI.split(currencyPattern).iterator().next(); 82 if (!currencyMatcher.reset(firstPart).matches()) { 83 throw new IllegalArgumentException("bad matcher"); 84 } 85 String prefix = currencyMatcher.group(1); 86 String suffix = currencyMatcher.group(2); 87 System.out.println("\n#" + locale + "\t«" + prefix + "»\t«" + suffix + "»\t«" + currencyPattern + "»"); 88 TreeMap<String, String> data = new TreeMap<>(); 89 for (String path : cldrFile.fullIterable()) { 90 // if (s.contains("decimalFormats")) { 91 // System.out.println(s); 92 // } 93 if (path.startsWith(compactPathPrefix)) { 94 String value = cldrFile.getStringValue(path); 95 String mod = path.replace("decimal", "currency") + "[@draft=\"provisional\"]"; 96 // // locale=en ; action=add ; new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine 97 data.put(mod, "locale=" + locale 98 + " ; action=add" 99 + " ; new_value=" + prefix + value + suffix 100 + " ; new_path=" + mod); 101 } 102 } 103 for (Entry<String, String> line : data.entrySet()) { 104 System.out.println(line.getValue()); 105 } 106 data.clear(); 107 break; 108 } 109 case listUnits: 110 case listSimpleUnits: { 111 Set<String> units = getUnits(cldrFile, TASK, type == Type.root ? rootMap : type == Type.en ? enMap : null); 112 if (type == Type.en) { 113 TreeSet<String> missing = new TreeSet<>(seen); 114 missing.removeAll(units); 115 for (String unit : missing) { 116 // locale=en ; action=add ; new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine 117 Data data = rootMap.get(unit); 118 if (data != null) { 119 System.out.println(data); 120 } 121 } 122 } 123 Splitter HYPHEN = Splitter.on('-'); 124 String oldBase = ""; 125 for (String unit : units) { 126 if (!seen.contains(unit)) { 127 switch (TASK) { 128 case listSimpleUnits: 129 String base = HYPHEN.split(unit).iterator().next(); 130 if (!base.equals(oldBase)) { 131 oldBase = base; 132 System.out.println(); 133 } else { 134 System.out.print(' '); 135 } 136 System.out.print(unit); 137 break; 138 case listUnits: 139 System.out.println("\t" + unit.replace("/", "\t") 140 .replaceFirst("-", "\t") + "\t" + locale); 141 break; 142 } 143 seen.add(unit); 144 } 145 } 146 break; 147 } 148 case getDigits: { 149 getDigits(cldrFile); 150 break; 151 } 152 } 153 } 154 System.out.println(); 155 System.out.println("#Done: " + count + ", " + timer); 156 } 157 getDigits(CLDRFile cldrFile)158 static void getDigits(CLDRFile cldrFile) { 159 System.out.println(cldrFile.getLocaleID()); 160 String numberSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem"); 161 Set<String> seen = new HashSet<>(); 162 seen.add(numberSystem); 163 Pair<UnicodeSet, UnicodeSet> main = getCharacters(cldrFile, numberSystem); 164 System.out.println("\tdefault: " + numberSystem + ", " + main.getFirst().toPattern(false) + ", " + main.getSecond().toPattern(false)); 165 for (Iterator<String> it = cldrFile.iterator("//ldml/numbers/otherNumberingSystems"); it.hasNext();) { 166 String path = it.next(); 167 String otherNumberingSystem = cldrFile.getWinningValue(path); 168 if (seen.contains(otherNumberingSystem)) { 169 continue; 170 } 171 seen.add(otherNumberingSystem); 172 main = getCharacters(cldrFile, otherNumberingSystem); 173 System.out.println("\tother: " + otherNumberingSystem 174 + ", " + main.getFirst().toPattern(false) + "\t" + main.getSecond().toPattern(false)); 175 } 176 } 177 getCharacters(CLDRFile cldrFileToCheck, String numberSystem)178 private static Pair<UnicodeSet, UnicodeSet> getCharacters(CLDRFile cldrFileToCheck, String numberSystem) { 179 String digitString = SUPP.getDigits(numberSystem); 180 UnicodeSet digits = digitString == null ? UnicodeSet.EMPTY : new UnicodeSet().addAll(digitString); 181 182 UnicodeSet punctuation = new UnicodeSet(); 183 Set<String> errors = new LinkedHashSet<>(); 184 add(cldrFileToCheck, "decimal", numberSystem, punctuation, errors); 185 //add(cldrFileToCheck, "exponential", numberSystem, punctuation, errors); 186 add(cldrFileToCheck, "group", numberSystem, punctuation, errors); 187 //add(cldrFileToCheck, "infinity", numberSystem, punctuation, errors); 188 add(cldrFileToCheck, "minusSign", numberSystem, punctuation, errors); 189 //add(cldrFileToCheck, "nan", numberSystem, punctuation, errors); 190 add(cldrFileToCheck, "list", numberSystem, punctuation, errors); 191 add(cldrFileToCheck, "percentSign", numberSystem, punctuation, errors); 192 add(cldrFileToCheck, "perMille", numberSystem, punctuation, errors); 193 add(cldrFileToCheck, "plusSign", numberSystem, punctuation, errors); 194 // symbols.setZeroDigit(getSymbolString(cldrFileToCheck, "nativeZeroDigit", numberSystem)); 195 if (!errors.isEmpty() && digitString != null) { 196 System.out.println("Missing: " + numberSystem + "\t" + errors); 197 } 198 punctuation.removeAll(BIDI_CONTROL); 199 return Pair.of(digits, punctuation); 200 } 201 add(CLDRFile cldrFileToCheck, String subtype, String numberSystem, UnicodeSet punctuation, Set<String> errors)202 private static void add(CLDRFile cldrFileToCheck, String subtype, String numberSystem, UnicodeSet punctuation, Set<String> errors) { 203 final String result = getSymbolString(cldrFileToCheck, subtype, numberSystem); 204 if (result == null) { 205 errors.add(subtype); 206 } else { 207 punctuation.addAll(result); 208 } 209 } 210 getSymbolString(CLDRFile cldrFile, String key, String numsys)211 private static String getSymbolString(CLDRFile cldrFile, String key, String numsys) { 212 return cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"" + numsys + "\"]/" + key); 213 } 214 215 static final class Data { Data(String path2, String stringValue)216 public Data(String path2, String stringValue) { 217 path = path2; 218 value = stringValue; 219 } 220 221 final String path; 222 final String value; 223 toString()224 public String toString() { 225 return "locale=en" 226 + " ; action=add" 227 + " ; new_path=" + path 228 + " ; new_value=" + value; 229 } 230 } 231 getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra)232 private static Set<String> getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra) { 233 Set<String> seen = new TreeSet<String>(); 234 for (String path : cldrFile) { 235 if (!path.contains("/unit")) { 236 continue; 237 } 238 XPathParts parts = XPathParts.getFrozenInstance(path); 239 String unit = parts.findAttributeValue("unit", "type"); 240 if (unit == null) { 241 continue; 242 } 243 String key = unit; 244 if (task == Task.listUnits) { 245 String length = parts.findAttributeValue("unitLength", "type"); 246 String per = "perUnitPattern".equals(parts.getElement(-1)) ? "per" : ""; 247 key = unit + "/" + length + "/" + per; 248 } 249 seen.add(key); 250 if (extra != null && !path.endsWith("/alias")) { 251 extra.put(key, new Data(path, cldrFile.getStringValue(path))); 252 } 253 } 254 return seen; 255 } 256 } 257