package org.unicode.cldr.tool;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;

import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.Timer;
import org.unicode.cldr.util.XPathParts;

import com.google.common.base.Splitter;
import com.ibm.icu.text.UnicodeSet;

/**
 * Command-line tool that walks the available CLDR locales and prints one of several
 * reports to standard output. Which report is produced is selected by the compile-time
 * constant {@link #TASK}:
 * <ul>
 * <li>{@code listUnits} / {@code listSimpleUnits}: the unit types found in each locale
 * that were not already printed for an earlier locale;</li>
 * <li>{@code showDecimals}: "add" instruction lines that derive compact currency patterns
 * from the short compact decimal patterns;</li>
 * <li>{@code getDigits}: the digits and number punctuation used by each numbering system
 * of each locale.</li>
 * </ul>
 */
public class ListUnits {
    private static final UnicodeSet BIDI_CONTROL = new UnicodeSet("[:bidi_control:]").freeze();
    private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
    private static final SupplementalDataInfo SUPP = CONFIG.getSupplementalDataInfo();
    private static final Task TASK = Task.listSimpleUnits;

    /** Splits a number pattern into its ';'-separated subpatterns, trimming each piece. */
    private static final Splitter SEMI = Splitter.on(";").trimResults();
    /** Splits a unit identifier on '-'; the first piece is used as the unit's base. */
    private static final Splitter HYPHEN = Splitter.on('-');

    /** The reports this tool can produce; see {@link #TASK}. */
    private enum Task {
        listUnits, listSimpleUnits, showDecimals, getDigits,
    }

    /** Coarse classification of a locale ID: {@code root}, {@code en}, or anything else. */
    enum Type {
        root, en, other;

        /**
         * Classifies a locale ID.
         *
         * @param type the locale ID; {@code null} is classified as {@link #other}
         * @return {@link #en} for "en", {@link #root} for "root", otherwise {@link #other}
         */
        static Type fromString(String type) {
            // Constant-first equals so that a null ID falls through to 'other' instead of throwing.
            if ("en".equals(type)) {
                return en;
            }
            if ("root".equals(type)) {
                return root;
            }
            return other;
        }
    }

    /**
     * Runs the report selected by {@link #TASK} over "root", "en" and every available
     * language (in that order), then prints the number of locales processed and the
     * elapsed time.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Factory cldrFactory = CONFIG.getCldrFactory();
        Set<String> defaultContent = SUPP.getDefaultContentLocales();
        Set<String> seen = new HashSet<>();

        LinkedHashSet<String> items = new LinkedHashSet<>();
        items.add("root");
        items.add("en");
        items.addAll(cldrFactory.getAvailableLanguages());
        Map<String, Data> rootMap = new HashMap<>();
        Map<String, Data> enMap = new HashMap<>();

        Timer timer = new Timer();
        int count = 0;
        // Group 1 is the text before the first digit placeholder, group 2 the text after the last one.
        Matcher currencyMatcher = PatternCache.get("([^0#]*).*[0#]([^0#]*)").matcher("");

        for (String locale : items) {
            Type type = Type.fromString(locale);
            // NOTE(review): root and en are added to 'items' above but skipped here, so
            // rootMap/enMap are never filled and the Type.en handling in printUnits cannot
            // run from this loop. Behaviour is kept as-is; confirm whether the skip is intended.
            if (type == Type.root || type == Type.en || defaultContent.contains(locale)) {
                continue;
            }
            // Previously never incremented, so the "#Done" line always reported 0.
            count++;
            CLDRFile cldrFile = cldrFactory.make(locale, true);
            switch (TASK) {
            case showDecimals:
                printCompactCurrencyPatterns(cldrFile, locale, currencyMatcher);
                break;
            case listUnits:
            case listSimpleUnits:
                printUnits(cldrFile, locale, type, rootMap, enMap, seen);
                break;
            case getDigits:
                getDigits(cldrFile);
                break;
            default:
                break;
            }
        }
        System.out.println();
        System.out.println("#Done: " + count + ", " + timer);
    }

    /**
     * Prints, for one locale, a header line with the prefix/suffix extracted from the
     * standard latn currency pattern, followed by one "add" instruction line per short
     * compact decimal-format path. Each instruction wraps the decimal value in that
     * prefix/suffix and targets the corresponding "currency" path marked provisional.
     *
     * @param cldrFile the locale's data
     * @param locale the locale ID, used in the output lines
     * @param currencyMatcher reusable matcher for splitting the currency pattern
     * @throws IllegalArgumentException if the locale has no standard currency pattern, or
     *         the pattern contains no digit placeholder
     */
    private static void printCompactCurrencyPatterns(CLDRFile cldrFile, String locale, Matcher currencyMatcher) {
        String compactPathPrefix = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]";
        String currencyPattern = cldrFile.getStringValue(
            "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]");
        if (currencyPattern == null) {
            // Fail with context rather than an anonymous NullPointerException from the splitter.
            throw new IllegalArgumentException("no standard currency pattern for locale " + locale);
        }
        // Only the first (positive) subpattern is examined.
        String firstPart = SEMI.split(currencyPattern).iterator().next();
        if (!currencyMatcher.reset(firstPart).matches()) {
            throw new IllegalArgumentException("bad matcher");
        }
        String prefix = currencyMatcher.group(1);
        String suffix = currencyMatcher.group(2);
        System.out.println("\n#" + locale + "\t«" + prefix + "»\t«" + suffix + "»\t«" + currencyPattern + "»");

        // TreeMap so that the instruction lines come out sorted by target path.
        TreeMap<String, String> data = new TreeMap<>();
        for (String path : cldrFile.fullIterable()) {
            if (!path.startsWith(compactPathPrefix)) {
                continue;
            }
            String value = cldrFile.getStringValue(path);
            String mod = path.replace("decimal", "currency") + "[@draft=\"provisional\"]";
            // Line shape, e.g.:
            // locale=en ; action=add ; new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine
            data.put(mod, "locale=" + locale
                + " ; action=add"
                + " ; new_value=" + prefix + value + suffix
                + " ; new_path=" + mod);
        }
        for (Entry<String, String> line : data.entrySet()) {
            System.out.println(line.getValue());
        }
    }

    /**
     * Prints the units of one locale that have not been printed for an earlier locale,
     * and records them in {@code seen}. For {@code listSimpleUnits}, units sharing the
     * same base (the text before the first '-') are printed on one line separated by
     * spaces; for {@code listUnits}, each unit is printed on its own tab-separated line
     * together with the locale.
     *
     * @param cldrFile the locale's data
     * @param locale the locale ID
     * @param type the classification of {@code locale}
     * @param rootMap receives path/value data when {@code type} is root
     * @param enMap receives path/value data when {@code type} is en
     * @param seen units already printed; updated by this call
     */
    private static void printUnits(CLDRFile cldrFile, String locale, Type type,
        Map<String, Data> rootMap, Map<String, Data> enMap, Set<String> seen) {
        Map<String, Data> extra = type == Type.root ? rootMap : type == Type.en ? enMap : null;
        Set<String> units = getUnits(cldrFile, TASK, extra);
        if (type == Type.en) {
            // Units seen so far that en lacks: print the root data as an "add" instruction.
            TreeSet<String> missing = new TreeSet<>(seen);
            missing.removeAll(units);
            for (String unit : missing) {
                Data data = rootMap.get(unit);
                if (data != null) {
                    System.out.println(data);
                }
            }
        }
        String oldBase = "";
        for (String unit : units) {
            if (!seen.add(unit)) {
                continue; // already reported for an earlier locale
            }
            switch (TASK) {
            case listSimpleUnits:
                String base = HYPHEN.split(unit).iterator().next();
                if (!base.equals(oldBase)) {
                    // New base: start a new output line.
                    oldBase = base;
                    System.out.println();
                } else {
                    System.out.print(' ');
                }
                System.out.print(unit);
                break;
            case listUnits:
                System.out.println("\t" + unit.replace("/", "\t")
                    .replaceFirst("-", "\t") + "\t" + locale);
                break;
            default:
                break;
            }
        }
    }

    /**
     * Prints the locale ID, then the digits and punctuation of its default numbering
     * system, then the same for each distinct value found under
     * {@code //ldml/numbers/otherNumberingSystems} that differs from the ones already printed.
     *
     * @param cldrFile the locale's data
     */
    static void getDigits(CLDRFile cldrFile) {
        System.out.println(cldrFile.getLocaleID());
        String numberSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem");
        Set<String> seen = new HashSet<>();
        seen.add(numberSystem);
        Pair<UnicodeSet, UnicodeSet> chars = getCharacters(cldrFile, numberSystem);
        System.out.println("\tdefault: " + numberSystem + ", " + chars.getFirst().toPattern(false) + ", " + chars.getSecond().toPattern(false));
        for (Iterator<String> it = cldrFile.iterator("//ldml/numbers/otherNumberingSystems"); it.hasNext();) {
            String path = it.next();
            String otherNumberingSystem = cldrFile.getWinningValue(path);
            if (!seen.add(otherNumberingSystem)) {
                continue; // this numbering system was already printed
            }
            chars = getCharacters(cldrFile, otherNumberingSystem);
            System.out.println("\tother: " + otherNumberingSystem
                + ", " + chars.getFirst().toPattern(false) + "\t" + chars.getSecond().toPattern(false));
        }
    }

    /**
     * Collects the digits of a numbering system and the characters of the locale's
     * decimal, group, minusSign, list, percentSign, perMille and plusSign symbols for it.
     * The exponential, infinity and nan symbols are not collected. If any collected symbol
     * is missing and the numbering system has digits, a "Missing:" line is printed.
     *
     * @param cldrFileToCheck the locale's data
     * @param numberSystem the numbering system ID
     * @return pair of (digits, punctuation with bidi controls removed); digits is empty
     *         when the supplemental data has no digit string for the numbering system
     */
    private static Pair<UnicodeSet, UnicodeSet> getCharacters(CLDRFile cldrFileToCheck, String numberSystem) {
        String digitString = SUPP.getDigits(numberSystem);
        UnicodeSet digits = digitString == null ? UnicodeSet.EMPTY : new UnicodeSet().addAll(digitString);

        UnicodeSet punctuation = new UnicodeSet();
        Set<String> errors = new LinkedHashSet<>();
        add(cldrFileToCheck, "decimal", numberSystem, punctuation, errors);
        add(cldrFileToCheck, "group", numberSystem, punctuation, errors);
        add(cldrFileToCheck, "minusSign", numberSystem, punctuation, errors);
        add(cldrFileToCheck, "list", numberSystem, punctuation, errors);
        add(cldrFileToCheck, "percentSign", numberSystem, punctuation, errors);
        add(cldrFileToCheck, "perMille", numberSystem, punctuation, errors);
        add(cldrFileToCheck, "plusSign", numberSystem, punctuation, errors);
        if (!errors.isEmpty() && digitString != null) {
            System.out.println("Missing: " + numberSystem + "\t" + errors);
        }
        punctuation.removeAll(BIDI_CONTROL);
        return Pair.of(digits, punctuation);
    }

    /**
     * Adds the characters of one number symbol to {@code punctuation}, or records the
     * symbol name in {@code errors} when the locale has no value for it.
     *
     * @param cldrFileToCheck the locale's data
     * @param subtype the symbol element name, e.g. "decimal"
     * @param numberSystem the numbering system ID
     * @param punctuation accumulator for the symbol characters
     * @param errors accumulator for the names of missing symbols
     */
    private static void add(CLDRFile cldrFileToCheck, String subtype, String numberSystem, UnicodeSet punctuation, Set<String> errors) {
        final String result = getSymbolString(cldrFileToCheck, subtype, numberSystem);
        if (result == null) {
            errors.add(subtype);
        } else {
            punctuation.addAll(result);
        }
    }

    /**
     * Looks up the winning value of a number symbol for a numbering system.
     *
     * @param cldrFile the locale's data
     * @param key the symbol element name
     * @param numsys the numbering system ID
     * @return the winning value at the symbol's path (callers treat null as "missing")
     */
    private static String getSymbolString(CLDRFile cldrFile, String key, String numsys) {
        return cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"" + numsys + "\"]/" + key);
    }

    /**
     * A path and its value; {@link #toString()} renders them as an "add" instruction
     * line for locale en.
     */
    static final class Data {
        final String path;
        final String value;

        public Data(String path2, String stringValue) {
            path = path2;
            value = stringValue;
        }

        @Override
        public String toString() {
            return "locale=en"
                + " ; action=add"
                + " ; new_path=" + path
                + " ; new_value=" + value;
        }
    }

    /**
     * Collects the sorted set of unit keys present in a locale file. Only paths that
     * contain "/unit" and carry a {@code type} attribute on a {@code unit} element are
     * considered. For {@link Task#listUnits} the key is {@code unit/length/per}, where
     * {@code length} is the {@code unitLength} type and {@code per} is "per" for
     * perUnitPattern paths and empty otherwise; for other tasks the key is the unit type.
     *
     * @param cldrFile the locale's data
     * @param task the report being produced; controls the key format
     * @param extra if non-null, receives key to path/value entries for every non-alias path
     * @return the unit keys in natural order
     */
    private static Set<String> getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra) {
        Set<String> seen = new TreeSet<>();
        for (String path : cldrFile) {
            if (!path.contains("/unit")) {
                continue;
            }
            XPathParts parts = XPathParts.getFrozenInstance(path);
            String unit = parts.findAttributeValue("unit", "type");
            if (unit == null) {
                continue;
            }
            String key = unit;
            if (task == Task.listUnits) {
                String length = parts.findAttributeValue("unitLength", "type");
                String per = "perUnitPattern".equals(parts.getElement(-1)) ? "per" : "";
                key = unit + "/" + length + "/" + per;
            }
            seen.add(key);
            if (extra != null && !path.endsWith("/alias")) {
                extra.put(key, new Data(path, cldrFile.getStringValue(path)));
            }
        }
        return seen;
    }
}