• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.util.HashMap;
4 import java.util.HashSet;
5 import java.util.Iterator;
6 import java.util.LinkedHashSet;
7 import java.util.Map;
8 import java.util.Map.Entry;
9 import java.util.Set;
10 import java.util.TreeMap;
11 import java.util.TreeSet;
12 import java.util.regex.Matcher;
13 
14 import org.unicode.cldr.util.CLDRConfig;
15 import org.unicode.cldr.util.CLDRFile;
16 import org.unicode.cldr.util.Factory;
17 import org.unicode.cldr.util.Pair;
18 import org.unicode.cldr.util.PatternCache;
19 import org.unicode.cldr.util.SupplementalDataInfo;
20 import org.unicode.cldr.util.Timer;
21 import org.unicode.cldr.util.XPathParts;
22 
23 import com.google.common.base.Splitter;
24 import com.ibm.icu.text.UnicodeSet;
25 
26 public class ListUnits {
27     private static final UnicodeSet BIDI_CONTROL = new UnicodeSet("[:bidi_control:]").freeze();
28     private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
29     private static final SupplementalDataInfo SUPP = CONFIG.getSupplementalDataInfo();
30     private static final Task TASK = Task.listSimpleUnits;
31 
32     private enum Task {
33         listUnits, listSimpleUnits, showDecimals, getDigits,
34     }
35 
36     enum Type {
37         root, en, other;
fromString(String type)38         static Type fromString(String type) {
39             return type.equals("en") ? en : type.equals("root") ? root : other;
40         }
41     }
42 
main(String[] args)43     public static void main(String[] args) {
44         Factory cldrFactory = CONFIG.getCldrFactory();
45         Set<String> defaultContent = SUPP.getDefaultContentLocales();
46         Set<String> seen = new HashSet<>();
47 
48         LinkedHashSet<String> items = new LinkedHashSet<>();
49         items.add("root");
50         items.add("en");
51         items.addAll(cldrFactory.getAvailableLanguages());
52         Map<String, Data> rootMap = new HashMap<>();
53         Map<String, Data> enMap = new HashMap<>();
54 
55         Timer timer = new Timer();
56         int count = 0;
57         Splitter SEMI = Splitter.on(";").trimResults();
58         Matcher currencyMatcher = PatternCache.get("([^0#]*).*[0#]([^0#]*)").matcher("");
59 
60         for (String locale : items) {
61             Type type = Type.fromString(locale);
62             if (type == Type.root || type == Type.en || defaultContent.contains(locale)) {
63                 continue;
64             }
65             CLDRFile cldrFile = cldrFactory.make(locale, true);
66 //            DecimalFormat format = new DecimalFormat(currencyPattern);
67 //            String prefix = format.getPositivePrefix();
68 //            String suffix = format.getPositiveSuffix();
69 
70 //            ICUServiceBuilder builder = new ICUServiceBuilder().setCldrFile(cldrFile);
71 //            DecimalFormat format = builder.getCurrencyFormat("XXX");
72 //            String prefix = format.getPositivePrefix().replace("XXX", "\u00a4");
73 //            String suffix = format.getPositiveSuffix().replace("XXX", "\u00a4");
74             switch (TASK) {
75             case showDecimals: {
76                 String compactPathPrefix = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]";
77                 String currencyPattern = cldrFile
78                     .getStringValue(
79                         "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]");
80                 String firstPart = SEMI.split(currencyPattern).iterator().next();
81                 if (!currencyMatcher.reset(firstPart).matches()) {
82                     throw new IllegalArgumentException("bad matcher");
83                 }
84                 String prefix = currencyMatcher.group(1);
85                 String suffix = currencyMatcher.group(2);
86                 System.out.println("\n#" + locale + "\t«" + prefix + "»\t«" + suffix + "»\t«" + currencyPattern + "»");
87                 TreeMap<String, String> data = new TreeMap<>();
88                 for (String path : cldrFile.fullIterable()) {
89 //                    if (s.contains("decimalFormats")) {
90 //                        System.out.println(s);
91 //                    }
92                     if (path.startsWith(compactPathPrefix)) {
93                         String value = cldrFile.getStringValue(path);
94                         String mod = path.replace("decimal", "currency") + "[@draft=\"provisional\"]";
95                         //                        // locale=en ; action=add ; new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine
96                         data.put(mod, "locale=" + locale
97                             + " ; action=add"
98                             + " ; new_value=" + prefix + value + suffix
99                             + " ; new_path=" + mod);
100                     }
101                 }
102                 for (Entry<String, String> line : data.entrySet()) {
103                     System.out.println(line.getValue());
104                 }
105                 data.clear();
106                 break;
107             }
108             case listUnits:
109             case listSimpleUnits: {
110                 Set<String> units = getUnits(cldrFile, TASK, type == Type.root ? rootMap : type == Type.en ? enMap : null);
111                 if (type == Type.en) {
112                     TreeSet<String> missing = new TreeSet<>(seen);
113                     missing.removeAll(units);
114                     for (String unit : missing) {
115                         // locale=en ; action=add ; new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine
116                         Data data = rootMap.get(unit);
117                         if (data != null) {
118                             System.out.println(data);
119                         }
120                     }
121                 }
122                 Splitter HYPHEN = Splitter.on('-');
123                 String oldBase = "";
124                 for (String unit : units) {
125                     if (!seen.contains(unit)) {
126                         switch (TASK) {
127                         case listSimpleUnits:
128                             String base = HYPHEN.split(unit).iterator().next();
129                             if (!base.equals(oldBase)) {
130                                 oldBase = base;
131                                 System.out.println();
132                             } else {
133                                 System.out.print(' ');
134                             }
135                             System.out.print(unit);
136                             break;
137                         case listUnits:
138                             System.out.println("\t" + unit.replace("/", "\t")
139                                 .replaceFirst("-", "\t") + "\t" + locale);
140                             break;
141                         }
142                         seen.add(unit);
143                     }
144                 }
145                 break;
146             }
147             case getDigits: {
148                 getDigits(cldrFile);
149                 break;
150             }
151             }
152         }
153         System.out.println();
154         System.out.println("#Done: " + count + ", " + timer);
155     }
156 
getDigits(CLDRFile cldrFile)157     static void getDigits(CLDRFile cldrFile) {
158         System.out.println(cldrFile.getLocaleID());
159         String numberSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem");
160         Set<String> seen = new HashSet<>();
161         seen.add(numberSystem);
162         Pair<UnicodeSet, UnicodeSet> main = getCharacters(cldrFile, numberSystem);
163         System.out.println("\tdefault: " + numberSystem + ", " + main.getFirst().toPattern(false) + ", " + main.getSecond().toPattern(false));
164         for (Iterator<String> it = cldrFile.iterator("//ldml/numbers/otherNumberingSystems"); it.hasNext();) {
165             String path = it.next();
166             String otherNumberingSystem = cldrFile.getWinningValue(path);
167             if (seen.contains(otherNumberingSystem)) {
168                 continue;
169             }
170             seen.add(otherNumberingSystem);
171             main = getCharacters(cldrFile, otherNumberingSystem);
172             System.out.println("\tother: " + otherNumberingSystem
173                 + ", " + main.getFirst().toPattern(false) + "\t" + main.getSecond().toPattern(false));
174         }
175     }
176 
getCharacters(CLDRFile cldrFileToCheck, String numberSystem)177     private static Pair<UnicodeSet, UnicodeSet> getCharacters(CLDRFile cldrFileToCheck, String numberSystem) {
178         String digitString = SUPP.getDigits(numberSystem);
179         UnicodeSet digits = digitString == null ? UnicodeSet.EMPTY : new UnicodeSet().addAll(digitString);
180 
181         UnicodeSet punctuation = new UnicodeSet();
182         Set<String> errors = new LinkedHashSet<>();
183         add(cldrFileToCheck, "decimal", numberSystem, punctuation, errors);
184         //add(cldrFileToCheck, "exponential", numberSystem, punctuation, errors);
185         add(cldrFileToCheck, "group", numberSystem, punctuation, errors);
186         //add(cldrFileToCheck, "infinity", numberSystem, punctuation, errors);
187         add(cldrFileToCheck, "minusSign", numberSystem, punctuation, errors);
188         //add(cldrFileToCheck, "nan", numberSystem, punctuation, errors);
189         add(cldrFileToCheck, "list", numberSystem, punctuation, errors);
190         add(cldrFileToCheck, "percentSign", numberSystem, punctuation, errors);
191         add(cldrFileToCheck, "perMille", numberSystem, punctuation, errors);
192         add(cldrFileToCheck, "plusSign", numberSystem, punctuation, errors);
193         // symbols.setZeroDigit(getSymbolString(cldrFileToCheck, "nativeZeroDigit", numberSystem));
194         if (!errors.isEmpty() && digitString != null) {
195             System.out.println("Missing: " + numberSystem + "\t" + errors);
196         }
197         punctuation.removeAll(BIDI_CONTROL);
198         return Pair.of(digits, punctuation);
199     }
200 
add(CLDRFile cldrFileToCheck, String subtype, String numberSystem, UnicodeSet punctuation, Set<String> errors)201     private static void add(CLDRFile cldrFileToCheck, String subtype, String numberSystem, UnicodeSet punctuation, Set<String> errors) {
202         final String result = getSymbolString(cldrFileToCheck, subtype, numberSystem);
203         if (result == null) {
204             errors.add(subtype);
205         } else {
206             punctuation.addAll(result);
207         }
208     }
209 
getSymbolString(CLDRFile cldrFile, String key, String numsys)210     private static String getSymbolString(CLDRFile cldrFile, String key, String numsys) {
211         return cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"" + numsys + "\"]/" + key);
212     }
213 
214     static final class Data {
Data(String path2, String stringValue)215         public Data(String path2, String stringValue) {
216             path = path2;
217             value = stringValue;
218         }
219 
220         final String path;
221         final String value;
222 
223         @Override
toString()224         public String toString() {
225             return "locale=en"
226                 + " ; action=add"
227                 + " ; new_path=" + path
228                 + " ; new_value=" + value;
229         }
230     }
231 
getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra)232     private static Set<String> getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra) {
233         Set<String> seen = new TreeSet<>();
234         for (String path : cldrFile) {
235             if (!path.contains("/unit")) {
236                 continue;
237             }
238             XPathParts parts = XPathParts.getFrozenInstance(path);
239             String unit = parts.findAttributeValue("unit", "type");
240             if (unit == null) {
241                 continue;
242             }
243             String key = unit;
244             if (task == Task.listUnits) {
245                 String length = parts.findAttributeValue("unitLength", "type");
246                 String per = "perUnitPattern".equals(parts.getElement(-1)) ? "per" : "";
247                 key = unit + "/" + length + "/" + per;
248             }
249             seen.add(key);
250             if (extra != null && !path.endsWith("/alias")) {
251                 extra.put(key, new Data(path, cldrFile.getStringValue(path)));
252             }
253         }
254         return seen;
255     }
256 }
257