• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import com.ibm.icu.text.CompactDecimalFormat;
4 import com.ibm.icu.text.CompactDecimalFormat.CompactStyle;
5 import com.ibm.icu.text.NumberFormat;
6 import com.ibm.icu.text.UnicodeSet;
7 import com.ibm.icu.util.Currency;
8 import com.ibm.icu.util.ICUUncheckedIOException;
9 import com.ibm.icu.util.ULocale;
10 import java.io.File;
11 import java.io.IOException;
12 import java.io.PrintWriter;
13 import java.util.Arrays;
14 import java.util.HashSet;
15 import java.util.LinkedHashSet;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.TreeMap;
19 import java.util.TreeSet;
20 import java.util.regex.Pattern;
21 import org.unicode.cldr.draft.FileUtilities;
22 import org.unicode.cldr.test.BuildIcuCompactDecimalFormat;
23 import org.unicode.cldr.test.BuildIcuCompactDecimalFormat.CurrencyStyle;
24 import org.unicode.cldr.tool.ChartDelta;
25 import org.unicode.cldr.tool.FormattedFileWriter;
26 import org.unicode.cldr.tool.Option;
27 import org.unicode.cldr.tool.Option.Options;
28 import org.unicode.cldr.tool.ShowData;
29 import org.unicode.cldr.tool.ShowPlurals;
30 import org.unicode.cldr.tool.TablePrinter;
31 import org.unicode.cldr.util.CLDRFile.DraftStatus;
32 import org.unicode.cldr.util.PathHeader.PageId;
33 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
34 
35 public class VerifyCompactNumbers {
36 
37     private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
38     private static final String DIR = CLDRPaths.VERIFY_DIR + "numbers/";
39     // The following is also in ExampleGenerator and DateTimeFormats; it and other shared constant
40     // sets should
41     // probably be moved to a common file of such things.
42     private static final UnicodeSet BIDI_MARKS = new UnicodeSet("[:Bidi_Control:]").freeze();
43     private static final String exampleSep = "<br>";
44     private static final String rtlStart = "<div dir='rtl'>";
45     private static final String rtlEnd = "</div>";
46 
47     static final Options myOptions = new Options();
48 
49     enum MyOptions {
50         organization(".*", "CLDR", "organization"),
51         filter(".*", ".*", "locale filter (regex)"),
52         currency(".*", "EUR", "show currency"),
53         ;
54         // boilerplate
55         final Option option;
56 
MyOptions(String argumentPattern, String defaultArgument, String helpText)57         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
58             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
59         }
60     }
61 
62     // later, look at DateTimeFormats to set up as an HTML table
63 
64     public static final Set<String> USES_GROUPS_OF_4 =
65             new HashSet<>(Arrays.asList("ko", "ja", "zh", "zh_Hant"));
66 
67     /**
68      * Produce a set of static tables from the vxml data. Only a stopgap until the above is
69      * integrated into ST.
70      *
71      * @param args
72      * @throws IOException
73      */
main(String[] args)74     public static void main(String[] args) throws IOException {
75         myOptions.parse(MyOptions.organization, args, true);
76         new File(DIR).mkdirs();
77         FileCopier.copy(ShowData.class, "verify-index.html", CLDRPaths.VERIFY_DIR, "index.html");
78         FileCopier.copy(ChartDelta.class, "index.css", CLDRPaths.VERIFY_DIR, "index.css");
79         FormattedFileWriter.copyIncludeHtmls(CLDRPaths.VERIFY_DIR);
80 
81         String organization = MyOptions.organization.option.getValue();
82         String filter = MyOptions.filter.option.getValue();
83         boolean showCurrency = true; // MyOptions.currency.option.doesOccur();
84         String currencyCode = MyOptions.currency.option.getValue();
85 
86         Factory factory2 = Factory.make(CLDRPaths.MAIN_DIRECTORY, filter);
87         CLDRFile englishCldrFile = factory2.make("en", true);
88 
89         SupplementalDataInfo sdi = CLDR_CONFIG.getSupplementalDataInfo();
90         Set<String> defaultContentLocales = sdi.getDefaultContentLocales();
91         NumberFormat enf = NumberFormat.getIntegerInstance(ULocale.ENGLISH);
92         enf.setGroupingUsed(false);
93 
94         Set<String> availableLanguages = new TreeSet<>(factory2.getAvailableLanguages());
95         if (Pattern.matches(filter, "pt_PT")) {
96             availableLanguages.add("pt_PT");
97         }
98 
99         PrintWriter plainText = FileUtilities.openUTF8Writer(DIR, "compactTestFile.txt");
100         DateTimeFormats.writeCss(DIR);
101         final CLDRFile english = CLDR_CONFIG.getEnglish();
102 
103         Map<String, String> indexMap = new TreeMap<>(CLDR_CONFIG.getCollator());
104 
105         for (String locale : availableLanguages) {
106             if (defaultContentLocales.contains(locale)) {
107                 continue;
108             }
109             Level level = StandardCodes.make().getLocaleCoverageLevel(organization, locale);
110             if (Level.MODERN.compareTo(level) > 0) {
111                 continue;
112             }
113             // TODO: fix to ignore locales with no data.
114             if (locale.equals("ne") || locale.equals("cy")) {
115                 continue;
116             }
117 
118             PrintWriter out = FileUtilities.openUTF8Writer(DIR, locale + ".html");
119             String title = "Verify Number Formats: " + englishCldrFile.getName(locale);
120             out.println(
121                     "<!doctype HTML PUBLIC '-//W3C//DTD HTML 4.0 Transitional//EN'><html><head>\n"
122                             + "<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>\n"
123                             + "<title>"
124                             + title
125                             + "</title>\n"
126                             + "<link rel='stylesheet' type='text/css' href='index.css'>\n"
127                             + "</head><body><h1>"
128                             + title
129                             + "</h1>\n"
130                             + "<p><a href='index.html'>Index</a></p>\n");
131 
132             CLDRFile cldrFile = factory2.make(locale, true, DraftStatus.contributed);
133 
134             showNumbers(cldrFile, showCurrency, currencyCode, out, factory2);
135 
136             out.println("</body></html>");
137             out.close();
138             indexMap.put(english.getName(locale), locale + ".html");
139         }
140         try (PrintWriter index = DateTimeFormats.openIndex(DIR, "Numbers")) {
141             DateTimeFormats.writeIndexMap(indexMap, index);
142         }
143 
144         plainText.close();
145     }
146 
showNumbers( CLDRFile cldrFile, boolean showCurrency, String currencyCode, Appendable out, Factory factory)147     public static void showNumbers(
148             CLDRFile cldrFile,
149             boolean showCurrency,
150             String currencyCode,
151             Appendable out,
152             Factory factory) {
153         try {
154             Set<String> debugCreationErrors = new LinkedHashSet<>();
155             Set<String> errors = new LinkedHashSet<>();
156             String locale = cldrFile.getLocaleID();
157             String characterOrder =
158                     cldrFile.getStringValue("//ldml/layout/orientation/characterOrder");
159             boolean isRTL = (characterOrder != null && characterOrder.equals("right-to-left"));
160 
161             TablePrinter tablePrinter1 =
162                     new TablePrinter()
163                             // .setCaption("Timezone Formats")
164                             .setTableAttributes("class='dtf-table'")
165                             .addColumn(
166                                     "Numeric Format<br>(neutral context,<br>then RTL if relevant)")
167                             .setHeaderCell(true)
168                             .setHeaderAttributes("class='dtf-th'")
169                             .setCellAttributes("class='dtf-s'")
170                             .addColumn("Compact-Short")
171                             .setHeaderAttributes("class='dtf-th'")
172                             .setCellAttributes("class='dtf-s'")
173                             .addColumn("Compact-Long")
174                             .setHeaderAttributes("class='dtf-th'")
175                             .setCellAttributes("class='dtf-s'");
176             if (showCurrency) {
177                 tablePrinter1
178                         .addColumn("Compact-Short<br>+Currency")
179                         .setHeaderAttributes("class='dtf-th'")
180                         .setCellAttributes("class='dtf-s'")
181                 //                    .addColumn("Compact-Short<br>+Unit")
182                 //                    .setHeaderAttributes("class='dtf-th'")
183                 //                    .setCellAttributes("class='dtf-s'")
184                 // .addColumn("Compact-Long<br>+Currency")
185                 // .addColumn("Compact-Long<br>+Currency-Long")
186                 //                    .addColumn("Numeric
187                 // Format").setHeaderCell(true).setHeaderAttributes("class='dtf-th'")
188                 //                      .setCellAttributes("class='dtf-s'")
189                 ;
190             }
191             //
192             // tablePrinter1.addColumn("View").setHeaderCell(true).setHeaderAttributes("class='dtf-th'").setCellAttributes("class='dtf-s'");
193 
194             ULocale locale2 = new ULocale(locale);
195             ICUServiceBuilder builder = new ICUServiceBuilder().setCldrFile(cldrFile);
196             NumberFormat nf = builder.getNumberFormat(1);
197 
198             // nf.setMaximumFractionDigits(0);
199             SupplementalDataInfo sdi = CLDR_CONFIG.getSupplementalDataInfo();
200             String[] debugOriginals = null;
201             CompactDecimalFormat cdf =
202                     BuildIcuCompactDecimalFormat.build(
203                             cldrFile,
204                             debugCreationErrors,
205                             debugOriginals,
206                             CompactStyle.SHORT,
207                             locale2,
208                             CurrencyStyle.PLAIN,
209                             currencyCode);
210             captureErrors(debugCreationErrors, errors, locale, "short");
211             CompactDecimalFormat cdfs =
212                     BuildIcuCompactDecimalFormat.build(
213                             cldrFile,
214                             debugCreationErrors,
215                             debugOriginals,
216                             CompactStyle.LONG,
217                             locale2,
218                             CurrencyStyle.PLAIN,
219                             currencyCode);
220             captureErrors(debugCreationErrors, errors, locale, "long");
221 
222             CompactDecimalFormat cdfCurr =
223                     BuildIcuCompactDecimalFormat.build(
224                             cldrFile,
225                             debugCreationErrors,
226                             debugOriginals,
227                             CompactStyle.SHORT,
228                             locale2,
229                             CurrencyStyle.CURRENCY,
230                             currencyCode);
231             captureErrors(debugCreationErrors, errors, locale, "short-curr");
232             //            CompactDecimalFormat cdfU = BuildIcuCompactDecimalFormat.build(cldrFile,
233             // debugCreationErrors,
234             //                debugOriginals, CompactStyle.SHORT, locale2, CurrencyStyle.UNIT,
235             // "EUR");
236             //            captureErrors(debugCreationErrors, errors, locale, "short-kg");
237             //             CompactDecimalFormat cdfsCurr =
238             // BuildIcuCompactDecimalFormat.build(cldrFile, debugCreationErrors,
239             //             debugOriginals, CompactStyle.SHORT, locale2, CurrencyStyle.CURRENCY,
240             // currencyCode);
241             //             CompactDecimalFormat cdfsCurrISO =
242             // BuildIcuCompactDecimalFormat.build(cldrFile, debugCreationErrors,
243             //             debugOriginals, CompactStyle.LONG, locale2, CurrencyStyle.ISO_CURRENCY,
244             // "EUR");
245 
246             Set<Double> allSamples =
247                     collectSamplesAndSetFormats(currencyCode, locale, sdi, cdf, cdfs, cdfCurr);
248 
249             try {
250                 for (double source : allSamples) {
251                     if (false && source == 22000000 && locale.equals("cs")) {
252                         System.out.println("**");
253                     }
254 
255                     String formattedNumber = nf.format(source);
256                     if (isRTL || BIDI_MARKS.containsSome(formattedNumber)) {
257                         formattedNumber += exampleSep + rtlStart + formattedNumber + rtlEnd;
258                     }
259                     String compactFormattedNumber = cdf == null ? "n/a" : cdf.format(source);
260                     String compactLongFormattedNumber = cdfs == null ? "n/a" : cdfs.format(source);
261                     String compactCurrFormattedNumber =
262                             !showCurrency || cdfs == null ? "n/a" : cdfCurr.format(source);
263                     // plainText.println(locale
264                     // + "\t__" + source
265                     // + "\t__" + compactFormattedNumber
266                     // + "\t__" + compactLongFormattedNumber
267                     // );
268                     tablePrinter1
269                             .addRow()
270                             .addCell(formattedNumber)
271                             .addCell(compactFormattedNumber)
272                             .addCell(compactLongFormattedNumber);
273                     if (showCurrency) {
274                         tablePrinter1.addCell(compactCurrFormattedNumber)
275                         //                            .addCell(cdfU.format(source))
276                         //                             .addCell(cdfsCurr.format(source))
277                         // .addCell(cdfsCurrLong.format(source))
278                         // .addCell(cdfsCurrLong.format(source))
279                         // .addCell(formattedNumber)
280                         ;
281                     }
282                     //                    String view = PathHeader.getLinkedView(surveyUrl,
283                     // cldrFile, METAZONE_PREFIX + metazone + METAZONE_SUFFIX);
284                     //                    tablePrinter1.addCell(view == null
285                     //                            ? ""
286                     //                                    : view);
287                     tablePrinter1.finishRow();
288                 }
289             } catch (Exception e) {
290                 e.printStackTrace();
291             }
292             out.append(
293                     "<p>To correct problems in compact numbers below, please go to "
294                             + PathHeader.SECTION_LINK
295                             + CLDR_CONFIG
296                                     .urls()
297                                     .forPage(
298                                             cldrFile.getLocaleID(),
299                                             PageId.Compact_Decimal_Formatting)
300                             + "'><em>"
301                             + PageId.Compact_Decimal_Formatting
302                             + "</em></a>.</p>");
303             out.append(tablePrinter1.toString() + "\n");
304             out.append("<h3>Plural Rules</h3>");
305             out.append(
306                     "<p>Look over the Minimal Pairs to make sure they are ok. "
307                             + "Then review the examples in the cell to the left. "
308                             + "All of those you should be able to substitute for the numbers in the Minimal Pairs, "
309                             + "with an acceptable result. "
310                             + "If any would be incorrect, please "
311                             + "<a target='ticket' href='"
312                             + CLDRURLS.CLDR_NEWTICKET_URL
313                             + "'>file a ticket</a>.</p>"
314                             + "<p>For more details, see "
315                             + "<a target='CLDR-ST-DOCS' href='http://cldr.unicode.org/index/cldr-spec/plural-rules'>Plural Rules</a>.</p>");
316             ShowPlurals showPlurals = new ShowPlurals(CLDR_CONFIG.getSupplementalDataInfo());
317             showPlurals.printPluralTable(cldrFile, locale, out, factory);
318             ShowPlurals.appendBlanksForScrolling(out);
319             showErrors(errors, out);
320             showErrors(debugCreationErrors, out);
321         } catch (IOException e) {
322             throw new ICUUncheckedIOException(e);
323         }
324     }
325 
collectSamplesAndSetFormats( String currencyCode, String locale, SupplementalDataInfo sdi, CompactDecimalFormat cdf, CompactDecimalFormat cdfs, CompactDecimalFormat cdfCurr)326     public static Set<Double> collectSamplesAndSetFormats(
327             String currencyCode,
328             String locale,
329             SupplementalDataInfo sdi,
330             CompactDecimalFormat cdf,
331             CompactDecimalFormat cdfs,
332             CompactDecimalFormat cdfCurr) {
333         // Collect samples for display
334         // one path for group-3, one for group-4
335         // TODO, fix for indic.
336         int factor = USES_GROUPS_OF_4.contains(locale) ? 10000 : 1000;
337 
338         // we want to collect a sample of at least one sample for each plural category for each
339         // power of ten
340         PluralInfo pluralInfo = sdi.getPlurals(locale);
341         Set<Double> samples = new TreeSet<>();
342         samples.add(1.1d);
343         samples.add(1.5d);
344         samples.add(1100d);
345         collectItems(pluralInfo, 1, 10, samples);
346         collectItems(pluralInfo, 10, 100, samples);
347         collectItems(pluralInfo, 100, 1000, samples);
348         int sigDigits = 3;
349         if (factor > 1000) {
350             collectItems(pluralInfo, 1000, 10000, samples);
351             sigDigits = 4;
352         }
353         if (cdf != null) {
354             cdf.setMaximumSignificantDigits(sigDigits);
355         }
356         if (cdfs != null) {
357             cdfs.setMaximumSignificantDigits(sigDigits);
358         }
359         if (cdfCurr != null) {
360             cdfCurr.setCurrency(Currency.getInstance(currencyCode));
361             cdfCurr.setMaximumSignificantDigits(sigDigits);
362         }
363         //            cdfU.setMaximumSignificantDigits(sigDigits);
364 
365         // for (Entry<Count, List<Double>> entry : pluralInfo.getCountToExamplesMap().entrySet()) {
366         // samples.add(entry.getValue().get(0));
367         // }
368         //
369         // Set<Double> samples2 = new TreeSet<Double>();
370         // for (int i = 10; i < factor; i *= 10) {
371         // for (Double sample : samples) {
372         // samples2.add(sample*i);
373         // }
374         // }
375         // samples.addAll(samples2);
376 
377         Set<Double> allSamples = new TreeSet<>();
378         // First add selected negative values and 0
379         allSamples.add(-123456.7d); // decimal sep, and grouping sep if used
380         allSamples.add(-123456d); // no decimal sep, grouping sep if used
381         allSamples.add(-12.3d); // decimal sep, no grouping sep
382         allSamples.add(-12d); // no decimal or grouping sep
383         allSamples.add(0d); // no decimal or grouping sep
384         // Then the larger set of positive values
385         for (long i = 1; i <= 100000000000000L; i *= factor) {
386             for (Double sample : samples) {
387                 double source = i * sample;
388                 allSamples.add(source);
389             }
390         }
391         return allSamples;
392     }
393 
394     private static String surveyUrl =
395             CLDR_CONFIG.getProperty("CLDR_SURVEY_URL", "http://st.unicode.org/cldr-apps/survey");
396 
showErrors(Set<String> errors, Appendable out)397     private static void showErrors(Set<String> errors, Appendable out) throws IOException {
398         if (errors.size() != 0) {
399             out.append("<h2>" + "Errors" + "</h2>\n");
400             for (String s : errors) {
401                 out.append("<p>" + s + "</p>\n");
402             }
403             errors.clear();
404         }
405     }
406 
collectItems( PluralInfo pluralInfo, double start, double limit, Set<Double> samples)407     private static Set<Double> collectItems(
408             PluralInfo pluralInfo, double start, double limit, Set<Double> samples) {
409         // TODO optimize once we have all the keywords
410         Map<String, Double> ones = new TreeMap<>();
411         for (double i = start; i < limit; ++i) {
412             String cat = pluralInfo.getPluralRules().select(i);
413             if (ones.containsKey(cat)) {
414                 continue;
415             }
416             ones.put(cat, i);
417         }
418         samples.addAll(ones.values());
419         return samples;
420     }
421 
captureErrors( Set<String> debugCreationErrors, Set<String> errors, String locale, String length)422     private static void captureErrors(
423             Set<String> debugCreationErrors, Set<String> errors, String locale, String length) {
424         if (debugCreationErrors.size() != 0) {
425             for (String s : debugCreationErrors) {
426                 errors.add(locale + "\t" + length + "\t" + s);
427             }
428             debugCreationErrors.clear();
429         }
430     }
431 }
432