1 package org.unicode.cldr.util; 2 3 import com.ibm.icu.text.CompactDecimalFormat; 4 import com.ibm.icu.text.CompactDecimalFormat.CompactStyle; 5 import com.ibm.icu.text.NumberFormat; 6 import com.ibm.icu.text.UnicodeSet; 7 import com.ibm.icu.util.Currency; 8 import com.ibm.icu.util.ICUUncheckedIOException; 9 import com.ibm.icu.util.ULocale; 10 import java.io.File; 11 import java.io.IOException; 12 import java.io.PrintWriter; 13 import java.util.Arrays; 14 import java.util.HashSet; 15 import java.util.LinkedHashSet; 16 import java.util.Map; 17 import java.util.Set; 18 import java.util.TreeMap; 19 import java.util.TreeSet; 20 import java.util.regex.Pattern; 21 import org.unicode.cldr.draft.FileUtilities; 22 import org.unicode.cldr.test.BuildIcuCompactDecimalFormat; 23 import org.unicode.cldr.test.BuildIcuCompactDecimalFormat.CurrencyStyle; 24 import org.unicode.cldr.tool.ChartDelta; 25 import org.unicode.cldr.tool.FormattedFileWriter; 26 import org.unicode.cldr.tool.Option; 27 import org.unicode.cldr.tool.Option.Options; 28 import org.unicode.cldr.tool.ShowData; 29 import org.unicode.cldr.tool.ShowPlurals; 30 import org.unicode.cldr.tool.TablePrinter; 31 import org.unicode.cldr.util.CLDRFile.DraftStatus; 32 import org.unicode.cldr.util.PathHeader.PageId; 33 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 34 35 public class VerifyCompactNumbers { 36 37 private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance(); 38 private static final String DIR = CLDRPaths.VERIFY_DIR + "numbers/"; 39 // The following is also in ExampleGenerator and DateTimeFormats; it and other shared constant 40 // sets should 41 // probably be moved to a common file of such things. 42 private static final UnicodeSet BIDI_MARKS = new UnicodeSet("[:Bidi_Control:]").freeze(); 43 private static final String exampleSep = "<br>"; 44 private static final String rtlStart = "<div dir='rtl'>"; 45 private static final String rtlEnd = "</div>"; 46 47 static final Options myOptions = new Options(); 48 49 enum MyOptions { 50 organization(".*", "CLDR", "organization"), 51 filter(".*", ".*", "locale filter (regex)"), 52 currency(".*", "EUR", "show currency"), 53 ; 54 // boilerplate 55 final Option option; 56 MyOptions(String argumentPattern, String defaultArgument, String helpText)57 MyOptions(String argumentPattern, String defaultArgument, String helpText) { 58 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 59 } 60 } 61 62 // later, look at DateTimeFormats to set up as an HTML table 63 64 public static final Set<String> USES_GROUPS_OF_4 = 65 new HashSet<>(Arrays.asList("ko", "ja", "zh", "zh_Hant")); 66 67 /** 68 * Produce a set of static tables from the vxml data. Only a stopgap until the above is 69 * integrated into ST. 70 * 71 * @param args 72 * @throws IOException 73 */ main(String[] args)74 public static void main(String[] args) throws IOException { 75 myOptions.parse(MyOptions.organization, args, true); 76 new File(DIR).mkdirs(); 77 FileCopier.copy(ShowData.class, "verify-index.html", CLDRPaths.VERIFY_DIR, "index.html"); 78 FileCopier.copy(ChartDelta.class, "index.css", CLDRPaths.VERIFY_DIR, "index.css"); 79 FormattedFileWriter.copyIncludeHtmls(CLDRPaths.VERIFY_DIR); 80 81 String organization = MyOptions.organization.option.getValue(); 82 String filter = MyOptions.filter.option.getValue(); 83 boolean showCurrency = true; // MyOptions.currency.option.doesOccur(); 84 String currencyCode = MyOptions.currency.option.getValue(); 85 86 Factory factory2 = Factory.make(CLDRPaths.MAIN_DIRECTORY, filter); 87 CLDRFile englishCldrFile = factory2.make("en", true); 88 89 SupplementalDataInfo sdi = CLDR_CONFIG.getSupplementalDataInfo(); 90 Set<String> defaultContentLocales = sdi.getDefaultContentLocales(); 91 NumberFormat enf = NumberFormat.getIntegerInstance(ULocale.ENGLISH); 92 enf.setGroupingUsed(false); 93 94 Set<String> availableLanguages = new TreeSet<>(factory2.getAvailableLanguages()); 95 if (Pattern.matches(filter, "pt_PT")) { 96 availableLanguages.add("pt_PT"); 97 } 98 99 PrintWriter plainText = FileUtilities.openUTF8Writer(DIR, "compactTestFile.txt"); 100 DateTimeFormats.writeCss(DIR); 101 final CLDRFile english = CLDR_CONFIG.getEnglish(); 102 103 Map<String, String> indexMap = new TreeMap<>(CLDR_CONFIG.getCollator()); 104 105 for (String locale : availableLanguages) { 106 if (defaultContentLocales.contains(locale)) { 107 continue; 108 } 109 Level level = StandardCodes.make().getLocaleCoverageLevel(organization, locale); 110 if (Level.MODERN.compareTo(level) > 0) { 111 continue; 112 } 113 // TODO: fix to ignore locales with no data. 114 if (locale.equals("ne") || locale.equals("cy")) { 115 continue; 116 } 117 118 PrintWriter out = FileUtilities.openUTF8Writer(DIR, locale + ".html"); 119 String title = "Verify Number Formats: " + englishCldrFile.getName(locale); 120 out.println( 121 "<!doctype HTML PUBLIC '-//W3C//DTD HTML 4.0 Transitional//EN'><html><head>\n" 122 + "<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>\n" 123 + "<title>" 124 + title 125 + "</title>\n" 126 + "<link rel='stylesheet' type='text/css' href='index.css'>\n" 127 + "</head><body><h1>" 128 + title 129 + "</h1>\n" 130 + "<p><a href='index.html'>Index</a></p>\n"); 131 132 CLDRFile cldrFile = factory2.make(locale, true, DraftStatus.contributed); 133 134 showNumbers(cldrFile, showCurrency, currencyCode, out, factory2); 135 136 out.println("</body></html>"); 137 out.close(); 138 indexMap.put(english.getName(locale), locale + ".html"); 139 } 140 try (PrintWriter index = DateTimeFormats.openIndex(DIR, "Numbers")) { 141 DateTimeFormats.writeIndexMap(indexMap, index); 142 } 143 144 plainText.close(); 145 } 146 showNumbers( CLDRFile cldrFile, boolean showCurrency, String currencyCode, Appendable out, Factory factory)147 public static void showNumbers( 148 CLDRFile cldrFile, 149 boolean showCurrency, 150 String currencyCode, 151 Appendable out, 152 Factory factory) { 153 try { 154 Set<String> debugCreationErrors = new LinkedHashSet<>(); 155 Set<String> errors = new LinkedHashSet<>(); 156 String locale = cldrFile.getLocaleID(); 157 String characterOrder = 158 cldrFile.getStringValue("//ldml/layout/orientation/characterOrder"); 159 boolean isRTL = (characterOrder != null && characterOrder.equals("right-to-left")); 160 161 TablePrinter tablePrinter1 = 162 new TablePrinter() 163 // .setCaption("Timezone Formats") 164 .setTableAttributes("class='dtf-table'") 165 .addColumn( 166 "Numeric Format<br>(neutral context,<br>then RTL if relevant)") 167 .setHeaderCell(true) 168 .setHeaderAttributes("class='dtf-th'") 169 .setCellAttributes("class='dtf-s'") 170 .addColumn("Compact-Short") 171 .setHeaderAttributes("class='dtf-th'") 172 .setCellAttributes("class='dtf-s'") 173 .addColumn("Compact-Long") 174 .setHeaderAttributes("class='dtf-th'") 175 .setCellAttributes("class='dtf-s'"); 176 if (showCurrency) { 177 tablePrinter1 178 .addColumn("Compact-Short<br>+Currency") 179 .setHeaderAttributes("class='dtf-th'") 180 .setCellAttributes("class='dtf-s'") 181 // .addColumn("Compact-Short<br>+Unit") 182 // .setHeaderAttributes("class='dtf-th'") 183 // .setCellAttributes("class='dtf-s'") 184 // .addColumn("Compact-Long<br>+Currency") 185 // .addColumn("Compact-Long<br>+Currency-Long") 186 // .addColumn("Numeric 187 // Format").setHeaderCell(true).setHeaderAttributes("class='dtf-th'") 188 // .setCellAttributes("class='dtf-s'") 189 ; 190 } 191 // 192 // tablePrinter1.addColumn("View").setHeaderCell(true).setHeaderAttributes("class='dtf-th'").setCellAttributes("class='dtf-s'"); 193 194 ULocale locale2 = new ULocale(locale); 195 ICUServiceBuilder builder = new ICUServiceBuilder().setCldrFile(cldrFile); 196 NumberFormat nf = builder.getNumberFormat(1); 197 198 // nf.setMaximumFractionDigits(0); 199 SupplementalDataInfo sdi = CLDR_CONFIG.getSupplementalDataInfo(); 200 String[] debugOriginals = null; 201 CompactDecimalFormat cdf = 202 BuildIcuCompactDecimalFormat.build( 203 cldrFile, 204 debugCreationErrors, 205 debugOriginals, 206 CompactStyle.SHORT, 207 locale2, 208 CurrencyStyle.PLAIN, 209 currencyCode); 210 captureErrors(debugCreationErrors, errors, locale, "short"); 211 CompactDecimalFormat cdfs = 212 BuildIcuCompactDecimalFormat.build( 213 cldrFile, 214 debugCreationErrors, 215 debugOriginals, 216 CompactStyle.LONG, 217 locale2, 218 CurrencyStyle.PLAIN, 219 currencyCode); 220 captureErrors(debugCreationErrors, errors, locale, "long"); 221 222 CompactDecimalFormat cdfCurr = 223 BuildIcuCompactDecimalFormat.build( 224 cldrFile, 225 debugCreationErrors, 226 debugOriginals, 227 CompactStyle.SHORT, 228 locale2, 229 CurrencyStyle.CURRENCY, 230 currencyCode); 231 captureErrors(debugCreationErrors, errors, locale, "short-curr"); 232 // CompactDecimalFormat cdfU = BuildIcuCompactDecimalFormat.build(cldrFile, 233 // debugCreationErrors, 234 // debugOriginals, CompactStyle.SHORT, locale2, CurrencyStyle.UNIT, 235 // "EUR"); 236 // captureErrors(debugCreationErrors, errors, locale, "short-kg"); 237 // CompactDecimalFormat cdfsCurr = 238 // BuildIcuCompactDecimalFormat.build(cldrFile, debugCreationErrors, 239 // debugOriginals, CompactStyle.SHORT, locale2, CurrencyStyle.CURRENCY, 240 // currencyCode); 241 // CompactDecimalFormat cdfsCurrISO = 242 // BuildIcuCompactDecimalFormat.build(cldrFile, debugCreationErrors, 243 // debugOriginals, CompactStyle.LONG, locale2, CurrencyStyle.ISO_CURRENCY, 244 // "EUR"); 245 246 Set<Double> allSamples = 247 collectSamplesAndSetFormats(currencyCode, locale, sdi, cdf, cdfs, cdfCurr); 248 249 try { 250 for (double source : allSamples) { 251 if (false && source == 22000000 && locale.equals("cs")) { 252 System.out.println("**"); 253 } 254 255 String formattedNumber = nf.format(source); 256 if (isRTL || BIDI_MARKS.containsSome(formattedNumber)) { 257 formattedNumber += exampleSep + rtlStart + formattedNumber + rtlEnd; 258 } 259 String compactFormattedNumber = cdf == null ? "n/a" : cdf.format(source); 260 String compactLongFormattedNumber = cdfs == null ? "n/a" : cdfs.format(source); 261 String compactCurrFormattedNumber = 262 !showCurrency || cdfs == null ? "n/a" : cdfCurr.format(source); 263 // plainText.println(locale 264 // + "\t__" + source 265 // + "\t__" + compactFormattedNumber 266 // + "\t__" + compactLongFormattedNumber 267 // ); 268 tablePrinter1 269 .addRow() 270 .addCell(formattedNumber) 271 .addCell(compactFormattedNumber) 272 .addCell(compactLongFormattedNumber); 273 if (showCurrency) { 274 tablePrinter1.addCell(compactCurrFormattedNumber) 275 // .addCell(cdfU.format(source)) 276 // .addCell(cdfsCurr.format(source)) 277 // .addCell(cdfsCurrLong.format(source)) 278 // .addCell(cdfsCurrLong.format(source)) 279 // .addCell(formattedNumber) 280 ; 281 } 282 // String view = PathHeader.getLinkedView(surveyUrl, 283 // cldrFile, METAZONE_PREFIX + metazone + METAZONE_SUFFIX); 284 // tablePrinter1.addCell(view == null 285 // ? "" 286 // : view); 287 tablePrinter1.finishRow(); 288 } 289 } catch (Exception e) { 290 e.printStackTrace(); 291 } 292 out.append( 293 "<p>To correct problems in compact numbers below, please go to " 294 + PathHeader.SECTION_LINK 295 + CLDR_CONFIG 296 .urls() 297 .forPage( 298 cldrFile.getLocaleID(), 299 PageId.Compact_Decimal_Formatting) 300 + "'><em>" 301 + PageId.Compact_Decimal_Formatting 302 + "</em></a>.</p>"); 303 out.append(tablePrinter1.toString() + "\n"); 304 out.append("<h3>Plural Rules</h3>"); 305 out.append( 306 "<p>Look over the Minimal Pairs to make sure they are ok. " 307 + "Then review the examples in the cell to the left. " 308 + "All of those you should be able to substitute for the numbers in the Minimal Pairs, " 309 + "with an acceptable result. " 310 + "If any would be incorrect, please " 311 + "<a target='ticket' href='" 312 + CLDRURLS.CLDR_NEWTICKET_URL 313 + "'>file a ticket</a>.</p>" 314 + "<p>For more details, see " 315 + "<a target='CLDR-ST-DOCS' href='http://cldr.unicode.org/index/cldr-spec/plural-rules'>Plural Rules</a>.</p>"); 316 ShowPlurals showPlurals = new ShowPlurals(CLDR_CONFIG.getSupplementalDataInfo()); 317 showPlurals.printPluralTable(cldrFile, locale, out, factory); 318 ShowPlurals.appendBlanksForScrolling(out); 319 showErrors(errors, out); 320 showErrors(debugCreationErrors, out); 321 } catch (IOException e) { 322 throw new ICUUncheckedIOException(e); 323 } 324 } 325 collectSamplesAndSetFormats( String currencyCode, String locale, SupplementalDataInfo sdi, CompactDecimalFormat cdf, CompactDecimalFormat cdfs, CompactDecimalFormat cdfCurr)326 public static Set<Double> collectSamplesAndSetFormats( 327 String currencyCode, 328 String locale, 329 SupplementalDataInfo sdi, 330 CompactDecimalFormat cdf, 331 CompactDecimalFormat cdfs, 332 CompactDecimalFormat cdfCurr) { 333 // Collect samples for display 334 // one path for group-3, one for group-4 335 // TODO, fix for indic. 336 int factor = USES_GROUPS_OF_4.contains(locale) ? 10000 : 1000; 337 338 // we want to collect a sample of at least one sample for each plural category for each 339 // power of ten 340 PluralInfo pluralInfo = sdi.getPlurals(locale); 341 Set<Double> samples = new TreeSet<>(); 342 samples.add(1.1d); 343 samples.add(1.5d); 344 samples.add(1100d); 345 collectItems(pluralInfo, 1, 10, samples); 346 collectItems(pluralInfo, 10, 100, samples); 347 collectItems(pluralInfo, 100, 1000, samples); 348 int sigDigits = 3; 349 if (factor > 1000) { 350 collectItems(pluralInfo, 1000, 10000, samples); 351 sigDigits = 4; 352 } 353 if (cdf != null) { 354 cdf.setMaximumSignificantDigits(sigDigits); 355 } 356 if (cdfs != null) { 357 cdfs.setMaximumSignificantDigits(sigDigits); 358 } 359 if (cdfCurr != null) { 360 cdfCurr.setCurrency(Currency.getInstance(currencyCode)); 361 cdfCurr.setMaximumSignificantDigits(sigDigits); 362 } 363 // cdfU.setMaximumSignificantDigits(sigDigits); 364 365 // for (Entry<Count, List<Double>> entry : pluralInfo.getCountToExamplesMap().entrySet()) { 366 // samples.add(entry.getValue().get(0)); 367 // } 368 // 369 // Set<Double> samples2 = new TreeSet<Double>(); 370 // for (int i = 10; i < factor; i *= 10) { 371 // for (Double sample : samples) { 372 // samples2.add(sample*i); 373 // } 374 // } 375 // samples.addAll(samples2); 376 377 Set<Double> allSamples = new TreeSet<>(); 378 // First add selected negative values and 0 379 allSamples.add(-123456.7d); // decimal sep, and grouping sep if used 380 allSamples.add(-123456d); // no decimal sep, grouping sep if used 381 allSamples.add(-12.3d); // decimal sep, no grouping sep 382 allSamples.add(-12d); // no decimal or grouping sep 383 allSamples.add(0d); // no decimal or grouping sep 384 // Then the larger set of positive values 385 for (long i = 1; i <= 100000000000000L; i *= factor) { 386 for (Double sample : samples) { 387 double source = i * sample; 388 allSamples.add(source); 389 } 390 } 391 return allSamples; 392 } 393 394 private static String surveyUrl = 395 CLDR_CONFIG.getProperty("CLDR_SURVEY_URL", "http://st.unicode.org/cldr-apps/survey"); 396 showErrors(Set<String> errors, Appendable out)397 private static void showErrors(Set<String> errors, Appendable out) throws IOException { 398 if (errors.size() != 0) { 399 out.append("<h2>" + "Errors" + "</h2>\n"); 400 for (String s : errors) { 401 out.append("<p>" + s + "</p>\n"); 402 } 403 errors.clear(); 404 } 405 } 406 collectItems( PluralInfo pluralInfo, double start, double limit, Set<Double> samples)407 private static Set<Double> collectItems( 408 PluralInfo pluralInfo, double start, double limit, Set<Double> samples) { 409 // TODO optimize once we have all the keywords 410 Map<String, Double> ones = new TreeMap<>(); 411 for (double i = start; i < limit; ++i) { 412 String cat = pluralInfo.getPluralRules().select(i); 413 if (ones.containsKey(cat)) { 414 continue; 415 } 416 ones.put(cat, i); 417 } 418 samples.addAll(ones.values()); 419 return samples; 420 } 421 captureErrors( Set<String> debugCreationErrors, Set<String> errors, String locale, String length)422 private static void captureErrors( 423 Set<String> debugCreationErrors, Set<String> errors, String locale, String length) { 424 if (debugCreationErrors.size() != 0) { 425 for (String s : debugCreationErrors) { 426 errors.add(locale + "\t" + length + "\t" + s); 427 } 428 debugCreationErrors.clear(); 429 } 430 } 431 } 432