1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.tool; 10 11 import java.io.File; 12 import java.io.PrintWriter; 13 import java.text.ParseException; 14 import java.text.ParsePosition; 15 import java.util.Arrays; 16 import java.util.Collection; 17 import java.util.Collections; 18 import java.util.Comparator; 19 import java.util.Date; 20 import java.util.Iterator; 21 import java.util.LinkedHashMap; 22 import java.util.LinkedHashSet; 23 import java.util.List; 24 import java.util.Locale; 25 import java.util.Map; 26 import java.util.Set; 27 import java.util.TreeMap; 28 import java.util.TreeSet; 29 import java.util.regex.Matcher; 30 31 import org.unicode.cldr.draft.FileUtilities; 32 import org.unicode.cldr.util.Builder; 33 import org.unicode.cldr.util.CLDRFile; 34 import org.unicode.cldr.util.CLDRFile.DraftStatus; 35 import org.unicode.cldr.util.CLDRPaths; 36 import org.unicode.cldr.util.CldrUtility; 37 import org.unicode.cldr.util.Factory; 38 import org.unicode.cldr.util.ICUServiceBuilder; 39 import org.unicode.cldr.util.LanguageTagParser; 40 import org.unicode.cldr.util.Log; 41 import org.unicode.cldr.util.PatternCache; 42 import org.unicode.cldr.util.SortedBag; 43 import org.unicode.cldr.util.TimezoneFormatter; 44 import org.unicode.cldr.util.TransliteratorUtilities; 45 import org.w3c.dom.NamedNodeMap; 46 import org.w3c.dom.Node; 47 48 import com.ibm.icu.dev.tool.UOption; 49 import com.ibm.icu.dev.util.UnicodeMap; 50 import com.ibm.icu.impl.Relation; 51 import com.ibm.icu.lang.UCharacter; 52 import com.ibm.icu.text.Collator; 53 import com.ibm.icu.text.DateFormat; 54 import com.ibm.icu.text.DecimalFormat; 55 import com.ibm.icu.text.Normalizer; 56 import com.ibm.icu.text.RuleBasedCollator; 57 import com.ibm.icu.text.SimpleDateFormat; 58 import com.ibm.icu.text.UTF16; 59 import com.ibm.icu.text.UnicodeSet; 60 import com.ibm.icu.text.UnicodeSetIterator; 61 import com.ibm.icu.util.ULocale; 62 63 //import org.unicode.cldr.tool.GenerateCldrDateTimeTests; 64 65 /** 66 * Generated tests for CLDR. 67 * 68 * @author medavis 69 */ 70 71 public class GenerateCldrTests { 72 73 protected static final boolean METAZONES_WORK = false; 74 75 // static private PrintWriter log; 76 PrintWriter out; 77 78 private static final int HELP1 = 0, HELP2 = 1, SOURCEDIR = 2, DESTDIR = 3, 79 LOGDIR = 4, MATCH = 5, NOT_RESOLVED = 6, LANGUAGES = 7, 80 SHOW = 8; 81 82 private static final UOption[] options = { 83 UOption.HELP_H(), 84 UOption.HELP_QUESTION_MARK(), 85 UOption.SOURCEDIR().setDefault(CLDRPaths.COMMON_DIRECTORY), 86 UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "/test/"), 87 UOption.create("log", 'l', UOption.REQUIRES_ARG).setDefault(CLDRPaths.GEN_DIRECTORY), 88 UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"), 89 UOption.create("notresolved", 'n', UOption.NO_ARG), 90 UOption.create("languages", 'g', UOption.NO_ARG), 91 // "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"), 92 UOption.create("show", 's', UOption.NO_ARG), }; 93 94 private static final String VERSION = CLDRFile.GEN_VERSION; 95 96 GenerateCldrCollationTests cldrCollations; 97 98 static String logDir = null, destDir = null; 99 hasLocalizedLanguageFor(String locale, String otherLocale)100 public static boolean hasLocalizedLanguageFor(String locale, 101 String otherLocale) { 102 String lang = new LanguageTagParser().set(otherLocale).getLanguage(); 103 String localizedVersion = english.getName(locale); 104 return !lang.equals(localizedVersion); 105 } 106 hasLocalizedCountryFor(String locale, String otherLocale)107 public static boolean hasLocalizedCountryFor(String locale, 108 String otherLocale) { 109 String country = new LanguageTagParser().set(otherLocale).getRegion(); 110 if (country.equals("")) 111 return true; 112 String localizedVersion = english.getName(CLDRFile.TERRITORY_NAME, locale); 113 return !country.equals(localizedVersion); 114 } 115 main(String[] args)116 public static void main(String[] args) throws Exception { 117 double deltaTime = System.currentTimeMillis(); 118 UOption.parseArgs(args, options); 119 Log.setLog(options[LOGDIR].value, "log.txt"); 120 // log = FileUtilities.openUTF8Writer(options[LOGDIR].value, "log.txt"); 121 try { 122 if (options[LANGUAGES].doesOccur) { 123 GenerateStatistics.generateSize( 124 options[GenerateCldrTests.SOURCEDIR].value + "main/", 125 options[GenerateCldrTests.LOGDIR].value, 126 options[MATCH].value, true); 127 return; 128 } 129 // compareAvailable(); 130 131 // if (true) return; 132 // System.out.println(createCaseClosure(new 133 // UnicodeSet("[a{bc}{def}{oss}]"))); 134 // System.out.println(createCaseClosure(new 135 // UnicodeSet("[a-z\u00c3\u0178{aa}]"))); 136 GenerateCldrTests t = new GenerateCldrTests(); 137 // t.generate(new ULocale("hu"), null); 138 t.generate(options[MATCH].value); 139 } finally { 140 Log.close(); 141 deltaTime = System.currentTimeMillis() - deltaTime; 142 System.out.println("Elapsed: " + deltaTime / 1000.0 + " seconds"); 143 System.out.println("Done"); 144 } 145 } 146 147 /* 148 * private static void compareAvailable() { String[] cols = 149 * Collator.getAvailableULocales(); Locale[] alocs = 150 * NumberFormat.getAvailableLocales(); Set sCols = filter(cols); Set sLocs = 151 * filter(alocs); Set oldSLocs = new TreeSet(sCols); sLocs.removeAll(sCols); 152 * log.println("main - collation"); showLocales(sLocs); 153 * sCols.removeAll(oldSLocs); log.println(); 154 * log.println("collation - main"); showLocales(sCols); } 155 */ 156 157 /** 158 * 159 */ 160 /* 161 * private static void checkLocaleNames() {Stringe[] locales = 162 * String.getAvailableLocales(); for (int i = 0; i < locales.length; ++i) { 163 * if (!hasLocalizedCountryFor(String.ENGLISH, locales[i]) || 164 * !hasLocalizedLanguageFor(String.ENGLISH, locales[i]) || 165 * !hasLocalizedCountryFor(locales[i], locales[i]) || 166 * !hasLocalizedLanguageFor(locales[i], locales[i])) { 167 * Log.getLog().print("FAILURE\t"); } else { 168 * Log.getLog().print(" \t"); } Log.logln(locales[i] + "\t" + 169 * locales[i].getDisplayName(String.ENGLISH) + "\t" + 170 * locales[i].getDisplayName(locales[i])); } } 171 */ 172 /** 173 * @param sLocs 174 */ showLocales(Set<String> sLocs)175 private static void showLocales(Set<String> sLocs) { 176 for (Iterator<String> it = sLocs.iterator(); it.hasNext();) { 177 String s = it.next(); 178 Log.logln(s + "\t" + ULocale.getDisplayLanguage(s, "en")); 179 } 180 } 181 182 /** 183 * @param cols 184 * @return 185 */ filter(Object[] cols)186 private static Set<String> filter(Object[] cols) { 187 Set<String> result = new TreeSet<String>(); 188 for (int i = 0; i < cols.length; ++i) { 189 String s = cols[i].toString(); 190 if (s.indexOf('_') >= 0) 191 continue; 192 result.add(s); 193 } 194 return result; 195 } 196 addULocales(Object[] objects, Set<String> target)197 Set<String> addULocales(Object[] objects, Set<String> target) { 198 for (int i = 0; i < objects.length; ++i) { 199 target.add(objects[i].toString()); 200 } 201 return target; 202 } 203 204 LanguageTagParser ltp = new LanguageTagParser(); 205 addLocale(String locale)206 private void addLocale(String locale) { 207 String lang; 208 try { 209 lang = ltp.set(locale).getLanguageScript(); 210 // lang = locale.getLanguage(); 211 if (lang.length() == 0 || lang.equals("root")) 212 return; // skip root 213 } catch (RuntimeException e) { 214 return; // illegal locale name, must be supplemental 215 } 216 // ULocale parent = new ULocale(lang); 217 // System.out.println(item + ", " + parent); 218 parentToLocales.put(lang, locale); 219 /* 220 * RuleBasedCollator col = cldrCollations.getInstance(item); if (col == 221 * null) { System.out.println("No collator for: " + item); } String 222 * rules = col.getRules(); // 223 * ((RuleBasedCollator)Collator.getInstance(item)).getRules(); 224 * rulesToLocales.add(rules, item); localesToRules.put(item, rules); 225 */ 226 } 227 228 Set<String> collationLocales = new TreeSet<String>(); // =ULocaleComparator 229 // addULocales(Collator.getAvailableULocales(), 230 // new 231 // TreeSet(ULocaleComparator)); 232 233 // Set numberLocales = addULocales(NumberFormat.getAvailableLocales(), new 234 // TreeSet(ULocaleComparator)); 235 // Set dateLocales = addULocales(DateFormat.getAvailableLocales(), new 236 // TreeSet(ULocaleComparator)); 237 Set<String> allLocales = new TreeSet<String>(); // ULocaleComparator 238 239 // Map localesToRules = new HashMap(); 240 241 // Relation rulesToLocales = new Relation(new TreeMap(ULocaleComparator),TreeSet.class); 242 243 Relation<String, String> parentToLocales = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 244 245 /* 246 * void getLocaleList() { collationLocales = new TreeSet(ULocaleComparator); 247 * collationLocales.addAll(cldrCollations.getAvailableSet()); 248 * 249 * collationLocales = addULocales(new String[] { // HACK "ga", "nl", "pt", 250 * "de@collation=phonebook", "es@collation=traditional", 251 * "hi@collation=direct", "zh@collation=pinyin", "zh@collation=stroke", 252 * "zh@collation=traditional", }, collationLocales); 253 * 254 * allLocales.addAll(collationLocales); allLocales.addAll(numberLocales); 255 * allLocales.addAll(dateLocales); // HACK // get all collations with same 256 * rules 257 * 258 * for (Iterator it = allLocales.iterator(); it.hasNext();) { 259 * addLocale((ULocale) it.next()); } 260 * 261 * String[] others = new String[] { "de@collation=phonebook", 262 * "es@collation=traditional", "hi@collation=direct", "zh@collation=pinyin", 263 * "zh@collation=stroke", "zh@collation=traditional", }; for (int i = 0; i < 264 * others.length; ++i) { addLocale(new ULocale(others[i])); } 265 * 266 * } 267 */ 268 269 // GenerateCldrDateTimeTests cldrOthers; 270 Factory mainCldrFactory; 271 272 ICUServiceBuilder icuServiceBuilder; 273 274 private static CLDRFile english; 275 276 // static Transform<String,ULocale> TO_LOCALE = new Transform<String,ULocale>(){ 277 // public ULocale transform(String source) { 278 // return new ULocale(source); 279 // } 280 // }; 281 generate(String pat)282 void generate(String pat) throws Exception { 283 mainCldrFactory = Factory.make(options[SOURCEDIR].value + "main" 284 + File.separator, pat); 285 english = mainCldrFactory.make("en", true); 286 Factory collationCldrFactory = Factory.make(options[SOURCEDIR].value 287 + "collation" + File.separator, pat); 288 //Factory supplementalCldrFactory = Factory.make(options[SOURCEDIR].value 289 // + "supplemental" + File.separator, ".*"); 290 291 // allLocales = Builder.with(allLocales) 292 // .addAll(mainCldrFactory.getAvailable(), TO_LOCALE) 293 // .addAll(collationCldrFactory.getAvailable(), TO_LOCALE) 294 // .freeze(); 295 allLocales.addAll(mainCldrFactory.getAvailable()); 296 if (!allLocales.containsAll(collationCldrFactory.getAvailable())) { 297 System.err.println("Collation locale that is not in main!\t" 298 + Builder.with(new TreeSet<String>(collationCldrFactory.getAvailable())).removeAll(allLocales).get()); 299 } 300 allLocales.addAll(collationCldrFactory.getAvailable()); 301 allLocales = Collections.unmodifiableSet(allLocales); 302 303 cldrCollations = new GenerateCldrCollationTests(options[SOURCEDIR].value 304 + "collation" + File.separator, pat, allLocales); 305 if (options[SHOW].doesOccur) 306 cldrCollations.show(); 307 308 for (Iterator<String> it = cldrCollations.getAvailableSet().iterator(); it 309 .hasNext();) { 310 collationLocales.add(it.next()); 311 } 312 if (!allLocales.containsAll(collationLocales)) { 313 System.err.println("Collation locale that is not in main!\t" 314 + Builder.with(new TreeSet<String>(collationLocales)).removeAll(allLocales).get()); 315 } 316 collationLocales = allLocales; 317 318 // TODO HACK 319 // collationLocales.remove("ar_IN"); 320 icuServiceBuilder = new ICUServiceBuilder(); 321 /* 322 * cldrOthers = new GenerateCldrDateTimeTests(options[SOURCEDIR].value + 323 * "main" + File.separator, pat, 324 * !options[GenerateCldrTests.NOT_RESOLVED].doesOccur); if 325 * (options[SHOW].doesOccur) cldrOthers.show(); 326 */ 327 // getLocaleList(); 328 for (Iterator<String> it = collationLocales.iterator(); it.hasNext();) { 329 addLocale(it.next()); 330 } 331 332 Matcher m = PatternCache.get(pat).matcher(""); 333 for (Iterator<String> it = parentToLocales.keySet().iterator(); it.hasNext();) { 334 String p = it.next(); 335 if (!m.reset(p).matches()) 336 continue; 337 generate2(p); 338 } 339 } 340 generate2(String locale)341 private void generate2(String locale) throws Exception { 342 System.out.println("Main Generation:\t" + locale); 343 out = FileUtilities.openUTF8Writer(options[DESTDIR].value, locale + ".xml"); 344 out.println("<?xml version='1.0' encoding='UTF-8' ?>"); 345 out.println( 346 // "<!DOCTYPE cldrTest SYSTEM 'http://www.unicode.org/cldr/dtd/1.5/cldrTest.dtd'>" 347 // + 348 "<!DOCTYPE cldrTest SYSTEM '../common/dtd/cldrTest.dtd'>"); 349 out.println("<!-- For information, see readme.html -->"); 350 out.println(" <cldrTest version='" + VERSION + 351 "' base='" + locale + "'>"); 352 CLDRFile localeFile = mainCldrFactory.make(locale, true); 353 out.println(" <!-- " 354 + TransliteratorUtilities.toXML.transliterate(english.getName(locale) 355 + " [" + localeFile.getName(locale)) 356 + "] -->"); 357 generateItems(locale, allLocales, NumberShower); 358 generateItems(locale, allLocales, DateShower); 359 generateItems(locale, allLocales, ZoneFieldShower); 360 generateItems(locale, collationLocales, CollationShower); 361 out.println(" </cldrTest>"); 362 out.close(); 363 ToolUtilities.generateBat(options[SOURCEDIR].value + "test" + File.separator, 364 locale + ".xml", options[DESTDIR].value, locale + ".xml", 365 new CldrUtility.SimpleLineComparator(0)); 366 } 367 368 /* 369 * 370 * // first pass through and get all the functional equivalents Map 371 * uniqueLocales = new TreeMap(); 372 * 373 * String[] keywords = Collator.getKeywords(); boolean [] isAvailable = new 374 * boolean[1]; for (int i = 0; i < locales.length; ++i) { add(locales[i], 375 * uniqueLocales); if (true) continue; // TODO restore once Vladimir fixes 376 * for (int j = 0; j < keywords.length; ++j) { String[] values = 377 * Collator.getKeywordValues(keywords[j]); for (int k = 0; k < 378 * values.length; ++k) { // TODO -- for a full job, would do all 379 * combinations of different keywords! if (values[k].equals("standard")) 380 * continue; add(new ULocale(locales[i] + "@" + keywords[j] + "=" + 381 * values[k]), uniqueLocales); //ULocale other = 382 * Collator.getFunctionalEquivalent(keywords[j], locales[i], isAvailable); } 383 * } } for (int i = 0; i < extras.length; ++i) { add(new ULocale(extras[i]), 384 * uniqueLocales); } // items are now sorted by rules. So resort by locale 385 * Map toDo = new TreeMap(ULocaleComparator); for (Iterator it = 386 * uniqueLocales.keySet().iterator(); it.hasNext();) { Object rules = 387 * it.next(); Set s = (Set) uniqueLocales.get(rules); ULocale ulocale = 388 * (ULocale) s.iterator().next(); // get first one toDo.put(ulocale, s); } 389 * for (Iterator it = toDo.keySet().iterator(); it.hasNext();) { ULocale 390 * ulocale = (ULocale) it.next(); Set s = (Set) toDo.get(ulocale); 391 * generate(ulocale); } 392 */ 393 394 /** 395 * add locale into list. Replace old if shorter 396 * 397 * @param locale 398 */ add(String locale, Map<String, Set<String>> uniqueLocales)399 void add(String locale, Map<String, Set<String>> uniqueLocales) { 400 try { 401 RuleBasedCollator col = cldrCollations.getInstance(locale); // (RuleBasedCollator) 402 // Collator.getInstance(locale); 403 // for our purposes, separate locales if we are using different 404 // exemplars 405 String key = col.getRules() + "\uFFFF" + getExemplarSet(locale, 0, DraftStatus.unconfirmed); 406 Set<String> s = uniqueLocales.get(key); 407 if (s == null) { 408 s = new TreeSet<String>(ULocaleComparator); 409 uniqueLocales.put(key, s); 410 } 411 System.out.println("Adding " + locale); 412 s.add(locale); 413 } catch (Throwable e) { // skip 414 System.out.println("skipped " + locale); 415 } 416 } 417 418 /** 419 * Work-around 420 */ getExemplarSet(String locale, int options, DraftStatus minimalDraftStatus)421 public UnicodeSet getExemplarSet(String locale, int options, 422 DraftStatus minimalDraftStatus) { 423 String n = locale.toString(); 424 int pos = n.indexOf('@'); 425 if (pos >= 0) 426 locale = n.substring(0, pos); 427 CLDRFile cldrFile = mainCldrFactory.make(locale.toString(), true, 428 minimalDraftStatus); 429 String v = cldrFile.getStringValue("//ldml/characters/exemplarCharacters"); 430 UnicodeSet result = new UnicodeSet(v); 431 v = cldrFile 432 .getStringValue("//ldml/characters/exemplarCharacters[@type=\"auxiliary\"]"); 433 if (v != null) { 434 result.addAll(new UnicodeSet(v)); 435 } 436 if (options == 0) 437 result.closeOver(UnicodeSet.CASE); 438 return result; 439 } 440 441 public static final Comparator<Object> ULocaleComparator = new Comparator<Object>() { 442 public int compare(Object o1, Object o2) { 443 return o1.toString().compareTo(o2.toString()); 444 } 445 }; 446 447 /* 448 * public interface Equator { public boolean equals(Object o1, Object o2); } 449 */ 450 @SuppressWarnings("rawtypes") intersects(Collection a, Collection b)451 static boolean intersects(Collection a, Collection b) { 452 for (Iterator it = a.iterator(); it.hasNext();) { 453 if (b.contains(it.next())) 454 return true; 455 } 456 return false; 457 } 458 459 /* 460 * static Collection extract(Object x, Collection a, Equator e, Collection 461 * output) { List itemsToRemove = new ArrayList(); for (Iterator it = 462 * a.iterator(); it.hasNext();) { Object item = it.next(); if (e.equals(x, 463 * item)) { itemsToRemove.add(item); // have to do this because iterator may 464 * not allow output.add(item); } } a.removeAll(itemsToRemove); return 465 * output; } 466 */ 467 class ResultsPrinter { 468 private Set<Map> listOfSettings = new LinkedHashSet<Map>(); 469 470 private transient LinkedHashMap<String, String> settings = new LinkedHashMap<String, String>(); 471 ResultsPrinter()472 ResultsPrinter() { 473 } 474 ResultsPrinter(ResultsPrinter rpIncludeDraft, ResultsPrinter rpNoDraft)475 ResultsPrinter(ResultsPrinter rpIncludeDraft, ResultsPrinter rpNoDraft) { 476 Set<Map> listOfSettings1 = rpIncludeDraft.getListOfSettings(); 477 Set<Map> listOfSettings2 = rpNoDraft.getListOfSettings(); 478 if (listOfSettings1.size() != listOfSettings2.size()) { 479 throw new InternalError("can't combine"); 480 } 481 Iterator<Map> it1 = listOfSettings1.iterator(); 482 Iterator<Map> it2 = listOfSettings2.iterator(); 483 while (it1.hasNext()) { 484 Map settings1 = it1.next(); 485 Map settings2 = it2.next(); 486 if (settings1.equals(settings2)) { 487 settings1.put("draft", "unconfirmed approved"); 488 addToListOfSettings(settings1); 489 } else { 490 // they should only differ by result! 491 settings1.put("draft", "unconfirmed"); 492 addToListOfSettings(settings1); 493 settings2.put("draft", "approved"); 494 addToListOfSettings(settings2); 495 } 496 } 497 } 498 addToListOfSettings(Map settings1)499 private void addToListOfSettings(Map settings1) { 500 for (Object key : settings1.keySet()) { 501 if (key == null || settings1.get(key) == null) { 502 throw new IllegalArgumentException("null key or value in settings."); 503 } 504 } 505 listOfSettings.add(settings1); 506 } 507 set(String name, String value)508 void set(String name, String value) { 509 if (name == null || value == null) { 510 throw new IllegalArgumentException("null key or value in settings."); 511 } 512 settings.put(name, value); 513 } 514 setResult(String result)515 void setResult(String result) { 516 if (result == null) { 517 throw new IllegalArgumentException("null key or value in settings."); 518 } 519 settings.put("result", result); 520 addToListOfSettings((Map) settings.clone()); 521 } 522 print()523 void print() { 524 Map oldSettings = new TreeMap(); 525 for (Iterator it2 = getListOfSettings().iterator(); it2.hasNext();) { 526 Map settings = (Map) it2.next(); 527 String result = (String) settings.get("result"); 528 out.print(" <result"); 529 for (Iterator it = settings.keySet().iterator(); it.hasNext();) { 530 Object key = it.next(); 531 if (key.equals("result")) 532 continue; 533 Object value = settings.get(key); 534 if (!value.equals(oldSettings.get(key))) { 535 out.print(" " + key + "='" 536 + TransliteratorUtilities.toXML.transliterate(value.toString()) 537 + "'"); 538 } 539 } 540 out.println(">" + TransliteratorUtilities.toXML.transliterate(result) 541 + "</result>"); 542 oldSettings = settings; 543 } 544 } 545 equals(Object other)546 public boolean equals(Object other) { 547 try { 548 ResultsPrinter that = (ResultsPrinter) other; 549 return getListOfSettings().equals(that.getListOfSettings()); 550 } catch (Exception e) { 551 return false; 552 } 553 } 554 hashCode()555 public int hashCode() { 556 throw new IllegalArgumentException(); 557 } 558 559 /** 560 * 561 */ 562 setListOfSettings(Set listOfSettings)563 private void setListOfSettings(Set listOfSettings) { 564 this.listOfSettings = listOfSettings; 565 } 566 getListOfSettings()567 private Set<Map> getListOfSettings() { 568 return Collections.unmodifiableSet(listOfSettings); 569 } 570 } 571 572 abstract class DataShower { show(String first_locale, DraftStatus minimalDraftStatus)573 abstract ResultsPrinter show(String first_locale, DraftStatus minimalDraftStatus); 574 show(String first)575 ResultsPrinter show(String first) throws Exception { 576 ResultsPrinter rpIncludeDraft = show(first, DraftStatus.unconfirmed); 577 ResultsPrinter rpNoDraft = show(first, DraftStatus.approved); 578 return new ResultsPrinter(rpIncludeDraft, rpNoDraft); 579 } 580 getElement()581 abstract String getElement(); 582 } 583 584 interface DataShower2 { show(ULocale first, Collection others)585 void show(ULocale first, Collection others) throws Exception; 586 } 587 generateItems(String locale, Collection<String> onlyLocales, DataShower generator)588 private void generateItems(String locale, Collection<String> onlyLocales, 589 DataShower generator) throws Exception { 590 Set<String> sublocales = new TreeSet<String>(); // ULocaleComparator 591 sublocales.add(locale); 592 sublocales.addAll(parentToLocales.getAll(locale)); 593 sublocales.retainAll(onlyLocales); 594 Map<String, ResultsPrinter> locale_results = new TreeMap<String, ResultsPrinter>(ULocaleComparator); 595 for (Iterator<String> it = sublocales.iterator(); it.hasNext();) { 596 String current = it.next(); 597 locale_results.put(current, generator.show(current)); 598 } 599 // do it this way so that the locales stay in order 600 Set<String> matchingLocales = new TreeSet<String>(ULocaleComparator); 601 while (sublocales.size() != 0) { 602 String first = sublocales.iterator().next(); 603 ResultsPrinter r = locale_results.get(first); 604 for (Iterator<String> it = sublocales.iterator(); it.hasNext();) { 605 String other = it.next(); 606 ResultsPrinter r2 = locale_results.get(other); 607 if (r2.equals(r)) 608 matchingLocales.add(other); 609 } 610 showLocales(generator.getElement(), matchingLocales); 611 r.print(); 612 out.println(" </" + generator.getElement() + ">"); 613 sublocales.removeAll(matchingLocales); 614 matchingLocales.clear(); 615 } 616 //Comparator c; 617 } 618 showLocales(String elementName, Collection<String> others)619 public void showLocales(String elementName, Collection<String> others) { 620 // System.out.println(elementName + ": " + locale); 621 out.println(" <" + elementName + " "); 622 StringBuffer comment = new StringBuffer(); 623 if (others != null && others.size() != 0) { 624 out.print("locales='"); 625 boolean first = true; 626 for (Iterator<String> it = others.iterator(); it.hasNext();) { 627 if (first) 628 first = false; 629 else { 630 out.print(" "); 631 comment.append("; "); 632 } 633 String loc = it.next(); 634 out.print(loc); 635 comment.append(english.getName(loc) + " [" 636 + getNativeName(loc) + "]"); 637 } 638 out.print("'"); 639 } 640 out.println(">"); 641 out.println("<!-- " 642 + TransliteratorUtilities.toXML.transliterate(comment.toString()) 643 + " -->"); 644 } 645 getNativeName(String loc)646 private String getNativeName(String loc) { 647 int atPos = loc.indexOf('@'); 648 String keywords = ""; 649 if (atPos >= 0) { 650 keywords = loc.substring(atPos + 1); 651 loc = loc.substring(0, atPos); 652 } 653 return mainCldrFactory.make(loc, true).getName(loc) + "@" + keywords; 654 } 655 656 DataShower ZoneFieldShower = new DataShower() { 657 658 // Set zones = new 659 // TreeSet(sc.getAvailableCodes("tzid")); 660 List<String> zones = Arrays.asList(new String[] { "America/Los_Angeles", 661 "America/Argentina/Buenos_Aires", "America/Buenos_Aires", 662 "America/Havana", "Australia/ACT", "Australia/Sydney", "Europe/London", 663 "Europe/Moscow", "Etc/GMT+3" }); 664 665 String[] perZoneSamples = { "Z", "ZZZZ", "z", "zzzz", "v", "vvvv", "V", "VVVV" }; 666 667 String[] dates = { "2004-01-15T12:00:00Z", "2004-07-15T12:00:00Z" }; 668 669 public ResultsPrinter show(String first, DraftStatus minimalDraftStatus) { 670 TimezoneFormatter tzf = new TimezoneFormatter(mainCldrFactory, first 671 .toString(), minimalDraftStatus); 672 ResultsPrinter rp = new ResultsPrinter(); 673 if (!METAZONES_WORK) { 674 return rp; 675 } 676 // TODO Auto-generated 677 // method stub 678 ParsePosition parsePosition = new ParsePosition(0); 679 for (Iterator<String> it = zones.iterator(); it.hasNext();) { 680 String tzid = it.next(); 681 rp.set("zone", tzid); 682 for (int j = 0; j < dates.length; ++j) { 683 String date = dates[j]; 684 Date datetime; 685 try { 686 datetime = ICUServiceBuilder.isoDateParse(date); 687 } catch (ParseException e1) { 688 throw new IllegalArgumentException(e1); 689 } 690 rp.set("date", dates[j]); 691 for (int i = 0; i < perZoneSamples.length; ++i) { 692 try { 693 String pattern = perZoneSamples[i]; 694 if (!METAZONES_WORK && (pattern.contains("z") || pattern.contains("V"))) { 695 continue; 696 } 697 rp.set("field", pattern); 698 String formatted = tzf.getFormattedZone(tzid, pattern, datetime.getTime(), false); 699 parsePosition.setIndex(0); 700 String parsed = tzf.parse(formatted, parsePosition); 701 if (parsed == null) { 702 // for 703 // debugging 704 formatted = tzf.getFormattedZone(tzid, pattern, datetime.getTime(), false); 705 parsePosition.setIndex(0); 706 parsed = tzf.parse(formatted, parsePosition); 707 } 708 rp.set("parse", parsed); 709 rp.setResult(formatted); 710 } catch (RuntimeException e) { 711 throw (IllegalArgumentException) new IllegalArgumentException( 712 "Failure in " + first).initCause(e); 713 } 714 } 715 } 716 } 717 return rp; 718 /* 719 * Date datetime = ICUServiceBuilder .isoDateParse (samples[j]); 720 * rp.set("input", ICUServiceBuilder .isoDateFormat (datetime)); 721 */ 722 } 723 724 public String getElement() { 725 return "zoneFields"; 726 } 727 }; 728 729 DataShower DateShower = new DataShower() { 730 public ResultsPrinter show(String locale, DraftStatus minimalDraftStatus) { 731 String[] samples = { "1900-01-31T00:00:00Z", "1909-02-28T00:00:01Z", 732 "1918-03-26T00:59:59Z", "1932-04-24T01:00:00Z", 733 "1945-05-20T01:00:01Z", "1952-06-18T11:59:59Z", 734 "1973-07-16T12:00:00Z", "1999-08-14T12:00:01Z", 735 "2000-09-12T22:59:59Z", "2001-10-08T23:00:00Z", 736 "2004-11-04T23:00:01Z", "2010-12-01T23:59:59Z", }; 737 CLDRFile cldrFile = mainCldrFactory.make(locale.toString(), true, 738 minimalDraftStatus); 739 icuServiceBuilder.setCldrFile(cldrFile); 740 ResultsPrinter rp = new ResultsPrinter(); 741 for (int j = 0; j < samples.length; ++j) { 742 Date datetime; 743 try { 744 datetime = ICUServiceBuilder.isoDateParse(samples[j]); 745 } catch (ParseException e) { 746 throw new IllegalArgumentException(e); 747 } 748 rp.set("input", ICUServiceBuilder.isoDateFormat(datetime)); 749 for (int i = 0; i < ICUServiceBuilder.LIMIT_DATE_FORMAT_INDEX; ++i) { 750 rp.set("dateType", ICUServiceBuilder.getDateNames(i)); 751 for (int k = 0; k < ICUServiceBuilder.LIMIT_DATE_FORMAT_INDEX; ++k) { 752 if (i == 0 && k == 0) 753 continue; 754 DateFormat df = icuServiceBuilder.getDateFormat("gregorian", i, k); 755 String pattern = ((SimpleDateFormat) df).toPattern(); 756 if (!METAZONES_WORK && (pattern.contains("z") || pattern.contains("V"))) { 757 continue; 758 } 759 rp.set("timeType", ICUServiceBuilder.getDateNames(k)); 760 if (false && i == 2 && k == 0) { 761 System.out.println("debug: date " 762 + icuServiceBuilder.getDateNames(i) + ", time " 763 + icuServiceBuilder.getDateNames(k) + " = " 764 + df.format(datetime)); 765 } 766 rp.setResult(df.format(datetime)); 767 } 768 } 769 } 770 return rp; 771 } 772 773 public String getElement() { 774 return "date"; 775 } 776 }; 777 778 DataShower NumberShower = new DataShower() { 779 public ResultsPrinter show(String locale, DraftStatus minimalDraftStatus) { 780 CLDRFile cldrFile = mainCldrFactory.make(locale.toString(), true, 781 minimalDraftStatus); 782 icuServiceBuilder.setCldrFile(cldrFile); 783 784 double[] samples = { 0, 0.01, -0.01, 1, -1, 123.456, -123.456, 123456.78, 785 -123456.78, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 786 Double.NaN }; 787 ResultsPrinter rp = new ResultsPrinter(); 788 for (int j = 0; j < samples.length; ++j) { 789 double sample = samples[j]; 790 rp.set("input", String.valueOf(sample)); 791 for (int i = 0; i < ICUServiceBuilder.LIMIT_NUMBER_INDEX; ++i) { 792 rp.set("numberType", icuServiceBuilder.getNumberNames(i)); 793 DecimalFormat nf = icuServiceBuilder.getNumberFormat(i); 794 String formatted = nf.format(sample); 795 if (formatted.indexOf("NaNNaN") >= 0) { 796 formatted = nf.format(sample); // for 797 // debugging 798 } 799 rp.setResult(formatted); 800 } 801 } 802 return rp; 803 } 804 805 public String getElement() { 806 return "number"; 807 } 808 }; 809 810 // ========== COLLATION ========== 811 812 /* 813 * Equator CollationEquator = new Equator() { 814 *//** 815 * Must both be ULocales 816 */ 817 /* 818 * public boolean equals(Object o1, Object o2) { try { ULocale loc1 = 819 * (ULocale) o1; ULocale loc2 = (ULocale) o2; if (loc1.equals(loc2)) return 820 * true; return 821 * cldrCollations.getInstance(loc1).equals(cldrCollations.getInstance 822 * (loc2)); } catch (RuntimeException e) { System.out.println("Failed on: " 823 * + o1 + " ;\t" + o2); throw e; } } }; 824 */static ULocale zhHack = new ULocale("zh"); // FIXME 825 // hack 826 // for 827 // zh 828 829 DataShower CollationShower = new DataShower() { 830 public ResultsPrinter show(String locale, DraftStatus minimalDraftStatus) { 831 // if 832 // (locale.equals(zhHack)) 833 // return; 834 835 Collator col = cldrCollations.getInstance(locale); // Collator.getInstance(locale); 836 837 UnicodeSet tailored = new UnicodeSet(); 838 if (col != null) { 839 tailored = col.getTailoredSet(); 840 if (new LanguageTagParser().set(locale).getLanguage().equals("zh")) { 841 tailored.addAll(new UnicodeSet("[[a-z]-[v]]")); 842 Log.logln("HACK for Pinyin"); 843 } 844 tailored = createCaseClosure(tailored); 845 tailored = nfc(tailored); 846 } else { 847 System.out.println("No collation for: " + locale); 848 col = cldrCollations.getInstance("root"); 849 } 850 // System.out.println(tailored.toPattern(true)); 851 852 UnicodeSet exemplars = getExemplarSet(locale, UnicodeSet.CASE, 853 minimalDraftStatus); 854 // add all the exemplars 855 856 exemplars = createCaseClosure(exemplars); 857 exemplars = nfc(exemplars); 858 // System.out.println(exemplars.toPattern(true)); 859 tailored.addAll(exemplars); 860 // UnicodeSet 861 // tailoredMinusHan = new 862 // UnicodeSet(tailored).removeAll(SKIP_COLLATION_SET); 863 if (!exemplars.containsAll(tailored)) { 864 // BagFormatter bf = 865 // new 866 // BagFormatter(); 867 Log.logln("In Tailored, but not Exemplar; Locale: " + locale + "\t" 868 + english.getName(locale)); 869 Log.logln(new UnicodeSet(tailored).removeAll(exemplars) 870 .toPattern(false)); 871 // bf.(log,"tailored", 872 // tailored, 873 // "exemplars", 874 // exemplars); 875 Log.getLog().flush(); 876 } 877 tailored.addAll(new UnicodeSet("[\\ .02{12}]")); 878 tailored.removeAll(SKIP_COLLATION_SET); 879 880 SortedBag bag = new SortedBag(col); 881 return doCollationResult(col, tailored, bag); 882 } 883 884 public String getElement() { 885 return "collation"; 886 } 887 }; 888 889 /* 890 * public void show(ULocale locale, Collection others) { 891 * showLocales("collation", others); 892 * 893 * Collator col = cldrCollations.getInstance(locale); // 894 * Collator.getInstance(locale); 895 * 896 * UnicodeSet tailored = col.getTailoredSet(); if 897 * (locale.getLanguage().equals("zh")) { tailored.addAll(new 898 * UnicodeSet("[[a-z]-[v]]")); log.println("HACK for Pinyin"); } tailored = 899 * createCaseClosure(tailored); tailored = nfc(tailored); 900 * //System.out.println(tailored.toPattern(true)); 901 * 902 * UnicodeSet exemplars = getExemplarSet(locale, UnicodeSet.CASE); // add 903 * all the exemplars if (false) for (Iterator it = others.iterator(); 904 * it.hasNext(); ) { exemplars.addAll(getExemplarSet((ULocale)it.next(), 905 * UnicodeSet.CASE)); } 906 * 907 * exemplars = createCaseClosure(exemplars); exemplars = nfc(exemplars); 908 * //System.out.println(exemplars.toPattern(true)); 909 * tailored.addAll(exemplars); //UnicodeSet tailoredMinusHan = new 910 * UnicodeSet(tailored).removeAll(SKIP_COLLATION_SET); if 911 * (!exemplars.containsAll(tailored)) { //BagFormatter bf = new 912 * BagFormatter(); log.println("In Tailored, but not Exemplar; Locale: " + 913 * locale + "\t" + locale.getDisplayName()); log.println(new 914 * UnicodeSet(tailored).removeAll(exemplars).toPattern(false)); 915 * //bf.(log,"tailored", tailored, "exemplars", exemplars); log.flush(); } 916 * tailored.addAll(new UnicodeSet("[\\ .02{12}]")); 917 * tailored.removeAll(SKIP_COLLATION_SET); 918 * 919 * SortedBag bag = new SortedBag(col); doCollationResult(col, tailored, 920 * bag); out.println(" </collation>"); }}; 921 */ 922 static final UnicodeSet SKIP_COLLATION_SET = new UnicodeSet( 923 "[[:script=han:][:script=hangul:]-[\u4e00-\u4eff \u9f00-\u9fff \uac00-\uacff \ud700-\ud7ff]]"); 924 925 /** 926 * @param col 927 * @param tailored 928 * @param bag 929 */ doCollationResult(Collator col, UnicodeSet tailored, SortedBag bag)930 private ResultsPrinter doCollationResult(Collator col, UnicodeSet tailored, 931 SortedBag bag) { 932 for (UnicodeSetIterator usi = new UnicodeSetIterator(tailored); usi.next();) { 933 String s = usi.getString(); 934 bag.add('x' + s); 935 bag.add('X' + s); 936 bag.add('x' + s + 'x'); 937 } 938 // out.println(" <set locale='" + locale + "'/>"); 939 /* 940 * if (others != null) for (Iterator it = others.iterator(); 941 * it.hasNext(); ) { ULocale uloc = (ULocale) it.next(); if 942 * (uloc.equals(locale)) continue; out.println(" <other locale='" + 943 * uloc + "'/>"); } 944 */ 945 String last = ""; 946 boolean needEquals = false; 947 StringBuffer tempResult = new StringBuffer(CldrUtility.LINE_SEPARATOR); 948 for (Iterator<String> it = bag.iterator(); it.hasNext();) { 949 String s = it.next(); 950 if (col.compare(s, last) != 0) { 951 if (needEquals) 952 tempResult.append(last).append(CldrUtility.LINE_SEPARATOR); 953 needEquals = false; 954 last = s; 955 } else { 956 needEquals = true; 957 } 958 tempResult.append(TransliteratorUtilities.toXML.transliterate(s)).append( 959 CldrUtility.LINE_SEPARATOR); 960 } 961 ResultsPrinter result = new ResultsPrinter(); 962 result.setResult(tempResult.toString()); 963 return result; 964 } 965 getMatchingXMLFiles(String dir, String localeRegex)966 static public Set<String> getMatchingXMLFiles(String dir, String localeRegex) { 967 Matcher m = PatternCache.get(localeRegex).matcher(""); 968 Set<String> s = new TreeSet<String>(); 969 File[] files = new File(dir).listFiles(); 970 for (int i = 0; i < files.length; ++i) { 971 String name = files[i].getName(); 972 if (!name.endsWith(".xml")) 973 continue; 974 if (name.startsWith("supplementalData")) 975 continue; 976 String locale = name.substring(0, name.length() - 4); // drop .xml 977 if (!locale.equals("root") && !m.reset(locale).matches()) 978 continue; 979 s.add(locale); 980 } 981 return s; 982 } 983 984 /* 985 * public static boolean isDraft(Node node) { for (; node.getNodeType() != 986 * Node.DOCUMENT_NODE; node = node.getParentNode()){ NamedNodeMap attributes 987 * = node.getAttributes(); if (attributes == null) continue; for (int i = 0; 988 * i < attributes.getLength(); ++i) { Node attribute = attributes.item(i); 989 * if (attribute.getNodeName().equals("draft") && 990 * attribute.getNodeValue().equals("true")) return true; } } return false; } 991 */ getXPath(Node node)992 public static String getXPath(Node node) { 993 StringBuffer xpathFragment = new StringBuffer(); 994 StringBuffer xpath = new StringBuffer(); 995 for (; node.getNodeType() != Node.DOCUMENT_NODE; node = node 996 .getParentNode()) { 997 xpathFragment.setLength(0); 998 xpathFragment.append('/').append(node.getNodeName()); 999 NamedNodeMap attributes = node.getAttributes(); 1000 if (attributes != null) { 1001 for (int i = 0; i < attributes.getLength(); ++i) { 1002 Node attribute = attributes.item(i); 1003 xpathFragment.append("[@").append(attribute.getNodeName()) 1004 .append('=').append(attribute.getNodeValue()).append(']'); 1005 } 1006 } 1007 xpath.insert(0, xpathFragment); 1008 } 1009 xpath.insert(0, '/'); 1010 return xpath.toString(); 1011 } 1012 replace(String source, String pattern, String replacement)1013 public static String replace(String source, String pattern, String replacement) { 1014 // dumb code for now 1015 for (int pos = source.indexOf(pattern, 0); pos >= 0; pos = source.indexOf( 1016 pattern, pos + 1)) { 1017 source = source.substring(0, pos) + replacement 1018 + source.substring(pos + pattern.length()); 1019 } 1020 return source; 1021 } 1022 1023 public static interface Apply { apply(String source)1024 String apply(String source); 1025 } 1026 apply(UnicodeSet source, Apply apply)1027 static UnicodeSet apply(UnicodeSet source, Apply apply) { 1028 UnicodeSet target = new UnicodeSet(); 1029 for (UnicodeSetIterator usi = new UnicodeSetIterator(source); usi.next();) { 1030 String s = usi.getString(); 1031 target.add(apply.apply(s)); 1032 } 1033 return target; 1034 } 1035 nfc(UnicodeSet source)1036 static UnicodeSet nfc(UnicodeSet source) { 1037 return apply(source, new Apply() { 1038 public String apply(String source) { 1039 return Normalizer.compose(source, false); 1040 } 1041 }); 1042 } 1043 1044 public static interface CloseCodePoint { 1045 /** 1046 * @param cp 1047 * code point to get closure for 1048 * @param toAddTo 1049 * Unicode set for the closure 1050 * @return toAddTo (for chaining) 1051 */ 1052 UnicodeSet close(int cp, UnicodeSet toAddTo); 1053 } 1054 1055 public static UnicodeSet createCaseClosure(UnicodeSet source) { 1056 UnicodeSet target = new UnicodeSet(); 1057 for (UnicodeSetIterator usi = new UnicodeSetIterator(source); usi.next();) { 1058 String s = usi.getString(); 1059 UnicodeSet temp = createClosure(s, CCCP); 1060 if (temp == null) 1061 target.add(s); 1062 else 1063 target.addAll(temp); 1064 } 1065 return target; 1066 } 1067 1068 public static class UnicodeSetComparator implements Comparator<Object> { 1069 UnicodeSetIterator ait = new UnicodeSetIterator(); 1070 1071 UnicodeSetIterator bit = new UnicodeSetIterator(); 1072 1073 public int compare(Object o1, Object o2) { 1074 if (o1 == o2) 1075 return 0; 1076 if (o1 == null) 1077 return -1; 1078 if (o2 == null) 1079 return 1; 1080 UnicodeSet a = (UnicodeSet) o1; 1081 UnicodeSet b = (UnicodeSet) o2; 1082 if (a.size() != b.size()) { 1083 return a.size() < b.size() ? -1 : 1; 1084 } 1085 ait.reset(a); 1086 bit.reset(b); 1087 while (ait.nextRange()) { 1088 bit.nextRange(); 1089 if (ait.codepoint != bit.codepoint) { 1090 return ait.codepoint < bit.codepoint ? -1 : 1; 1091 } 1092 if (ait.codepoint == UnicodeSetIterator.IS_STRING) { 1093 int result = ait.string.compareTo(bit.string); 1094 if (result != 0) 1095 return result; 1096 } else if (ait.codepointEnd != bit.codepointEnd) { 1097 return ait.codepointEnd < bit.codepointEnd ? -1 : 1; 1098 } 1099 } 1100 return 0; 1101 } 1102 } 1103 1104 public static final CloseCodePoint CCCP = new CloseCodePoint() { 1105 Locale locale = Locale.ENGLISH; 1106 1107 UnicodeSet NONE = new UnicodeSet(); 1108 1109 UnicodeMap<UnicodeSet> map = new UnicodeMap<UnicodeSet>(); // new 1110 1111 // UnicodeSetComparator() 1112 1113 public UnicodeSet close(int cp, UnicodeSet toAddTo) { 1114 UnicodeSet result = map.getValue(cp); 1115 if (result == null) { 1116 result = new UnicodeSet(); 1117 result.add(cp); 1118 String s = UCharacter.toLowerCase(locale, UTF16.valueOf(cp)); 1119 result.add(s); 1120 s = UCharacter.toUpperCase(locale, UTF16.valueOf(cp)); 1121 result.add(s); 1122 s = UCharacter.toTitleCase(locale, UTF16.valueOf(cp), null); 1123 result.add(s); 1124 // special hack 1125 if (result.contains("SS")) 1126 result.add("sS").add("ss"); 1127 if (result.size() == 1) 1128 result = NONE; 1129 map.put(cp, result); 1130 } 1131 if (result != NONE) 1132 toAddTo.addAll(result); 1133 else 1134 toAddTo.add(cp); 1135 return toAddTo; 1136 } 1137 }; 1138 1139 public static UnicodeSet createClosure(String source, CloseCodePoint closer) { 1140 return createClosure(source, 0, closer); 1141 } 1142 1143 public static UnicodeSet createClosure(String source, int position, 1144 CloseCodePoint closer) { 1145 UnicodeSet result = new UnicodeSet(); 1146 // if at end, return empty set 1147 if (position >= source.length()) 1148 return result; 1149 int cp = UTF16.charAt(source, position); 1150 // if last character, return its set 1151 int endPosition = position + UTF16.getCharCount(cp); 1152 if (endPosition >= source.length()) 1153 return closer.close(cp, result); 1154 // otherwise concatenate its set with the remainder 1155 UnicodeSet remainder = createClosure(source, endPosition, closer); 1156 return createAppend(closer.close(cp, result), remainder); 1157 } 1158 1159 /** 1160 * Produce the result of appending each element of this to each element of 1161 * other. That is, [a{cd}] + [d{ef}] => [{ad}{aef}{cdd}{cdef}] 1162 */ 1163 public static UnicodeSet createAppend(UnicodeSet a, UnicodeSet b) { 1164 UnicodeSet target = new UnicodeSet(); 1165 for (UnicodeSetIterator usi = new UnicodeSetIterator(a); usi.next();) { 1166 String s = usi.getString(); 1167 for (UnicodeSetIterator usi2 = new UnicodeSetIterator(b); usi2.next();) { 1168 String s2 = usi2.getString(); 1169 target.add(s + s2); 1170 } 1171 } 1172 return target; 1173 } 1174 } 1175