1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2004, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 10 package org.unicode.cldr.util; 11 12 import java.io.BufferedReader; 13 import java.io.File; 14 import java.io.IOException; 15 import java.io.PrintStream; 16 import java.io.PrintWriter; 17 import java.text.ParseException; 18 import java.text.ParsePosition; 19 import java.util.ArrayList; 20 import java.util.Date; 21 import java.util.Iterator; 22 import java.util.List; 23 import java.util.Map; 24 import java.util.Set; 25 import java.util.TreeMap; 26 import java.util.TreeSet; 27 import java.util.regex.Matcher; 28 29 import org.unicode.cldr.draft.FileUtilities; 30 import org.unicode.cldr.test.ExampleGenerator; 31 import org.unicode.cldr.tool.GenerateAttributeList; 32 33 import com.google.common.base.Joiner; 34 import com.ibm.icu.impl.Relation; 35 import com.ibm.icu.lang.UCharacter; 36 import com.ibm.icu.lang.UProperty; 37 import com.ibm.icu.text.BreakIterator; 38 import com.ibm.icu.text.Collator; 39 import com.ibm.icu.text.RuleBasedBreakIterator; 40 import com.ibm.icu.text.RuleBasedCollator; 41 import com.ibm.icu.text.Transliterator; 42 import com.ibm.icu.text.UTF16; 43 import com.ibm.icu.text.UnicodeSet; 44 import com.ibm.icu.text.UnicodeSetIterator; 45 import com.ibm.icu.util.ULocale; 46 import com.ibm.icu.util.UniversalTimeScale; 47 48 /** 49 * @author davis 50 * 51 * TODO To change the template for this generated type comment go to Window - 52 * Preferences - Java - Code Style - Code Templates 53 */ 54 public class TestUtilities { 55 enum State { 56 a, b, c; 57 public static State cc = c; 58 } 59 60 String s; 61 main(String[] args)62 public static void main(String[] args) throws Exception { 63 try { 64 checkStandardCodes(); 65 if (true) return; 66 testExampleGenerator(); 67 for (String lang : Iso639Data.getAvailable()) { 68 String biblio = Iso639Data.toBiblio3(lang); 69 if (biblio == null) continue; 70 String alpha = Iso639Data.toAlpha3(lang); 71 if (!biblio.equals(alpha)) { 72 System.out.println(lang + "\t\t" + biblio + "\t\t" + alpha); 73 } 74 } 75 System.out.println(State.a + ", " + State.b + ", " + State.c + ", " + State.cc); 76 77 ULocale myLocale = null; 78 String string1 = null, string2 = null; 79 RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(myLocale); 80 col.setNumericCollation(true); 81 col.compare(string1, string2); // compare strings 82 col.getRawCollationKey(string1, null); // get sort key (for indexing) 83 84 testNames(); 85 testExampleGenerator(); 86 if (true) 87 return; 88 checkNumericTimezone(); 89 90 long foo = UniversalTimeScale.from(new Date().getTime(), UniversalTimeScale.JAVA_TIME); 91 System.out.println("Current Universal Time: " + Long.toString(foo, 16)); 92 System.out.println("LVT_Syllable count: " + new UnicodeSet("[:Hangul_Syllable_Type=LVT_Syllable:]").size()); 93 System.out.println("LV_Syllable count: " + new UnicodeSet("[:Hangul_Syllable_Type=LV_Syllable:]").size()); 94 System.out.println("AC00 value: " 95 + UCharacter.getIntPropertyValue('\uAC00', UProperty.HANGUL_SYLLABLE_TYPE)); 96 // checkTranslit(); 97 // writeMetaData(); 98 // testXMLFileReader(); 99 // testBreakIterator("a\nÿĀ"); 100 101 // checkLanguages(); 102 // printCountries(); 103 // printZoneSamples(); 104 // printCurrencies(); 105 } finally { 106 System.out.println("Done"); 107 } 108 } 109 testNames()110 private static void testNames() { 111 Factory mainCldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "main" + File.separator, ".*"); 112 CLDRFile english = mainCldrFactory.make("en", true); 113 CLDRFile french = mainCldrFactory.make("fr", true); 114 String[] tests = { "en", "en_AU", "de_CH", "de_Arab_CH", "gsw", "gsw_Arab", "zh_Hans", "zh_Hans_US", 115 "zh_Hans_US_SAAHO" }; 116 for (String test : tests) { 117 System.out.println(test + "\t" + english.getName(test) + "\t" + french.getName(test)); 118 } 119 } 120 testExampleGenerator()121 private static void testExampleGenerator() throws IOException { 122 System.out.println("Creating English CLDRFile"); 123 Factory mainCldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "main" + File.separator, ".*"); 124 CLDRFile english = mainCldrFactory.make("en", true); 125 System.out.println("Creating Example Generator"); 126 ExampleGenerator englishExampleGenerator = new ExampleGenerator(english, english, 127 CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 128 // invoke once 129 System.out.println("Processing paths"); 130 StringBuilder result = new StringBuilder(); 131 Relation<String, String> message_paths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 132 for (String path : english) { 133 String value = english.getStringValue(path); 134 result.setLength(0); 135 String examples = englishExampleGenerator.getExampleHtml(path, value); 136 if (examples != null) { 137 result.append(examples).append("<hr>"); 138 } 139 String helpText = englishExampleGenerator.getHelpHtml(path, "@"); 140 if (helpText != null) { 141 result.append(helpText).append("<hr>"); 142 } else { 143 System.out.println("No help phrase for " + path); 144 } 145 if (result.length() != 0) { 146 message_paths.put(result.toString(), path + "\t:\t" + value); 147 } else { 148 message_paths.put("\uFFFD<b>NO MESSAGE</b><hr>", path + "\t:\t" + value); 149 } 150 } 151 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "test/", "test_examples.html"); 152 out.println("<html><body><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"); 153 for (String message : message_paths.keySet()) { 154 Set<String> paths = message_paths.getAll(message); 155 out.println("<p>"); 156 out.println(CldrUtility.join(paths, "<br>" + CldrUtility.LINE_SEPARATOR)); 157 out.println("</p><blockquote>"); 158 out.println(message); 159 out.println("</blockquote>"); 160 } 161 out.println(CldrUtility.ANALYTICS); 162 out.println("</body></html>"); 163 out.close(); 164 } 165 checkNumericTimezone()166 private static void checkNumericTimezone() throws IOException { 167 String[] map_integer_zones = new String[1000]; 168 StandardCodes sc = StandardCodes.make(); 169 Set<String> timezones = new TreeSet<>(sc.getGoodAvailableCodes("tzid")); 170 Map<String, Integer> map_timezone_integer = new TreeMap<>(); 171 BufferedReader input = CldrUtility.getUTF8Data("timezone_numeric.txt"); 172 int maxNumeric = -1; 173 Map<String, String> fixOld = sc.zoneParser.getZoneLinkold_new(); 174 while (true) { 175 String line = input.readLine(); 176 if (line == null) 177 break; 178 String[] parts = line.split(";\\s*"); 179 int numeric = Integer.parseInt(parts[0]); 180 String originalTzid = parts[1].trim(); 181 String fixedID = fixOld.get(originalTzid); 182 if (fixedID == null) { 183 if (!timezones.contains(originalTzid)) { 184 System.out.println(numeric + "\t" + originalTzid + "\tStrange ID: " + fixedID); 185 } 186 fixedID = originalTzid; 187 } else { 188 System.out.println("Replacing " + originalTzid + " with " + fixedID); 189 } 190 if (map_integer_zones[numeric] != null) { 191 System.out.println("Duplicate number:" + numeric + ",\t" + fixedID + ",\t" + originalTzid + ",\t" 192 + map_integer_zones[numeric]); 193 fixedID = "{" + originalTzid + "}"; 194 } 195 if (map_timezone_integer.get(fixedID) != null) { 196 System.out.println("Duplicate zone:" + numeric + ",\t" + fixedID + ",\t" + originalTzid + ",\t" 197 + map_timezone_integer.get(fixedID)); 198 fixedID = "{" + originalTzid + "}"; 199 } 200 map_integer_zones[numeric] = fixedID; 201 map_timezone_integer.put(fixedID, new Integer(numeric)); 202 if (maxNumeric < numeric) 203 maxNumeric = numeric; 204 } 205 // get the differences (and sort them) 206 RuleBasedCollator eng = (RuleBasedCollator) Collator.getInstance(); 207 eng.setNumericCollation(true); 208 209 Set<String> extra = new TreeSet<>(eng); 210 extra.addAll(map_timezone_integer.keySet()); 211 extra.removeAll(timezones); 212 System.out.println("Extra: " + extra); 213 Set<String> needed = new TreeSet<>(eng); 214 needed.addAll(timezones); 215 needed.removeAll(map_timezone_integer.keySet()); 216 System.out.println("Needed: " + needed); 217 218 // fill in the slots with the missing items 219 // make Etc/GMT go first 220 int numeric = 1; 221 List<String> ordered = new ArrayList<>(needed); 222 // if (ordered.contains("Etc/GMT")) { 223 // ordered.remove("Etc/GMT"); 224 // ordered.add(0,"Etc/GMT"); 225 // } 226 227 for (String tzid : ordered) { 228 while (map_integer_zones[numeric] != null) 229 ++numeric; // find first free one 230 if (maxNumeric < numeric) 231 maxNumeric = numeric; 232 map_integer_zones[numeric] = tzid; 233 map_timezone_integer.put(tzid, new Integer(numeric)); 234 } 235 236 // print it out 237 Map<String, Set<String>> equiv = sc.zoneParser.getZoneLinkNew_OldSet(); 238 Set<String> old = new TreeSet<>(); 239 for (int i = 1; i <= maxNumeric; ++i) { 240 Set<String> s = equiv.get(map_integer_zones[i]); 241 if (s != null) { 242 old.clear(); 243 old.addAll(s); 244 } 245 System.out.println("\t\"" + map_integer_zones[i] + "\","); 246 } 247 } 248 checkTranslit()249 private static void checkTranslit() { 250 251 for (int i = 0; i < 0xFFFF; ++i) { 252 checkTranslit(UTF16.valueOf(i)); 253 } 254 PrintStream out = System.out; 255 Transliterator toHTML = TransliteratorUtilities.toHTML; 256 UnicodeSet a_out = new UnicodeSet("[:whitespace:]"); 257 for (UnicodeSetIterator it = new UnicodeSetIterator(a_out); it.next();) { 258 int s = it.codepoint; 259 String literal = toHTML.transliterate(UTF16.valueOf(s)); 260 out.println(com.ibm.icu.impl.Utility.hex(s, 4) + " (" + literal + ") " + UCharacter.getName(s)); 261 } 262 } 263 checkTranslit(String string)264 private static void checkTranslit(String string) { 265 String html = TransliteratorUtilities.toHTML.transliterate(string); 266 String reverse = TransliteratorUtilities.fromHTML.transliterate(html); 267 if (!reverse.equals(string)) 268 System.out 269 .println(string + "\t=>\t" + html + "\t=>\t" + reverse + (!reverse.equals(string) ? " FAIL" : "")); 270 String htmlAscii = TransliteratorUtilities.toHTMLAscii.transliterate(string); 271 String reverseAscii = TransliteratorUtilities.fromHTML.transliterate(htmlAscii); 272 if (!reverseAscii.equals(string)) 273 System.out.println(string + "\t=>\t" + htmlAscii + "\t=>\t" + reverseAscii 274 + (!reverseAscii.equals(string) ? " FAIL" : "")); 275 } 276 writeMetaData()277 private static void writeMetaData() throws IOException { 278 CLDRFile meta = SimpleFactory.makeFile("metaData").setNonInheriting(true); 279 String[] elements = new String[] { "ldml", "identity", "alias", "localeDisplayNames", "layout", "characters", 280 "delimiters", "measurement", "dates", "numbers", "collations", "posix", 281 "segmentations", "references", "version", "generation", "language", "script", "territory", "variant", 282 "languages", "scripts", "territories", "variants", "keys", "types", 283 "measurementSystemNames", "key", "type", "measurementSystemName", "orientation", "inList", 284 "exemplarCharacters", "mapping", "quotationStart", "quotationEnd", "alternateQuotationStart", 285 "alternateQuotationEnd", "measurementSystem", "paperSize", "height", "width", "localizedPatternChars", 286 "calendars", "timeZoneNames", "months", "monthNames", "monthAbbr", "days", "dayNames", 287 "dayAbbr", "quarters", "week", "am", "pm", "eras", "dateFormats", "timeFormats", "dateTimeFormats", 288 "fields", "month", "day", "quarter", "minDays", "firstDay", "weekendStart", "weekendEnd", 289 "eraNames", "eraAbbr", "era", "pattern", "displayName", "dateFormatItem", "appendItem", "hourFormat", 290 "hoursFormat", "gmtFormat", "regionFormat", "fallbackFormat", "abbreviationFallback", 291 "preferenceOrdering", "singleCountries", "default", "calendar", "monthContext", "monthWidth", "dayContext", 292 "dayWidth", "quarterContext", "quarterWidth", "dateFormatLength", "dateFormat", 293 "timeFormatLength", "timeFormat", "dateTimeFormatLength", "availableFormats", "appendItems", 294 "dateTimeFormat", "zone", "metazone", "long", "short", "usesMetazone", "exemplarCity", "generic", 295 "standard", "daylight", "field", "relative", "symbols", "decimalFormats", "scientificFormats", 296 "percentFormats", "currencyFormats", "currencies", "decimalFormatLength", "decimalFormat", 297 "scientificFormatLength", "scientificFormat", "percentFormatLength", "percentFormat", "currencySpacing", 298 "currencyFormatLength", "beforeCurrency", "afterCurrency", "currencyMatch", 299 "surroundingMatch", "insertBetween", "currencyFormat", "currency", "symbol", "decimal", "group", "list", 300 "percentSign", "nativeZeroDigit", "patternDigit", "plusSign", "minusSign", 301 "exponential", "perMille", "infinity", "nan", "collation", "messages", "yesstr", "nostr", "yesexpr", 302 "noexpr", "segmentation", "variables", "segmentRules", "special", "variable", "rule", 303 "comment", 304 // collation 305 "base", "settings", "suppress_contractions", "optimize", "rules" }; 306 String list = String.join(" ", elements); 307 String prefix = "//supplementalData[@version=\"1.4\"]/metaData/"; 308 meta.add(prefix + "elementOrder", list); 309 310 String[] attOrder = new String[] { "_q", 311 "type", 312 // always after 313 "key", "registry", "source", "target", "path", "day", "date", "version", "count", "lines", "characters", 314 "before", "from", "to", "number", "time", "casing", "list", "uri", "iso4217", 315 "digits", "rounding", "iso3166", "hex", "id", "request", "direction", 316 // collation stuff 317 "alternate", "backwards", "caseFirst", "caseLevel", "hiraganaQuarternary", "hiraganaQuaternary", 318 "normalization", "numeric", "strength", 319 // always near the end 320 "validSubLocales", "standard", "references", "elements", "element", "attributes", "attribute", 321 // these are always at the end 322 "alt", "draft", }; 323 meta.add(prefix + "attributeOrder", String.join(" ", attOrder)); 324 325 String[] serialElements = new String[] { "variable", "comment", 326 "tRule", 327 // collation 328 "reset", "p", "pc", "s", "sc", "t", "tc", "i", "ic", "x", "extend", "first_variable", "last_variable", 329 "first_tertiary_ignorable", "last_tertiary_ignorable", 330 "first_secondary_ignorable", "last_secondary_ignorable", "first_primary_ignorable", 331 "last_primary_ignorable", "first_non_ignorable", "last_non_ignorable", "first_trailing", "last_trailing" }; 332 meta.add(prefix + "serialElements", String.join(" ", serialElements)); 333 /* 334 * 335 * <attributeValues elements="weekendStart weekendEnd" attributes="day" 336 * order="given"> sun mon tue wed thu fri sat</attributeValues> 337 * 338 * if (attribute.equals("day")) { // && (element.startsWith("weekend") comp = 339 * dayValueOrder; } else if (attribute.equals("type")) { 340 * 341 * else if (element.equals("day")) comp = dayValueOrder; 342 * 343 * else if (element.equals("zone")) comp = zoneOrder; 344 */ 345 Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 346 GenerateAttributeList attributes = new GenerateAttributeList(cldrFactory); 347 Map<String, Map<String, Set<String>[]>> element_attribute_valueSet = attributes.getElement_attribute_valueSet(); 348 for (Iterator<String> it = element_attribute_valueSet.keySet().iterator(); it.hasNext();) { 349 String element = it.next(); 350 Map<String, Set<String>[]> attribute_valueSet = element_attribute_valueSet.get(element); 351 int size = attribute_valueSet.size(); 352 if (size == 0) 353 continue; 354 for (Iterator<String> it2 = attribute_valueSet.keySet().iterator(); it2.hasNext();) { 355 String attribute = it2.next(); 356 Set<String>[] valueSets = attribute_valueSet.get(attribute); 357 for (int i = 0; i < 2; ++i) { 358 meta.add(prefix + "valid/attributeValues" + "[@elements=\"" + element + "\"]" + "[@attributes=\"" 359 + attribute + "\"]" + (i == 1 ? "[@x=\"true\"]" : ""), 360 Joiner.on(" ").join(valueSets[i])); 361 } 362 } 363 } 364 365 String[] dayValueOrder = new String[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" }; 366 meta.add(prefix + "valid/attributeValues[@order=\"given\"][@attributes=\"type\"][@elements=\"" + "day" + "\"]", 367 String.join(" ", dayValueOrder)); 368 meta.add(prefix + "valid/attributeValues[@order=\"given\"][@attributes=\"" + "day" + "\"][@elements=\"" 369 + "firstDay weekendEnd weekendStart" + "\"]", String.join(" ", dayValueOrder)); 370 371 String[] widths = { "monthWidth", "dayWidth", "quarterWidth" }; 372 String[] widthOrder = new String[] { "abbreviated", "narrow", "wide" }; 373 meta.add(prefix + "valid/attributeValues[@order=\"given\"][@attributes=\"type\"][@elements=\"" 374 + String.join(" ", widths) + "\"]", String.join(" ", widthOrder)); 375 376 String[] formatLengths = { "dateFormatLength", "timeFormatLength", "dateTimeFormatLength", 377 "decimalFormatLength", "scientificFormatLength", "percentFormatLength", "currencyFormatLength" }; 378 String[] lengthOrder = new String[] { "full", "long", "medium", "short" }; 379 meta.add(prefix + "valid/attributeValues[@order=\"given\"][@attributes=\"type\"][@elements=\"" 380 + String.join(" ", formatLengths) + "\"]", String.join(" ", lengthOrder)); 381 382 String[] dateFieldOrder = new String[] { "era", "year", "month", "week", "day", "weekday", "dayperiod", "hour", 383 "minute", "second", "zone" }; 384 meta.add(prefix + "valid/attributeValues[@order=\"given\"][@attributes=\"type\"][@elements=\"field\"]", 385 String.join(" ", dateFieldOrder)); 386 387 String[][] suppressData = { { "ldml", "version", "*" }, { "orientation", "characters", "left-to-right" }, 388 { "orientation", "lines", "top-to-bottom" }, { "weekendStart", "time", "00:00" }, 389 { "weekendEnd", "time", "24:00" }, { "dateFormat", "type", "standard" }, 390 { "timeFormat", "type", "standard" }, { "dateTimeFormat", "type", "standard" }, 391 { "decimalFormat", "type", "standard" }, { "scientificFormat", "type", "standard" }, 392 { "percentFormat", "type", "standard" }, { "currencyFormat", "type", "standard" }, 393 { "pattern", "type", "standard" }, { "currency", "type", "standard" }, { "collation", "type", "standard" }, 394 { "*", "_q", "*" }, }; 395 for (int i = 0; i < suppressData.length; ++i) { 396 meta.add(prefix + "suppress/attributes" + "[@element=\"" + suppressData[i][0] + "\"][@attribute=\"" 397 + suppressData[i][1] + "\"][@attributeValue=\"" + suppressData[i][2] + "\"]", ""); 398 } 399 // write out and look at 400 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "meta/", "metaData.xml"); 401 meta.write(out); 402 out.close(); 403 XMLFileReader xfr = new XMLFileReader().setHandler(new MyHandler()); 404 xfr.read(CLDRPaths.GEN_DIRECTORY + "meta/metaData.xml", XMLFileReader.CONTENT_HANDLER 405 | XMLFileReader.ERROR_HANDLER, false); 406 } 407 testXMLFileReader()408 private static void testXMLFileReader() { 409 XMLFileReader xfr = new XMLFileReader().setHandler(new MyHandler()); 410 xfr.read(CLDRPaths.MAIN_DIRECTORY + "root.xml", -1, true); 411 } 412 413 static class MyHandler extends XMLFileReader.SimpleHandler { 414 415 @Override handleAttributeDecl(String eName, String aName, String type, String mode, String value)416 public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) { 417 System.out.println("eName: " + eName + ",\t aName: " + aName + ",\t type: " + type + ",\t mode: " + mode 418 + ",\t value: " + value); 419 } 420 421 @Override handleElementDecl(String name, String model)422 public void handleElementDecl(String name, String model) { 423 System.out.println("name: " + name + ",\t model: " + model); 424 } 425 426 @Override handlePathValue(String path, String value)427 public void handlePathValue(String path, String value) { 428 System.out.println("path: " + path + ",\t value: " + value); 429 } 430 431 @Override handleComment(String path, String comment)432 public void handleComment(String path, String comment) { 433 System.out.println("path: " + path + ",\t comment: " + comment); 434 } 435 436 } 437 testBreakIterator(String text)438 public static void testBreakIterator(String text) { 439 System.out.println(text); 440 String choice = "Line"; 441 442 String BASE_RULES = "'<' > '<' ;" + "'<' < '&'[lL][Tt]';' ;" + "'&' > '&' ;" 443 + "'&' < '&'[aA][mM][pP]';' ;" + "'>' < '&'[gG][tT]';' ;" + "'\"' < '&'[qQ][uU][oO][tT]';' ; " 444 + "'' < '&'[aA][pP][oO][sS]';' ; "; 445 446 String CONTENT_RULES = "'>' > '>' ;"; 447 448 String HTML_RULES = BASE_RULES + CONTENT_RULES + "'\"' > '"' ; "; 449 450 String HTML_RULES_CONTROLS = HTML_RULES 451 + "([[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:][\\u0080-\\U0010FFFF]]) > &hex/xml($1) ; "; 452 453 Transliterator toHTML = Transliterator.createFromRules("any-xml", HTML_RULES_CONTROLS, Transliterator.FORWARD); 454 455 RuleBasedBreakIterator b; 456 if (choice.equals("Word")) 457 b = (RuleBasedBreakIterator) BreakIterator.getWordInstance(); 458 else if (choice.equals("Line")) 459 b = (RuleBasedBreakIterator) BreakIterator.getLineInstance(); 460 else if (choice.equals("Sentence")) 461 b = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance(); 462 else 463 b = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(); 464 465 Matcher decimalEscapes = PatternCache.get("&#(x?)([0-9]+);").matcher(text); 466 // quick hack, since hex-any doesn't do decimal escapes 467 int start = 0; 468 StringBuffer result2 = new StringBuffer(); 469 while (decimalEscapes.find(start)) { 470 int radix = decimalEscapes.group(2).length() == 0 ? 10 : 16; 471 int code = Integer.parseInt(decimalEscapes.group(2), radix); 472 result2.append(text.substring(start, decimalEscapes.start()) + UTF16.valueOf(code)); 473 start = decimalEscapes.end(); 474 } 475 result2.append(text.substring(start)); 476 text = result2.toString(); 477 478 int lastBreak = 0; 479 StringBuffer result = new StringBuffer(); 480 b.setText(text); 481 b.first(); 482 for (int nextBreak = b.next(); nextBreak != BreakIterator.DONE; nextBreak = b.next()) { 483 b.getRuleStatus(); 484 String piece = text.substring(lastBreak, nextBreak); 485 piece = toHTML.transliterate(piece); 486 piece = piece.replaceAll("
", "<br>"); 487 result.append("<span class='break'>").append(piece).append("</span>"); 488 lastBreak = nextBreak; 489 } 490 491 System.out.println(result); 492 } 493 checkStandardCodes()494 private static void checkStandardCodes() { 495 StandardCodes sc = StandardCodes.make(); 496 showCodes(sc, "language"); 497 showCodes(sc, "script"); 498 showCodes(sc, "territory"); 499 showCodes(sc, "tzid"); 500 showCodes(sc, "currency"); 501 502 Map<String, Map<String, Map<String, String>>> m = StandardCodes.getLStreg(); 503 // print lstreg first 504 if (false) { 505 System.out.println("Printing Data"); 506 for (Iterator it = m.keySet().iterator(); it.hasNext();) { 507 String type = (String) it.next(); 508 Map subtagData = m.get(type); 509 for (Iterator it2 = subtagData.keySet().iterator(); it2.hasNext();) { 510 String subtag = (String) it2.next(); 511 Map labelData = (Map) subtagData.get(subtag); 512 System.out.println(type + "\t " + subtag + "\t " + labelData); 513 } 514 } 515 } 516 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) { 517 String type = it.next(); 518 Map<String, Map<String, String>> subtagData = m.get(type); 519 520 String oldType = type.equals("region") ? "territory" : type; 521 Set<String> allCodes = sc.getAvailableCodes(oldType); 522 Set<String> temp = new TreeSet<>(subtagData.keySet()); 523 temp.removeAll(allCodes); 524 System.out.println(type + "\t in new but not old\t" + temp); 525 526 temp = new TreeSet<>(allCodes); 527 temp.removeAll(subtagData.keySet()); 528 System.out.println(type + "\t in old but not new\t" + temp); 529 } 530 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) { 531 String type = it.next(); 532 Map<String, Map<String, String>> subtagData = m.get(type); 533 String oldType = type.equals("region") ? "territory" : type; 534 Set<String> goodCodes = sc.getGoodAvailableCodes(oldType); 535 536 for (Iterator<String> it2 = subtagData.keySet().iterator(); it2.hasNext();) { 537 String tag = it2.next(); 538 Map<String, String> data = subtagData.get(tag); 539 List<String> sdata = sc.getFullData(oldType, tag); 540 if (sdata == null) { 541 if (true) 542 continue; 543 System.out.println("new in ltru"); 544 System.out.println("\t" + type + "\t" + tag + "\t" + data); 545 continue; 546 } 547 String description = sdata.get(0); 548 boolean deprecated = !goodCodes.contains(tag); 549 if (description.equalsIgnoreCase("PRIVATE USE")) { 550 // description = ""; 551 deprecated = false; 552 } 553 String newDescription = data.get("Description"); 554 boolean newDeprecated = data.get("Deprecated") != null; 555 if (!description.equals(newDescription)) { 556 System.out.println(type + "\t" + tag + "\tDescriptions differ: {" + description + "} ### {" 557 + newDescription + "}"); 558 } 559 if (deprecated != newDeprecated) { 560 System.out.println(type + "\t" + tag + "\tDeprecated differs: {" + deprecated + "} ### {" 561 + newDeprecated + "}"); 562 } 563 } 564 } 565 // print metadata 566 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) { 567 String type = it.next(); 568 Map<String, Map<String, String>> subtagData = m.get(type); 569 String oldType = type.equals("region") ? "territory" : type; 570 571 String aliasType =oldType.equals("legacy") ? "language" : oldType; 572 Set<String> allCodes = new TreeSet<>(); 573 Set<String> deprecatedCodes = new TreeSet<>(); 574 575 for (Iterator<String> it2 = subtagData.keySet().iterator(); it2.hasNext();) { 576 String tag = it2.next(); 577 Map<String, String> data = subtagData.get(tag); 578 if (data.get("Deprecated") != null) { 579 String preferred = data.get("Preferred-Value"); 580 String cldr = null != data.get("CLDR") ? "CLDR: " : ""; 581 System.out.println("\t\t\t<" + aliasType + "Alias type=\"" + tag + "\"" 582 + (preferred == null || preferred.length() == 0 ? "" : " replacement=\"" + preferred + "\"") 583 + "/> <!-- " + cldr 584 + data.get("Description") + " -->"); 585 deprecatedCodes.add(tag); 586 } else { 587 allCodes.add(tag); 588 } 589 } 590 // get old ones 591 Set<String> goodCodes = sc.getAvailableCodes(oldType); 592 TreeSet<String> oldAndNotNew = new TreeSet<>(goodCodes); 593 oldAndNotNew.removeAll(allCodes); 594 oldAndNotNew.removeAll(deprecatedCodes); 595 for (Iterator<String> it2 = oldAndNotNew.iterator(); it2.hasNext();) { 596 String tag = it2.next(); 597 List<String> sdata = sc.getFullData(oldType, tag); 598 String preferred = sdata.get(2); 599 System.out.println("\t\t\t<" + aliasType + "Alias type=\"" + tag + "\" replacement=\"" + preferred 600 + "\"/> <!-- CLDR:" + sdata.get(0) + " -->"); 601 } 602 String allCodeString = Joiner.on(" ").join(allCodes); 603 System.out 604 .println("\t\t\t<variable id=\"$" + oldType + "\" type=\"list\">" + allCodeString + "</variable>"); 605 } 606 } 607 showCodes(StandardCodes sc, String type)608 private static void showCodes(StandardCodes sc, String type) { 609 Set<String> codes = sc.getSurveyToolDisplayCodes(type); 610 System.out.println("Survey Tool Codes " + codes.size() + "\t" + type); 611 for (String code : codes) { 612 System.out.println("\t" + code + "\t" + sc.getFullData(type, code)); 613 } 614 } 615 checkLanguages()616 private static void checkLanguages() { 617 // TODO Auto-generated method stub 618 619 Factory mainCldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "main" + File.separator, ".*"); 620 Set<String> availableLocales = mainCldrFactory.getAvailable(); 621 Set<String> available = new TreeSet<>(); 622 LocaleIDParser lip = new LocaleIDParser(); 623 for (Iterator<String> it = availableLocales.iterator(); it.hasNext();) { 624 available.add(lip.set(it.next()).getLanguage()); 625 } 626 Set<String> langHack = new TreeSet<>(); 627 for (int i = 0; i < language_territory_hack.length; ++i) { 628 String lang = language_territory_hack[i][0]; 629 langHack.add(lang); 630 } 631 if (langHack.containsAll(available)) 632 System.out.println("All ok"); 633 else { 634 available.removeAll(langHack); 635 for (Iterator<String> it = available.iterator(); it.hasNext();) { 636 String item = it.next(); 637 System.out.println("{\"" + item + "\", \"XXX\"},/t//" 638 + ULocale.getDisplayLanguage(item, ULocale.ENGLISH)); 639 } 640 } 641 } 642 643 /** 644 * @throws IOException 645 * 646 */ printCountries()647 private static void printCountries() throws IOException { 648 Factory mainCldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "main" + File.separator, ".*"); 649 CLDRFile english = mainCldrFactory.make("en", true); 650 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "country_language_names.txt"); 651 StandardCodes sc = StandardCodes.make(); 652 for (Iterator<String> it = sc.getGoodAvailableCodes("language").iterator(); it.hasNext();) { 653 String code = it.next(); 654 out.println(code + "\t" + english.getName(CLDRFile.LANGUAGE_NAME, code)); 655 } 656 out.println("****"); 657 for (Iterator<String> it = sc.getGoodAvailableCodes("territory").iterator(); it.hasNext();) { 658 String code = it.next(); 659 out.println(code + "\t" + english.getName(CLDRFile.TERRITORY_NAME, code)); 660 } 661 out.println("****"); 662 for (Iterator<String> it = sc.getGoodAvailableCodes("script").iterator(); it.hasNext();) { 663 String code = it.next(); 664 out.println(code + "\t" + english.getName(CLDRFile.SCRIPT_NAME, code)); 665 } 666 out.close(); 667 } 668 669 /** 670 * 671 */ printCurrencies()672 private static void printCurrencies() { 673 StandardCodes sc = StandardCodes.make(); 674 Set<String> s = sc.getAvailableCodes("currency"); 675 for (Iterator<String> it = s.iterator(); it.hasNext();) { 676 String code = it.next(); 677 String name = sc.getData("currency", code); 678 List<String> data = sc.getFullData("currency", code); 679 System.out.println(code + "\t" + name + "\t" + data); 680 } 681 } 682 683 /** 684 * @throws IOException 685 * @throws ParseException 686 * 687 */ printZoneSamples()688 private static void printZoneSamples() throws Exception { 689 String[] locales = { "en", "en_GB", "de", "zh", "hi", "bg", "ru", "ja", "as" // picked 690 // deliberately 691 // because 692 // it 693 // has 694 // few 695 // itesm 696 }; 697 String[] zones = { "America/Los_Angeles", "America/Argentina/Buenos_Aires", "America/Buenos_Aires", 698 "America/Havana", "Australia/ACT", "Australia/Sydney", "Europe/London", "Europe/Moscow", 699 "Etc/GMT+3" }; 700 String[][] fields = { { "2004-01-15T00:00:00Z", "Z", "ZZZZ", "z", "zzzz" }, 701 { "2004-07-15T00:00:00Z", "Z", "ZZZZ", "z", "zzzz", "v", "vvvv" } }; 702 Factory mainCldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "main" + File.separator, ".*"); 703 PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "timezone_samples.txt"); 704 long[] offsetMillis = new long[1]; 705 ParsePosition parsePosition = new ParsePosition(0); 706 707 for (int i = 0; i < locales.length; ++i) { 708 String locale = locales[i]; 709 TimezoneFormatter tzf = new TimezoneFormatter(mainCldrFactory, locale, false).setSkipDraft(true); 710 for (int j = 0; j < zones.length; ++j) { 711 String zone = zones[j]; 712 for (int k = 0; k < fields.length; ++k) { 713 String type = fields[k][0]; 714 Date datetime = ICUServiceBuilder.isoDateParse(type); 715 for (int m = 1; m < fields[k].length; ++m) { 716 String field = fields[k][m]; 717 String formatted = tzf.getFormattedZone(zone, field, datetime.getTime(), false); 718 parsePosition.setIndex(0); 719 String parsed = tzf.parse(formatted, parsePosition, offsetMillis); 720 if (parsed == null) 721 parsed = "FAILED PARSE"; 722 else if (parsed.length() == 0) 723 parsed = format(offsetMillis[0]); 724 out.println("{\"" + locale + "\",\t\"" + zone + "\",\t\"" + type + "\",\t\"" + field 725 + "\",\t\"" + formatted + "\",\t\"" + parsed + "\"},"); 726 } 727 } 728 out.println(); 729 } 730 out.println("=========="); 731 out.println(); 732 } 733 out.close(); 734 } 735 736 /** 737 * quick & dirty format 738 */ format(long offsetMillis)739 private static String format(long offsetMillis) { 740 offsetMillis /= 60 * 1000; 741 String sign = "+"; 742 if (offsetMillis < 0) { 743 offsetMillis = -offsetMillis; 744 sign = "-"; 745 } 746 return sign + String.valueOf(offsetMillis / 60) + ":" 747 + String.valueOf(100 + (offsetMillis % 60)).substring(1, 3); 748 } 749 750 private static final String[][] language_territory_hack = { { "af", "ZA" }, { "am", "ET" }, { "ar", "SA" }, 751 { "as", "IN" }, { "ay", "PE" }, { "az", "AZ" }, { "bal", "PK" }, { "be", "BY" }, 752 { "bg", "BG" }, { "bn", "IN" }, { "bs", "BA" }, { "ca", "ES" }, { "ch", "MP" }, { "cpe", "SL" }, 753 { "cs", "CZ" }, { "cy", "GB" }, { "da", "DK" }, { "de", "DE" }, { "dv", "MV" }, { "dz", "BT" }, 754 { "el", "GR" }, { "en", "US" }, { "es", "ES" }, { "et", "EE" }, { "eu", "ES" }, { "fa", "IR" }, { "fi", "FI" }, 755 { "fil", "PH" }, { "fj", "FJ" }, { "fo", "FO" }, { "fr", "FR" }, { "ga", "IE" }, 756 { "gd", "GB" }, { "gl", "ES" }, { "gn", "PY" }, { "gu", "IN" }, { "gv", "GB" }, { "ha", "NG" }, { "he", "IL" }, 757 { "hi", "IN" }, { "ho", "PG" }, { "hr", "HR" }, { "ht", "HT" }, { "hu", "HU" }, 758 { "hy", "AM" }, { "id", "ID" }, { "is", "IS" }, { "it", "IT" }, { "ja", "JP" }, { "ka", "GE" }, { "kk", "KZ" }, 759 { "kl", "GL" }, { "km", "KH" }, { "kn", "IN" }, { "ko", "KR" }, { "kok", "IN" }, 760 { "ks", "IN" }, { "ku", "TR" }, { "ky", "KG" }, { "la", "VA" }, { "lb", "LU" }, { "ln", "CG" }, { "lo", "LA" }, 761 { "lt", "LT" }, { "lv", "LV" }, { "mai", "IN" }, { "men", "GN" }, { "mg", "MG" }, 762 { "mh", "MH" }, { "mk", "MK" }, { "ml", "IN" }, { "mn", "MN" }, { "mni", "IN" }, { "mo", "MD" }, 763 { "mr", "IN" }, { "ms", "MY" }, { "mt", "MT" }, { "my", "MM" }, { "na", "NR" }, { "nb", "NO" }, 764 { "nd", "ZA" }, { "ne", "NP" }, { "niu", "NU" }, { "nl", "NL" }, { "nn", "NO" }, { "no", "NO" }, 765 { "nr", "ZA" }, { "nso", "ZA" }, { "ny", "MW" }, { "om", "KE" }, { "or", "IN" }, { "pa", "IN" }, 766 { "pau", "PW" }, { "pl", "PL" }, { "ps", "PK" }, { "pt", "BR" }, { "qu", "PE" }, { "rn", "BI" }, 767 { "ro", "RO" }, { "ru", "RU" }, { "rw", "RW" }, { "sd", "IN" }, { "sg", "CF" }, { "si", "LK" }, 768 { "sk", "SK" }, { "sl", "SI" }, { "sm", "WS" }, { "so", "DJ" }, { "sq", "CS" }, { "sr", "CS" }, { "ss", "ZA" }, 769 { "st", "ZA" }, { "sv", "SE" }, { "sw", "KE" }, { "ta", "IN" }, { "te", "IN" }, 770 { "tem", "SL" }, { "tet", "TL" }, { "th", "TH" }, { "ti", "ET" }, { "tg", "TJ" }, { "tk", "TM" }, 771 { "tkl", "TK" }, { "tvl", "TV" }, { "tl", "PH" }, { "tn", "ZA" }, { "to", "TO" }, 772 { "tpi", "PG" }, { "tr", "TR" }, { "ts", "ZA" }, { "uk", "UA" }, { "ur", "IN" }, { "uz", "UZ" }, 773 { "ve", "ZA" }, { "vi", "VN" }, { "wo", "SN" }, { "xh", "ZA" }, { "zh", "CN" }, 774 { "zh_Hant", "TW" }, { "zu", "ZA" }, { "aa", "ET" }, { "byn", "ER" }, { "eo", "DE" }, { "gez", "ET" }, 775 { "haw", "US" }, { "iu", "CA" }, { "kw", "GB" }, { "sa", "IN" }, { "sh", "HR" }, 776 { "sid", "ET" }, { "syr", "SY" }, { "tig", "ER" }, { "tt", "RU" }, { "wal", "ET" }, }; 777 778 } 779