• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2004, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 
10 package org.unicode.cldr.util;
11 
12 import java.io.BufferedReader;
13 import java.io.File;
14 import java.io.IOException;
15 import java.io.PrintStream;
16 import java.io.PrintWriter;
17 import java.text.ParseException;
18 import java.text.ParsePosition;
19 import java.util.ArrayList;
20 import java.util.Date;
21 import java.util.Iterator;
22 import java.util.List;
23 import java.util.Map;
24 import java.util.Set;
25 import java.util.TreeMap;
26 import java.util.TreeSet;
27 import java.util.regex.Matcher;
28 
29 import org.unicode.cldr.draft.FileUtilities;
30 import org.unicode.cldr.test.ExampleGenerator;
31 import org.unicode.cldr.tool.GenerateAttributeList;
32 
33 import com.google.common.base.Joiner;
34 import com.ibm.icu.impl.Relation;
35 import com.ibm.icu.lang.UCharacter;
36 import com.ibm.icu.lang.UProperty;
37 import com.ibm.icu.text.BreakIterator;
38 import com.ibm.icu.text.Collator;
39 import com.ibm.icu.text.RuleBasedBreakIterator;
40 import com.ibm.icu.text.RuleBasedCollator;
41 import com.ibm.icu.text.Transliterator;
42 import com.ibm.icu.text.UTF16;
43 import com.ibm.icu.text.UnicodeSet;
44 import com.ibm.icu.text.UnicodeSetIterator;
45 import com.ibm.icu.util.ULocale;
46 import com.ibm.icu.util.UniversalTimeScale;
47 
/**
 * Ad-hoc command-line checks for CLDR utility classes: run {@code main} and inspect the
 * text printed to the console.
 *
 * @author davis
 */
54 public class TestUtilities {
55     enum State {
56         a, b, c;
57         public static State cc = c;
58     }
59 
60     String s;
61 
main(String[] args)62     public static void main(String[] args) throws Exception {
63         try {
64             checkStandardCodes();
65             if (true) return;
66             testExampleGenerator();
67             for (String lang : Iso639Data.getAvailable()) {
68                 String biblio = Iso639Data.toBiblio3(lang);
69                 if (biblio == null) continue;
70                 String alpha = Iso639Data.toAlpha3(lang);
71                 if (!biblio.equals(alpha)) {
72                     System.out.println(lang + "\t\t" + biblio + "\t\t" + alpha);
73                 }
74             }
75             System.out.println(State.a + ", " + State.b + ", " + State.c + ", " + State.cc);
76 
77             ULocale myLocale = null;
78             String string1 = null, string2 = null;
79             RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(myLocale);
80             col.setNumericCollation(true);
81             col.compare(string1, string2); // compare strings
82             col.getRawCollationKey(string1, null); // get sort key (for indexing)
83 
84             testNames();
85             testExampleGenerator();
86             if (true)
87                 return;
88             checkNumericTimezone();
89 
90             long foo = UniversalTimeScale.from(new Date().getTime(), UniversalTimeScale.JAVA_TIME);
91             System.out.println("Current Universal Time: " + Long.toString(foo, 16));
92             System.out.println("LVT_Syllable count: " + new UnicodeSet("[:Hangul_Syllable_Type=LVT_Syllable:]").size());
93             System.out.println("LV_Syllable count: " + new UnicodeSet("[:Hangul_Syllable_Type=LV_Syllable:]").size());
94             System.out.println("AC00 value: "
95                 + UCharacter.getIntPropertyValue('\uAC00', UProperty.HANGUL_SYLLABLE_TYPE));
96             // checkTranslit();
97             // writeMetaData();
98             // testXMLFileReader();
99             // testBreakIterator("a\nÿĀ");
100 
101             // checkLanguages();
102             // printCountries();
103             // printZoneSamples();
104             // printCurrencies();
105         } finally {
106             System.out.println("Done");
107         }
108     }
109 
testNames()110     private static void testNames() {
111         Factory mainCldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "main" + File.separator, ".*");
112         CLDRFile english = mainCldrFactory.make("en", true);
113         CLDRFile french = mainCldrFactory.make("fr", true);
114         String[] tests = { "en", "en_AU", "de_CH", "de_Arab_CH", "gsw", "gsw_Arab", "zh_Hans", "zh_Hans_US",
115             "zh_Hans_US_SAAHO" };
116         for (String test : tests) {
117             System.out.println(test + "\t" + english.getName(test) + "\t" + french.getName(test));
118         }
119     }
120 
testExampleGenerator()121     private static void testExampleGenerator() throws IOException {
122         System.out.println("Creating English CLDRFile");
123         Factory mainCldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "main" + File.separator, ".*");
124         CLDRFile english = mainCldrFactory.make("en", true);
125         System.out.println("Creating Example Generator");
126         ExampleGenerator englishExampleGenerator = new ExampleGenerator(english, english,
127             CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
128         // invoke once
129         System.out.println("Processing paths");
130         StringBuilder result = new StringBuilder();
131         Relation<String, String> message_paths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
132         for (String path : english) {
133             String value = english.getStringValue(path);
134             result.setLength(0);
135             String examples = englishExampleGenerator.getExampleHtml(path, value);
136             if (examples != null) {
137                 result.append(examples).append("<hr>");
138             }
139             String helpText = englishExampleGenerator.getHelpHtml(path, "@");
140             if (helpText != null) {
141                 result.append(helpText).append("<hr>");
142             } else {
143                 System.out.println("No help phrase for " + path);
144             }
145             if (result.length() != 0) {
146                 message_paths.put(result.toString(), path + "\t:\t" + value);
147             } else {
148                 message_paths.put("\uFFFD<b>NO MESSAGE</b><hr>", path + "\t:\t" + value);
149             }
150         }
151         PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "test/", "test_examples.html");
152         out.println("<html><body><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
153         for (String message : message_paths.keySet()) {
154             Set<String> paths = message_paths.getAll(message);
155             out.println("<p>");
156             out.println(CldrUtility.join(paths, "<br>" + CldrUtility.LINE_SEPARATOR));
157             out.println("</p><blockquote>");
158             out.println(message);
159             out.println("</blockquote>");
160         }
161         out.println(CldrUtility.ANALYTICS);
162         out.println("</body></html>");
163         out.close();
164     }
165 
checkNumericTimezone()166     private static void checkNumericTimezone() throws IOException {
167         String[] map_integer_zones = new String[1000];
168         StandardCodes sc = StandardCodes.make();
169         Set<String> timezones = new TreeSet<>(sc.getGoodAvailableCodes("tzid"));
170         Map<String, Integer> map_timezone_integer = new TreeMap<>();
171         BufferedReader input = CldrUtility.getUTF8Data("timezone_numeric.txt");
172         int maxNumeric = -1;
173         Map<String, String> fixOld = sc.zoneParser.getZoneLinkold_new();
174         while (true) {
175             String line = input.readLine();
176             if (line == null)
177                 break;
178             String[] parts = line.split(";\\s*");
179             int numeric = Integer.parseInt(parts[0]);
180             String originalTzid = parts[1].trim();
181             String fixedID = fixOld.get(originalTzid);
182             if (fixedID == null) {
183                 if (!timezones.contains(originalTzid)) {
184                     System.out.println(numeric + "\t" + originalTzid + "\tStrange ID: " + fixedID);
185                 }
186                 fixedID = originalTzid;
187             } else {
188                 System.out.println("Replacing " + originalTzid + " with " + fixedID);
189             }
190             if (map_integer_zones[numeric] != null) {
191                 System.out.println("Duplicate number:" + numeric + ",\t" + fixedID + ",\t" + originalTzid + ",\t"
192                     + map_integer_zones[numeric]);
193                 fixedID = "{" + originalTzid + "}";
194             }
195             if (map_timezone_integer.get(fixedID) != null) {
196                 System.out.println("Duplicate zone:" + numeric + ",\t" + fixedID + ",\t" + originalTzid + ",\t"
197                     + map_timezone_integer.get(fixedID));
198                 fixedID = "{" + originalTzid + "}";
199             }
200             map_integer_zones[numeric] = fixedID;
201             map_timezone_integer.put(fixedID, new Integer(numeric));
202             if (maxNumeric < numeric)
203                 maxNumeric = numeric;
204         }
205         // get the differences (and sort them)
206         RuleBasedCollator eng = (RuleBasedCollator) Collator.getInstance();
207         eng.setNumericCollation(true);
208 
209         Set<String> extra = new TreeSet<>(eng);
210         extra.addAll(map_timezone_integer.keySet());
211         extra.removeAll(timezones);
212         System.out.println("Extra: " + extra);
213         Set<String> needed = new TreeSet<>(eng);
214         needed.addAll(timezones);
215         needed.removeAll(map_timezone_integer.keySet());
216         System.out.println("Needed: " + needed);
217 
218         // fill in the slots with the missing items
219         // make Etc/GMT go first
220         int numeric = 1;
221         List<String> ordered = new ArrayList<>(needed);
222         // if (ordered.contains("Etc/GMT")) {
223         // ordered.remove("Etc/GMT");
224         // ordered.add(0,"Etc/GMT");
225         // }
226 
227         for (String tzid : ordered) {
228             while (map_integer_zones[numeric] != null)
229                 ++numeric; // find first free one
230             if (maxNumeric < numeric)
231                 maxNumeric = numeric;
232             map_integer_zones[numeric] = tzid;
233             map_timezone_integer.put(tzid, new Integer(numeric));
234         }
235 
236         // print it out
237         Map<String, Set<String>> equiv = sc.zoneParser.getZoneLinkNew_OldSet();
238         Set<String> old = new TreeSet<>();
239         for (int i = 1; i <= maxNumeric; ++i) {
240             Set<String> s = equiv.get(map_integer_zones[i]);
241             if (s != null) {
242                 old.clear();
243                 old.addAll(s);
244             }
245             System.out.println("\t\"" + map_integer_zones[i] + "\",");
246         }
247     }
248 
checkTranslit()249     private static void checkTranslit() {
250 
251         for (int i = 0; i < 0xFFFF; ++i) {
252             checkTranslit(UTF16.valueOf(i));
253         }
254         PrintStream out = System.out;
255         Transliterator toHTML = TransliteratorUtilities.toHTML;
256         UnicodeSet a_out = new UnicodeSet("[:whitespace:]");
257         for (UnicodeSetIterator it = new UnicodeSetIterator(a_out); it.next();) {
258             int s = it.codepoint;
259             String literal = toHTML.transliterate(UTF16.valueOf(s));
260             out.println(com.ibm.icu.impl.Utility.hex(s, 4) + " (" + literal + ") " + UCharacter.getName(s));
261         }
262     }
263 
checkTranslit(String string)264     private static void checkTranslit(String string) {
265         String html = TransliteratorUtilities.toHTML.transliterate(string);
266         String reverse = TransliteratorUtilities.fromHTML.transliterate(html);
267         if (!reverse.equals(string))
268             System.out
269                 .println(string + "\t=>\t" + html + "\t=>\t" + reverse + (!reverse.equals(string) ? " FAIL" : ""));
270         String htmlAscii = TransliteratorUtilities.toHTMLAscii.transliterate(string);
271         String reverseAscii = TransliteratorUtilities.fromHTML.transliterate(htmlAscii);
272         if (!reverseAscii.equals(string))
273             System.out.println(string + "\t=>\t" + htmlAscii + "\t=>\t" + reverseAscii
274                 + (!reverseAscii.equals(string) ? " FAIL" : ""));
275     }
276 
writeMetaData()277     private static void writeMetaData() throws IOException {
278         CLDRFile meta = SimpleFactory.makeFile("metaData").setNonInheriting(true);
279         String[] elements = new String[] { "ldml", "identity", "alias", "localeDisplayNames", "layout", "characters",
280             "delimiters", "measurement", "dates", "numbers", "collations", "posix",
281             "segmentations", "references", "version", "generation", "language", "script", "territory", "variant",
282             "languages", "scripts", "territories", "variants", "keys", "types",
283             "measurementSystemNames", "key", "type", "measurementSystemName", "orientation", "inList",
284             "exemplarCharacters", "mapping", "quotationStart", "quotationEnd", "alternateQuotationStart",
285             "alternateQuotationEnd", "measurementSystem", "paperSize", "height", "width", "localizedPatternChars",
286             "calendars", "timeZoneNames", "months", "monthNames", "monthAbbr", "days", "dayNames",
287             "dayAbbr", "quarters", "week", "am", "pm", "eras", "dateFormats", "timeFormats", "dateTimeFormats",
288             "fields", "month", "day", "quarter", "minDays", "firstDay", "weekendStart", "weekendEnd",
289             "eraNames", "eraAbbr", "era", "pattern", "displayName", "dateFormatItem", "appendItem", "hourFormat",
290             "hoursFormat", "gmtFormat", "regionFormat", "fallbackFormat", "abbreviationFallback",
291             "preferenceOrdering", "singleCountries", "default", "calendar", "monthContext", "monthWidth", "dayContext",
292             "dayWidth", "quarterContext", "quarterWidth", "dateFormatLength", "dateFormat",
293             "timeFormatLength", "timeFormat", "dateTimeFormatLength", "availableFormats", "appendItems",
294             "dateTimeFormat", "zone", "metazone", "long", "short", "usesMetazone", "exemplarCity", "generic",
295             "standard", "daylight", "field", "relative", "symbols", "decimalFormats", "scientificFormats",
296             "percentFormats", "currencyFormats", "currencies", "decimalFormatLength", "decimalFormat",
297             "scientificFormatLength", "scientificFormat", "percentFormatLength", "percentFormat", "currencySpacing",
298             "currencyFormatLength", "beforeCurrency", "afterCurrency", "currencyMatch",
299             "surroundingMatch", "insertBetween", "currencyFormat", "currency", "symbol", "decimal", "group", "list",
300             "percentSign", "nativeZeroDigit", "patternDigit", "plusSign", "minusSign",
301             "exponential", "perMille", "infinity", "nan", "collation", "messages", "yesstr", "nostr", "yesexpr",
302             "noexpr", "segmentation", "variables", "segmentRules", "special", "variable", "rule",
303             "comment",
304             // collation
305             "base", "settings", "suppress_contractions", "optimize", "rules" };
306         String list = String.join(" ", elements);
307         String prefix = "//supplementalData[@version=\"1.4\"]/metaData/";
308         meta.add(prefix + "elementOrder", list);
309 
310         String[] attOrder = new String[] { "_q",
311             "type",
312             // always after
313             "key", "registry", "source", "target", "path", "day", "date", "version", "count", "lines", "characters",
314             "before", "from", "to", "number", "time", "casing", "list", "uri", "iso4217",
315             "digits", "rounding", "iso3166", "hex", "id", "request", "direction",
316             // collation stuff
317             "alternate", "backwards", "caseFirst", "caseLevel", "hiraganaQuarternary", "hiraganaQuaternary",
318             "normalization", "numeric", "strength",
319             // always near the end
320             "validSubLocales", "standard", "references", "elements", "element", "attributes", "attribute",
321             // these are always at the end
322             "alt", "draft", };
323         meta.add(prefix + "attributeOrder", String.join(" ", attOrder));
324 
325         String[] serialElements = new String[] { "variable", "comment",
326             "tRule",
327             // collation
328             "reset", "p", "pc", "s", "sc", "t", "tc", "i", "ic", "x", "extend", "first_variable", "last_variable",
329             "first_tertiary_ignorable", "last_tertiary_ignorable",
330             "first_secondary_ignorable", "last_secondary_ignorable", "first_primary_ignorable",
331             "last_primary_ignorable", "first_non_ignorable", "last_non_ignorable", "first_trailing", "last_trailing" };
332         meta.add(prefix + "serialElements", String.join(" ", serialElements));
333         /*
334          *
335          * <attributeValues elements="weekendStart weekendEnd" attributes="day"
336          * order="given"> sun mon tue wed thu fri sat</attributeValues>
337          *
338          * if (attribute.equals("day")) { // && (element.startsWith("weekend") comp =
339          * dayValueOrder; } else if (attribute.equals("type")) {
340          *
341          * else if (element.equals("day")) comp = dayValueOrder;
342          *
343          * else if (element.equals("zone")) comp = zoneOrder;
344          */
345         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
346         GenerateAttributeList attributes = new GenerateAttributeList(cldrFactory);
347         Map<String, Map<String, Set<String>[]>> element_attribute_valueSet = attributes.getElement_attribute_valueSet();
348         for (Iterator<String> it = element_attribute_valueSet.keySet().iterator(); it.hasNext();) {
349             String element = it.next();
350             Map<String, Set<String>[]> attribute_valueSet = element_attribute_valueSet.get(element);
351             int size = attribute_valueSet.size();
352             if (size == 0)
353                 continue;
354             for (Iterator<String> it2 = attribute_valueSet.keySet().iterator(); it2.hasNext();) {
355                 String attribute = it2.next();
356                 Set<String>[] valueSets = attribute_valueSet.get(attribute);
357                 for (int i = 0; i < 2; ++i) {
358                     meta.add(prefix + "valid/attributeValues" + "[@elements=\"" + element + "\"]" + "[@attributes=\""
359                         + attribute + "\"]" + (i == 1 ? "[@x=\"true\"]" : ""),
360                         Joiner.on(" ").join(valueSets[i]));
361                 }
362             }
363         }
364 
365         String[] dayValueOrder = new String[] { "sun", "mon", "tue", "wed", "thu", "fri", "sat" };
366         meta.add(prefix + "valid/attributeValues[@order=\"given\"][@attributes=\"type\"][@elements=\"" + "day" + "\"]",
367             String.join(" ", dayValueOrder));
368         meta.add(prefix + "valid/attributeValues[@order=\"given\"][@attributes=\"" + "day" + "\"][@elements=\""
369             + "firstDay weekendEnd weekendStart" + "\"]", String.join(" ", dayValueOrder));
370 
371         String[] widths = { "monthWidth", "dayWidth", "quarterWidth" };
372         String[] widthOrder = new String[] { "abbreviated", "narrow", "wide" };
373         meta.add(prefix + "valid/attributeValues[@order=\"given\"][@attributes=\"type\"][@elements=\""
374             + String.join(" ", widths) + "\"]", String.join(" ", widthOrder));
375 
376         String[] formatLengths = { "dateFormatLength", "timeFormatLength", "dateTimeFormatLength",
377             "decimalFormatLength", "scientificFormatLength", "percentFormatLength", "currencyFormatLength" };
378         String[] lengthOrder = new String[] { "full", "long", "medium", "short" };
379         meta.add(prefix + "valid/attributeValues[@order=\"given\"][@attributes=\"type\"][@elements=\""
380             + String.join(" ", formatLengths) + "\"]", String.join(" ", lengthOrder));
381 
382         String[] dateFieldOrder = new String[] { "era", "year", "month", "week", "day", "weekday", "dayperiod", "hour",
383             "minute", "second", "zone" };
384         meta.add(prefix + "valid/attributeValues[@order=\"given\"][@attributes=\"type\"][@elements=\"field\"]",
385             String.join(" ", dateFieldOrder));
386 
387         String[][] suppressData = { { "ldml", "version", "*" }, { "orientation", "characters", "left-to-right" },
388             { "orientation", "lines", "top-to-bottom" }, { "weekendStart", "time", "00:00" },
389             { "weekendEnd", "time", "24:00" }, { "dateFormat", "type", "standard" },
390             { "timeFormat", "type", "standard" }, { "dateTimeFormat", "type", "standard" },
391             { "decimalFormat", "type", "standard" }, { "scientificFormat", "type", "standard" },
392             { "percentFormat", "type", "standard" }, { "currencyFormat", "type", "standard" },
393             { "pattern", "type", "standard" }, { "currency", "type", "standard" }, { "collation", "type", "standard" },
394             { "*", "_q", "*" }, };
395         for (int i = 0; i < suppressData.length; ++i) {
396             meta.add(prefix + "suppress/attributes" + "[@element=\"" + suppressData[i][0] + "\"][@attribute=\""
397                 + suppressData[i][1] + "\"][@attributeValue=\"" + suppressData[i][2] + "\"]", "");
398         }
399         // write out and look at
400         PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "meta/", "metaData.xml");
401         meta.write(out);
402         out.close();
403         XMLFileReader xfr = new XMLFileReader().setHandler(new MyHandler());
404         xfr.read(CLDRPaths.GEN_DIRECTORY + "meta/metaData.xml", XMLFileReader.CONTENT_HANDLER
405             | XMLFileReader.ERROR_HANDLER, false);
406     }
407 
testXMLFileReader()408     private static void testXMLFileReader() {
409         XMLFileReader xfr = new XMLFileReader().setHandler(new MyHandler());
410         xfr.read(CLDRPaths.MAIN_DIRECTORY + "root.xml", -1, true);
411     }
412 
413     static class MyHandler extends XMLFileReader.SimpleHandler {
414 
415         @Override
handleAttributeDecl(String eName, String aName, String type, String mode, String value)416         public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
417             System.out.println("eName: " + eName + ",\t aName: " + aName + ",\t type: " + type + ",\t mode: " + mode
418                 + ",\t value: " + value);
419         }
420 
421         @Override
handleElementDecl(String name, String model)422         public void handleElementDecl(String name, String model) {
423             System.out.println("name: " + name + ",\t model: " + model);
424         }
425 
426         @Override
handlePathValue(String path, String value)427         public void handlePathValue(String path, String value) {
428             System.out.println("path: " + path + ",\t value: " + value);
429         }
430 
431         @Override
handleComment(String path, String comment)432         public void handleComment(String path, String comment) {
433             System.out.println("path: " + path + ",\t comment: " + comment);
434         }
435 
436     }
437 
testBreakIterator(String text)438     public static void testBreakIterator(String text) {
439         System.out.println(text);
440         String choice = "Line";
441 
442         String BASE_RULES = "'<' > '&lt;' ;" + "'<' < '&'[lL][Tt]';' ;" + "'&' > '&amp;' ;"
443             + "'&' < '&'[aA][mM][pP]';' ;" + "'>' < '&'[gG][tT]';' ;" + "'\"' < '&'[qQ][uU][oO][tT]';' ; "
444             + "'' < '&'[aA][pP][oO][sS]';' ; ";
445 
446         String CONTENT_RULES = "'>' > '&gt;' ;";
447 
448         String HTML_RULES = BASE_RULES + CONTENT_RULES + "'\"' > '&quot;' ; ";
449 
450         String HTML_RULES_CONTROLS = HTML_RULES
451             + "([[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:][\\u0080-\\U0010FFFF]]) > &hex/xml($1) ; ";
452 
453         Transliterator toHTML = Transliterator.createFromRules("any-xml", HTML_RULES_CONTROLS, Transliterator.FORWARD);
454 
455         RuleBasedBreakIterator b;
456         if (choice.equals("Word"))
457             b = (RuleBasedBreakIterator) BreakIterator.getWordInstance();
458         else if (choice.equals("Line"))
459             b = (RuleBasedBreakIterator) BreakIterator.getLineInstance();
460         else if (choice.equals("Sentence"))
461             b = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance();
462         else
463             b = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance();
464 
465         Matcher decimalEscapes = PatternCache.get("&#(x?)([0-9]+);").matcher(text);
466         // quick hack, since hex-any doesn't do decimal escapes
467         int start = 0;
468         StringBuffer result2 = new StringBuffer();
469         while (decimalEscapes.find(start)) {
470             int radix = decimalEscapes.group(2).length() == 0 ? 10 : 16;
471             int code = Integer.parseInt(decimalEscapes.group(2), radix);
472             result2.append(text.substring(start, decimalEscapes.start()) + UTF16.valueOf(code));
473             start = decimalEscapes.end();
474         }
475         result2.append(text.substring(start));
476         text = result2.toString();
477 
478         int lastBreak = 0;
479         StringBuffer result = new StringBuffer();
480         b.setText(text);
481         b.first();
482         for (int nextBreak = b.next(); nextBreak != BreakIterator.DONE; nextBreak = b.next()) {
483             b.getRuleStatus();
484             String piece = text.substring(lastBreak, nextBreak);
485             piece = toHTML.transliterate(piece);
486             piece = piece.replaceAll("&#xA;", "<br>");
487             result.append("<span class='break'>").append(piece).append("</span>");
488             lastBreak = nextBreak;
489         }
490 
491         System.out.println(result);
492     }
493 
checkStandardCodes()494     private static void checkStandardCodes() {
495         StandardCodes sc = StandardCodes.make();
496         showCodes(sc, "language");
497         showCodes(sc, "script");
498         showCodes(sc, "territory");
499         showCodes(sc, "tzid");
500         showCodes(sc, "currency");
501 
502         Map<String, Map<String, Map<String, String>>> m = StandardCodes.getLStreg();
503         // print lstreg first
504         if (false) {
505             System.out.println("Printing Data");
506             for (Iterator it = m.keySet().iterator(); it.hasNext();) {
507                 String type = (String) it.next();
508                 Map subtagData = m.get(type);
509                 for (Iterator it2 = subtagData.keySet().iterator(); it2.hasNext();) {
510                     String subtag = (String) it2.next();
511                     Map labelData = (Map) subtagData.get(subtag);
512                     System.out.println(type + "\t " + subtag + "\t " + labelData);
513                 }
514             }
515         }
516         for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) {
517             String type = it.next();
518             Map<String, Map<String, String>> subtagData = m.get(type);
519 
520             String oldType = type.equals("region") ? "territory" : type;
521             Set<String> allCodes = sc.getAvailableCodes(oldType);
522             Set<String> temp = new TreeSet<>(subtagData.keySet());
523             temp.removeAll(allCodes);
524             System.out.println(type + "\t in new but not old\t" + temp);
525 
526             temp = new TreeSet<>(allCodes);
527             temp.removeAll(subtagData.keySet());
528             System.out.println(type + "\t in old but not new\t" + temp);
529         }
530         for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) {
531             String type = it.next();
532             Map<String, Map<String, String>> subtagData = m.get(type);
533             String oldType = type.equals("region") ? "territory" : type;
534             Set<String> goodCodes = sc.getGoodAvailableCodes(oldType);
535 
536             for (Iterator<String> it2 = subtagData.keySet().iterator(); it2.hasNext();) {
537                 String tag = it2.next();
538                 Map<String, String> data = subtagData.get(tag);
539                 List<String> sdata = sc.getFullData(oldType, tag);
540                 if (sdata == null) {
541                     if (true)
542                         continue;
543                     System.out.println("new in ltru");
544                     System.out.println("\t" + type + "\t" + tag + "\t" + data);
545                     continue;
546                 }
547                 String description = sdata.get(0);
548                 boolean deprecated = !goodCodes.contains(tag);
549                 if (description.equalsIgnoreCase("PRIVATE USE")) {
550                     // description = "";
551                     deprecated = false;
552                 }
553                 String newDescription = data.get("Description");
554                 boolean newDeprecated = data.get("Deprecated") != null;
555                 if (!description.equals(newDescription)) {
556                     System.out.println(type + "\t" + tag + "\tDescriptions differ: {" + description + "} ### {"
557                         + newDescription + "}");
558                 }
559                 if (deprecated != newDeprecated) {
560                     System.out.println(type + "\t" + tag + "\tDeprecated differs: {" + deprecated + "} ### {"
561                         + newDeprecated + "}");
562                 }
563             }
564         }
565         // print metadata
566         for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) {
567             String type = it.next();
568             Map<String, Map<String, String>> subtagData = m.get(type);
569             String oldType = type.equals("region") ? "territory" : type;
570 
571             String aliasType =oldType.equals("legacy") ? "language" : oldType;
572             Set<String> allCodes = new TreeSet<>();
573             Set<String> deprecatedCodes = new TreeSet<>();
574 
575             for (Iterator<String> it2 = subtagData.keySet().iterator(); it2.hasNext();) {
576                 String tag = it2.next();
577                 Map<String, String> data = subtagData.get(tag);
578                 if (data.get("Deprecated") != null) {
579                     String preferred = data.get("Preferred-Value");
580                     String cldr = null != data.get("CLDR") ? "CLDR: " : "";
581                     System.out.println("\t\t\t<" + aliasType + "Alias type=\"" + tag + "\""
582                         + (preferred == null || preferred.length() == 0 ? "" : " replacement=\"" + preferred + "\"")
583                         + "/> <!-- " + cldr
584                         + data.get("Description") + " -->");
585                     deprecatedCodes.add(tag);
586                 } else {
587                     allCodes.add(tag);
588                 }
589             }
590             // get old ones
591             Set<String> goodCodes = sc.getAvailableCodes(oldType);
592             TreeSet<String> oldAndNotNew = new TreeSet<>(goodCodes);
593             oldAndNotNew.removeAll(allCodes);
594             oldAndNotNew.removeAll(deprecatedCodes);
595             for (Iterator<String> it2 = oldAndNotNew.iterator(); it2.hasNext();) {
596                 String tag = it2.next();
597                 List<String> sdata = sc.getFullData(oldType, tag);
598                 String preferred = sdata.get(2);
599                 System.out.println("\t\t\t<" + aliasType + "Alias type=\"" + tag + "\" replacement=\"" + preferred
600                     + "\"/> <!-- CLDR:" + sdata.get(0) + " -->");
601             }
602             String allCodeString = Joiner.on(" ").join(allCodes);
603             System.out
604                 .println("\t\t\t<variable id=\"$" + oldType + "\" type=\"list\">" + allCodeString + "</variable>");
605         }
606     }
607 
showCodes(StandardCodes sc, String type)608     private static void showCodes(StandardCodes sc, String type) {
609         Set<String> codes = sc.getSurveyToolDisplayCodes(type);
610         System.out.println("Survey Tool Codes " + codes.size() + "\t" + type);
611         for (String code : codes) {
612             System.out.println("\t" + code + "\t" + sc.getFullData(type, code));
613         }
614     }
615 
checkLanguages()616     private static void checkLanguages() {
617         // TODO Auto-generated method stub
618 
619         Factory mainCldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "main" + File.separator, ".*");
620         Set<String> availableLocales = mainCldrFactory.getAvailable();
621         Set<String> available = new TreeSet<>();
622         LocaleIDParser lip = new LocaleIDParser();
623         for (Iterator<String> it = availableLocales.iterator(); it.hasNext();) {
624             available.add(lip.set(it.next()).getLanguage());
625         }
626         Set<String> langHack = new TreeSet<>();
627         for (int i = 0; i < language_territory_hack.length; ++i) {
628             String lang = language_territory_hack[i][0];
629             langHack.add(lang);
630         }
631         if (langHack.containsAll(available))
632             System.out.println("All ok");
633         else {
634             available.removeAll(langHack);
635             for (Iterator<String> it = available.iterator(); it.hasNext();) {
636                 String item = it.next();
637                 System.out.println("{\"" + item + "\", \"XXX\"},/t//"
638                     + ULocale.getDisplayLanguage(item, ULocale.ENGLISH));
639             }
640         }
641     }
642 
643     /**
644      * @throws IOException
645      *
646      */
printCountries()647     private static void printCountries() throws IOException {
648         Factory mainCldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "main" + File.separator, ".*");
649         CLDRFile english = mainCldrFactory.make("en", true);
650         PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "country_language_names.txt");
651         StandardCodes sc = StandardCodes.make();
652         for (Iterator<String> it = sc.getGoodAvailableCodes("language").iterator(); it.hasNext();) {
653             String code = it.next();
654             out.println(code + "\t" + english.getName(CLDRFile.LANGUAGE_NAME, code));
655         }
656         out.println("****");
657         for (Iterator<String> it = sc.getGoodAvailableCodes("territory").iterator(); it.hasNext();) {
658             String code = it.next();
659             out.println(code + "\t" + english.getName(CLDRFile.TERRITORY_NAME, code));
660         }
661         out.println("****");
662         for (Iterator<String> it = sc.getGoodAvailableCodes("script").iterator(); it.hasNext();) {
663             String code = it.next();
664             out.println(code + "\t" + english.getName(CLDRFile.SCRIPT_NAME, code));
665         }
666         out.close();
667     }
668 
669     /**
670      *
671      */
printCurrencies()672     private static void printCurrencies() {
673         StandardCodes sc = StandardCodes.make();
674         Set<String> s = sc.getAvailableCodes("currency");
675         for (Iterator<String> it = s.iterator(); it.hasNext();) {
676             String code = it.next();
677             String name = sc.getData("currency", code);
678             List<String> data = sc.getFullData("currency", code);
679             System.out.println(code + "\t" + name + "\t" + data);
680         }
681     }
682 
683     /**
684      * @throws IOException
685      * @throws ParseException
686      *
687      */
printZoneSamples()688     private static void printZoneSamples() throws Exception {
689         String[] locales = { "en", "en_GB", "de", "zh", "hi", "bg", "ru", "ja", "as" // picked
690             // deliberately
691             // because
692             // it
693             // has
694             // few
695             // itesm
696         };
697         String[] zones = { "America/Los_Angeles", "America/Argentina/Buenos_Aires", "America/Buenos_Aires",
698             "America/Havana", "Australia/ACT", "Australia/Sydney", "Europe/London", "Europe/Moscow",
699             "Etc/GMT+3" };
700         String[][] fields = { { "2004-01-15T00:00:00Z", "Z", "ZZZZ", "z", "zzzz" },
701             { "2004-07-15T00:00:00Z", "Z", "ZZZZ", "z", "zzzz", "v", "vvvv" } };
702         Factory mainCldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "main" + File.separator, ".*");
703         PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "timezone_samples.txt");
704         long[] offsetMillis = new long[1];
705         ParsePosition parsePosition = new ParsePosition(0);
706 
707         for (int i = 0; i < locales.length; ++i) {
708             String locale = locales[i];
709             TimezoneFormatter tzf = new TimezoneFormatter(mainCldrFactory, locale, false).setSkipDraft(true);
710             for (int j = 0; j < zones.length; ++j) {
711                 String zone = zones[j];
712                 for (int k = 0; k < fields.length; ++k) {
713                     String type = fields[k][0];
714                     Date datetime = ICUServiceBuilder.isoDateParse(type);
715                     for (int m = 1; m < fields[k].length; ++m) {
716                         String field = fields[k][m];
717                         String formatted = tzf.getFormattedZone(zone, field, datetime.getTime(), false);
718                         parsePosition.setIndex(0);
719                         String parsed = tzf.parse(formatted, parsePosition, offsetMillis);
720                         if (parsed == null)
721                             parsed = "FAILED PARSE";
722                         else if (parsed.length() == 0)
723                             parsed = format(offsetMillis[0]);
724                         out.println("{\"" + locale + "\",\t\"" + zone + "\",\t\"" + type + "\",\t\"" + field
725                             + "\",\t\"" + formatted + "\",\t\"" + parsed + "\"},");
726                     }
727                 }
728                 out.println();
729             }
730             out.println("==========");
731             out.println();
732         }
733         out.close();
734     }
735 
736     /**
737      * quick & dirty format
738      */
format(long offsetMillis)739     private static String format(long offsetMillis) {
740         offsetMillis /= 60 * 1000;
741         String sign = "+";
742         if (offsetMillis < 0) {
743             offsetMillis = -offsetMillis;
744             sign = "-";
745         }
746         return sign + String.valueOf(offsetMillis / 60) + ":"
747             + String.valueOf(100 + (offsetMillis % 60)).substring(1, 3);
748     }
749 
    /**
     * Hard-coded table of two-element rows: a language code followed by a region-style code.
     * <p>
     * Within this part of the file only {@code checkLanguages()} reads it, and only element 0 of
     * each row, to decide which languages are already covered. Most first elements are bare
     * language codes; {@code "zh_Hant"} also carries a script.
     * <p>
     * NOTE(review): the second element looks like the default territory for the language;
     * confirm against other users of this table before relying on that meaning.
     */
    private static final String[][] language_territory_hack = { { "af", "ZA" }, { "am", "ET" }, { "ar", "SA" },
        { "as", "IN" }, { "ay", "PE" }, { "az", "AZ" }, { "bal", "PK" }, { "be", "BY" },
        { "bg", "BG" }, { "bn", "IN" }, { "bs", "BA" }, { "ca", "ES" }, { "ch", "MP" }, { "cpe", "SL" },
        { "cs", "CZ" }, { "cy", "GB" }, { "da", "DK" }, { "de", "DE" }, { "dv", "MV" }, { "dz", "BT" },
        { "el", "GR" }, { "en", "US" }, { "es", "ES" }, { "et", "EE" }, { "eu", "ES" }, { "fa", "IR" }, { "fi", "FI" },
        { "fil", "PH" }, { "fj", "FJ" }, { "fo", "FO" }, { "fr", "FR" }, { "ga", "IE" },
        { "gd", "GB" }, { "gl", "ES" }, { "gn", "PY" }, { "gu", "IN" }, { "gv", "GB" }, { "ha", "NG" }, { "he", "IL" },
        { "hi", "IN" }, { "ho", "PG" }, { "hr", "HR" }, { "ht", "HT" }, { "hu", "HU" },
        { "hy", "AM" }, { "id", "ID" }, { "is", "IS" }, { "it", "IT" }, { "ja", "JP" }, { "ka", "GE" }, { "kk", "KZ" },
        { "kl", "GL" }, { "km", "KH" }, { "kn", "IN" }, { "ko", "KR" }, { "kok", "IN" },
        { "ks", "IN" }, { "ku", "TR" }, { "ky", "KG" }, { "la", "VA" }, { "lb", "LU" }, { "ln", "CG" }, { "lo", "LA" },
        { "lt", "LT" }, { "lv", "LV" }, { "mai", "IN" }, { "men", "GN" }, { "mg", "MG" },
        { "mh", "MH" }, { "mk", "MK" }, { "ml", "IN" }, { "mn", "MN" }, { "mni", "IN" }, { "mo", "MD" },
        { "mr", "IN" }, { "ms", "MY" }, { "mt", "MT" }, { "my", "MM" }, { "na", "NR" }, { "nb", "NO" },
        { "nd", "ZA" }, { "ne", "NP" }, { "niu", "NU" }, { "nl", "NL" }, { "nn", "NO" }, { "no", "NO" },
        { "nr", "ZA" }, { "nso", "ZA" }, { "ny", "MW" }, { "om", "KE" }, { "or", "IN" }, { "pa", "IN" },
        { "pau", "PW" }, { "pl", "PL" }, { "ps", "PK" }, { "pt", "BR" }, { "qu", "PE" }, { "rn", "BI" },
        { "ro", "RO" }, { "ru", "RU" }, { "rw", "RW" }, { "sd", "IN" }, { "sg", "CF" }, { "si", "LK" },
        { "sk", "SK" }, { "sl", "SI" }, { "sm", "WS" }, { "so", "DJ" }, { "sq", "CS" }, { "sr", "CS" }, { "ss", "ZA" },
        { "st", "ZA" }, { "sv", "SE" }, { "sw", "KE" }, { "ta", "IN" }, { "te", "IN" },
        { "tem", "SL" }, { "tet", "TL" }, { "th", "TH" }, { "ti", "ET" }, { "tg", "TJ" }, { "tk", "TM" },
        { "tkl", "TK" }, { "tvl", "TV" }, { "tl", "PH" }, { "tn", "ZA" }, { "to", "TO" },
        { "tpi", "PG" }, { "tr", "TR" }, { "ts", "ZA" }, { "uk", "UA" }, { "ur", "IN" }, { "uz", "UZ" },
        { "ve", "ZA" }, { "vi", "VN" }, { "wo", "SN" }, { "xh", "ZA" }, { "zh", "CN" },
        { "zh_Hant", "TW" }, { "zu", "ZA" }, { "aa", "ET" }, { "byn", "ER" }, { "eo", "DE" }, { "gez", "ET" },
        { "haw", "US" }, { "iu", "CA" }, { "kw", "GB" }, { "sa", "IN" }, { "sh", "HR" },
        { "sid", "ET" }, { "syr", "SY" }, { "tig", "ER" }, { "tt", "RU" }, { "wal", "ET" }, };
777 
778 }
779