/*
 ******************************************************************************
 * Copyright (C) 2004, International Business Machines Corporation and        *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
package org.unicode.cldr.test;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.text.DateTimePatternGenerator;
import com.ibm.icu.text.DateTimePatternGenerator.FormatParser;
import com.ibm.icu.text.DateTimePatternGenerator.VariableField;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.UnicodeSet;

/**
 * Test class for trying different approaches to flexible date/time.
 * Internal Use.
 * Once we figure out what approach to take, this should turn into the test file
 * for the data.
 */
public class FlexibleDateTime {
    static final boolean DEBUG = false;
    static final boolean SHOW_MATCHING = false;
    static final boolean SHOW2 = false;
    static final boolean SHOW_OO = false;
    static final String SEPARATOR = CldrUtility.LINE_SEPARATOR + "\t";

    /**
     * Test different ways of doing flexible date/times.
     * Internal Use.
     * <p>
     * The body is currently disabled; the drivers it used to call are kept below
     * as commented-out code.
     *
     * @param args command-line arguments (unused while the body is disabled)
     * @throws IOException declared for the disabled drivers; never thrown at present
     */
    public static void main(String[] args) throws IOException {
        // if (false) { // just for testing simple cases
        // DateTimePatternGenerator.DateTimeMatcher a = new DateTimePatternGenerator.DateTimeMatcher().set("HH:mm");
        // DateTimePatternGenerator.DateTimeMatcher b = new DateTimePatternGenerator.DateTimeMatcher().set("kkmm");
        // DistanceInfo missingFields = new DistanceInfo();
        // int distance = a.getDistance(b, -1, missingFields);
        // }
        // generate(args);
        // test(args);
    }

    /**
     * Optional sink for diagnostic messages. Nothing in the live code of this
     * file assigns it, so it may be {@code null}; write through
     * {@link #logLine(String)} rather than dereferencing it directly.
     */
    public static PrintWriter log;

    /**
     * Writes one diagnostic line to {@link #log} if a writer has been installed,
     * and does nothing otherwise. This keeps callers such as
     * {@link #getOOData(Factory, String)} from failing with a
     * {@link NullPointerException} when no log has been set up.
     *
     * @param message the line to write
     */
    private static void logLine(String message) {
        PrintWriter writer = log;
        if (writer != null) {
            writer.println(message);
        }
    }

    // private static void generate(String[] args) throws IOException {
    // log = FileUtilities.openUTF8Writer(Utility.GEN_DIRECTORY + "/flex/", "log.txt");
    // String filter = ".*";
    // if (args.length > 0)
    // filter = args[0];
    //
    // Factory cldrFactory = Factory.make(Utility.BASE_DIRECTORY
    // + "open_office/main/", filter);
    // Factory mainCLDRFactory = Factory.make(Utility.MAIN_DIRECTORY, ".*");
    // FormatParser fp = new FormatParser();
    // // fix locale list
    // Collection ooLocales = new LinkedHashSet(cldrFactory.getAvailable());
    // ooLocales.remove("nb_NO"); // hack, since no_NO is the main one, and subsumes nb
    // Map localeMap = new LocaleIDFixer().fixLocales(ooLocales, new TreeMap());
    // //pw.println(localeMap);
    //
    // for (Iterator it = localeMap.keySet().iterator(); it.hasNext();) {
    // String sourceLocale = (String) it.next();
    // String targetLocale = (String) localeMap.get(sourceLocale);
    // ULocale uSourceLocale = new ULocale(sourceLocale);
    // ULocale uTargetLocale = new ULocale(targetLocale);
    // log.println();
    // log.println(uTargetLocale.getDisplayName(ULocale.ENGLISH) + " (" + uTargetLocale + ")");
    // System.out.println(sourceLocale + "\t\u2192" + uTargetLocale.getDisplayName(ULocale.ENGLISH) + " (" +
    // uTargetLocale + ")");
    // if (!sourceLocale.equals(targetLocale)) {
    // log.println("[oo: " + uSourceLocale.getDisplayName(ULocale.ENGLISH) + " (" + sourceLocale + ")]");
    // }
    // Collection list = getOOData(cldrFactory, sourceLocale);
    // // get the current values
    // try {
    // Collection currentList = getDateFormats(mainCLDRFactory, targetLocale);
    // list.removeAll(currentList);
    // } catch (RuntimeException e) {
    // // ignore
    // }
    //
    // if (list.size() == 0) {
    // log.println(sourceLocale + "\tEMPTY!"); // skip empty
    // continue;
    // }
    // CLDRFile temp = CLDRFile.make(targetLocale);
    // String prefix =
    // "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@_q=\"";
    //
    // int count = 0;
    // Map previousID = new HashMap();
    // for (Iterator it2 = list.iterator(); it2.hasNext();) {
    // String pattern = (String) it2.next();
    // new SimpleDateFormat(pattern); // check that compiles
    // fp.set(pattern);
    // String id = fp.getVariableFieldString();
    // if (!allowedDateTimeCharacters.containsAll(id)) throw new IllegalArgumentException("Illegal characters in: " +
    // pattern);
    // if (id.length() == 0) {
    // throw new IllegalArgumentException("Empty id for: " + pattern);
    // }
    // String previous = (String) previousID.get(id);
    // if (previous != null) {
    // log.println("Skipping Duplicate pattern: " + pattern + " (already have " + previous + ")");
    // continue;
    // } else {
    // previousID.put(id, pattern);
    // }
    // String path = prefix + (count++) + "\"]";
    // temp.add(path, pattern);
    // }
    // PrintWriter pw = FileUtilities.openUTF8Writer(Utility.GEN_DIRECTORY + "/flex/", targetLocale + ".xml");
    // temp.write(pw);
    // pw.close();
    // log.flush();
    // }
    // System.out.println("done");
    // log.close();
    // }
    //
    // private static Collection<String> getDateFormats(Factory mainCLDRFactory, String targetLocale) {
    // List<String> result = new ArrayList<String>();
    // XPathParts parts = new XPathParts(null, null);
    // CLDRFile currentFile = null;
    // String oldTargetLocale = targetLocale;
    // // do fallback
    // do {
    // try {
    // currentFile = mainCLDRFactory.make(targetLocale, true);
    // } catch (RuntimeException e) {
    // targetLocale = LocaleIDParser.getParent(targetLocale);
    // if (targetLocale == null) {
    // throw (IllegalArgumentException) new IllegalArgumentException("Couldn't open " + oldTargetLocale).initCause(e);
    // }
    // log.println("FALLING BACK TO " + targetLocale + " from " + oldTargetLocale);
    // }
    // } while (currentFile == null);
    // for (String path : currentFile ) {
    // if (!isGregorianPattern(path, parts)) continue;
    // String value = currentFile.getWinningValue(path);
    // result.add(value);
    // //log.println("adding " + path + "\t" + value);
    // }
    // return result;
    // }

    /**
     * Tells whether an XPath addresses a {@code pattern} element that carries
     * {@code type="gregorian"} somewhere along the path.
     *
     * @param path  the XPath to examine
     * @param parts scratch parser; it is overwritten with {@code path} whenever the
     *              quick textual pre-check passes, so callers may inspect it afterwards
     * @return {@code true} if the path has at least 8 elements, element 7 is
     *         {@code pattern}, and a {@code type="gregorian"} attribute is present
     */
    public static boolean isGregorianPattern(String path, XPathParts parts) {
        if (path.indexOf("Formats") < 0) {
            return false; // quick exclude, avoids parsing unrelated paths
        }
        parts.set(path);
        if (parts.size() < 8 || !parts.getElement(7).equals("pattern")) {
            return false;
        }
        return parts.containsAttributeValue("type", "gregorian");
    }

    /**
     * Maps a collection of source locale IDs to the locale IDs they should be
     * stored under, choosing one locale per language(+script) to act as the parent.
     */
    static class LocaleIDFixer {
        LocaleIDParser lip = new LocaleIDParser();
        static final Set<String> mainLocales = new HashSet<String>(
            Arrays.asList("ar_EG", "bn_IN", "de_DE", "en_US", "es_ES", "fr_FR", "it_IT", "nl_NL", "pt_BR",
                "sv_SE", "zh_TW"));
        DeprecatedCodeFixer dcf = new DeprecatedCodeFixer();

        /**
         * Fills {@code result} with a mapping from each available locale to its
         * fixed form, then remaps exactly one locale per language to the bare
         * language(+script) ID.
         *
         * @param available the source locale IDs
         * @param result    the map to populate; it is also the return value
         * @return {@code result}
         * @throws IllegalArgumentException if a language has several locales and none
         *                                  of the selection rules picks a parent
         */
        Map<String, String> fixLocales(Collection<String> available, Map<String, String> result) {
            // find the multi-country locales
            Map<String, Set<String>> languageToLocales = new HashMap<String, Set<String>>();
            for (String locale : available) {
                String fixedLocale = dcf.fixLocale(locale);
                result.put(locale, fixedLocale);
                String language = lip.set(fixedLocale).getLanguageScript();
                Set<String> locales = languageToLocales.get(language);
                if (locales == null) {
                    locales = new HashSet<String>();
                    languageToLocales.put(language, locales);
                }
                locales.add(locale);
            }
            // if a language has a single locale, use it
            // otherwise use main
            for (Map.Entry<String, Set<String>> entry : languageToLocales.entrySet()) {
                String language = entry.getKey();
                Set<String> locales = entry.getValue();
                if (locales.size() == 1) {
                    result.put(locales.iterator().next(), language);
                    continue;
                }
                Set<String> intersect = new HashSet<String>(mainLocales);
                intersect.retainAll(locales);
                if (intersect.size() == 1) {
                    // the intersection is the parent, so overwrite it
                    result.put(intersect.iterator().next(), language);
                    continue;
                }
                if (locales.contains("zh_CN")) { // special case, not worth extra code
                    result.put("zh_CN", "zh");
                    continue;
                }
                throw new IllegalArgumentException("Need parent locale: " + locales);
            }
            return result;
        }
    }

    /**
     * Rewrites locale IDs that use deprecated language or territory codes, using
     * the alias entries read from the supplemental data plus two hard-coded
     * OpenOffice-specific additions.
     */
    static class DeprecatedCodeFixer {
        Map<String, String> languageAlias = new HashMap<String, String>();
        Map<String, String> territoryAlias = new HashMap<String, String>();
        LocaleIDParser lip = new LocaleIDParser();

        /**
         * Loads the alias tables from the paths under
         * {@code //supplementalData/metadata/alias/}.
         *
         * @throws IllegalArgumentException if an alias path has an element at index 3
         *                                  other than {@code languageAlias} or
         *                                  {@code territoryAlias}
         */
        DeprecatedCodeFixer() {
            Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
            CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
            XPathParts parts = new XPathParts(null, null);
            for (Iterator<String> it = supp.iterator("//supplementalData/metadata/alias/"); it.hasNext();) {
                String path = it.next();
                // System.out.println(path);
                // if (!path.startsWith("//supplementalData/metadata/alias/")) continue;
                parts.set(supp.getFullXPath(path));
                // Map attributes = parts.getAttributes(3);
                String type = parts.getAttributeValue(3, "type");
                String replacement = parts.getAttributeValue(3, "replacement");
                String element = parts.getElement(3);
                if (element.equals("languageAlias")) {
                    languageAlias.put(type, replacement);
                } else if (element.equals("territoryAlias")) {
                    territoryAlias.put(type, replacement);
                } else {
                    throw new IllegalArgumentException("Unexpected type: " + path);
                }
            }
            // special hack for OpenOffice
            territoryAlias.put("CB", "029");
            languageAlias.put("no", "nb");
        }

        /**
         * Replaces a deprecated region and/or leading language code in a locale ID.
         *
         * @param locale the locale ID to fix
         * @return the locale ID with the region replaced when a territory alias
         *         exists, and the leading language part replaced by the first
         *         language alias that matches the whole ID or a prefix ending at
         *         {@code '_'}
         */
        String fixLocale(String locale) {
            lip.set(locale);
            String regionReplacement = territoryAlias.get(lip.getRegion());
            if (regionReplacement != null) {
                lip.setRegion(regionReplacement);
            }
            locale = lip.toString();
            // NOTE(review): if more than one alias key matches (e.g. one key being a
            // '_'-delimited prefix of another), the winner depends on HashMap
            // iteration order. Confirm whether longest-match should be enforced.
            for (Map.Entry<String, String> alias : languageAlias.entrySet()) {
                String old = alias.getKey();
                if (!locale.startsWith(old)) {
                    continue;
                }
                if (locale.length() == old.length()) {
                    locale = alias.getValue();
                    break;
                }
                if (locale.charAt(old.length()) == '_') {
                    locale = alias.getValue() + locale.substring(old.length());
                    break;
                }
            }
            // if (!oldLocale.equals(locale)) System.out.println(oldLocale + " \u2192 " + locale);
            return locale;
        }
    }

    // private static void test(String[] args) {
    // // get the locale to use, with default
    // String filter = "en_US";
    // if (args.length > 0)
    // filter = args[0];
    //
    // Factory cldrFactory = Factory.make(CldrUtility.BASE_DIRECTORY
    // + "open_office/main/", filter);
    // for (String locale : cldrFactory.getAvailable()) {
    // ULocale ulocale = new ULocale(locale);
    // System.out.println(ulocale.getDisplayName(ULocale.ENGLISH) + " (" + locale + ")");
    //
    // SimpleDateFormat df = (SimpleDateFormat) DateFormat
    // .getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT,
    // ulocale);
    //
    // Collection list = getOOData(cldrFactory, locale);
    //
    //
    // String[] testData = { "YwE", // year, week of year, weekday
    // "yD", // year, day of year
    // "yMFE", // year, month, nth day of week in month
    // "eG", "dMMy", "kh", "GHHmm", "yyyyHHmm", "Kmm", "kmm",
    // "MMdd", "ddHH", "yyyyMMMd", "yyyyMMddHHmmss",
    // "GEEEEyyyyMMddHHmmss",
    // "GuuuuMMMMwwWddDDDFEEEEaHHmmssSSSvvvv", // bizarre case just for testing
    // };
    // DateTimePatternGenerator fdt = DateTimePatternGenerator.getEmptyInstance();
    // add(fdt, list);
    // Date now = new Date(99, 11, 23, 1, 2, 3);
    // System.out.println("Sample Input: " + now);
    // for (int i = 0; i < testData.length; ++i) {
    // System.out.print("Input request: \t" + testData[i]);
    // System.out.print(SEPARATOR + "Fields: \t" + fdt.getFields(testData[i]));
    // String dfpattern;
    // try {
    // dfpattern = fdt.getBestPattern(testData[i]);
    // } catch (Exception e) {
    // System.out.println(SEPARATOR + e.getMessage());
    // continue;
    // }
    // System.out.print(SEPARATOR + "Localized Pattern: \t" + dfpattern);
    // df.applyPattern(dfpattern);
    // System.out.println(SEPARATOR + "Sample Results: \t?" + df.format(now) + "?");
    // }
    // }
    // }

    /**
     * Adds every pattern in {@code list} to {@code generator}, passing
     * {@code false} for the override flag and {@code null} for the returned-info
     * argument of {@code addPattern}.
     *
     * @param generator the generator to add patterns to
     * @param list      the patterns to add
     */
    public static void add(DateTimePatternGenerator generator, Collection<String> list) {
        for (String pattern : list) {
            generator.addPattern(pattern, false, null);
        }
    }

    // =================

    /**
     * Converts OpenOffice-style date and time format strings into patterns that
     * use only the letters in {@link FlexibleDateTime#allowedDateTimeCharacters}.
     */
    static class OOConverter {
        FormatParser fp = new FormatParser();

        /**
         * Converts an OpenOffice date format string.
         *
         * @param source the OpenOffice format; double quotes are treated as the
         *               quoting character and rewritten to single quotes
         * @param locale the locale ID, used to undo localized field letters
         * @return the converted pattern, or {@code ""} for an empty source
         * @throws IllegalArgumentException if a converted field contains a character
         *                                  outside the allowed set
         */
        public String convertOODate(String source, String locale) {
            if (source.length() == 0) {
                return "";
            }
            source = source.replace('"', '\''); // fix quoting convention
            StringBuilder buffer = new StringBuilder();
            fp.set(source);
            for (Object item : fp.getItems()) {
                if (item instanceof VariableField) {
                    buffer.append(handleOODate(item.toString(), locale));
                } else {
                    buffer.append(item);
                }
            }
            return buffer.toString();
        }

        /**
         * Converts a single OpenOffice date field (a run of one letter).
         *
         * @param string the field text
         * @param locale the locale ID, used to undo localized field letters
         * @return the replacement field text
         * @throws IllegalArgumentException if the result contains a character outside
         *                                  the allowed set
         */
        private String handleOODate(String string, String locale) {
            // preprocess hack for *localized* strings
            if (locale.startsWith("de")) {
                if (string.startsWith("T")) string = string.replace('T', 'D');
                if (string.startsWith("J")) string = string.replace('J', 'Y');
            } else if (locale.startsWith("nl")) {
                if (string.startsWith("J")) string = string.replace('J', 'Y');
            } else if (locale.startsWith("fi")) {
                if (string.startsWith("K")) string = string.replace('K', 'M');
                if (string.startsWith("V")) string = string.replace('V', 'Y');
                if (string.startsWith("P")) string = string.replace('P', 'D');
            } else if (locale.startsWith("fr")) {
                if (string.startsWith("J")) string = string.replace('J', 'D');
                if (string.startsWith("A")) string = string.replace('A', 'Y');
            } else if (locale.startsWith("es") || locale.startsWith("pt")) {
                if (string.startsWith("A")) string = string.replace('A', 'Y');
            } else if (locale.startsWith("it")) {
                if (string.startsWith("A")) string = string.replace('A', 'Y');
                if (string.startsWith("G")) string = string.replace('G', 'D');
            }
            // if (string.startsWith("M")) return string;
            if (string.startsWith("A")) {
                string = string.replace('A', 'y'); // best we can do for now
            } else if (string.startsWith("Y") || string.startsWith("W")
                || string.equals("D") || string.equals("DD")) {
                string = string.toLowerCase();
            } else if (string.equals("DDD") || string.equals("NN")) {
                string = "EEE";
            } else if (string.equals("DDDD") || string.equals("NNN")) {
                string = "EEEE";
            } else if (string.equals("NNNN")) {
                return "EEEE, "; // RETURN WITHOUT TEST
            } else if (string.equals("G") || string.equals("GG") || string.equals("GGG")) {
                string = "G"; // best we can do for now
            } else if (string.equals("E")) {
                string = "y";
            } else if (string.equals("EE") || string.equals("R")) {
                string = "yy";
            } else if (string.equals("RR")) {
                string = "Gyy";
            }
            // if (string.startsWith("Q")) string = string; // '\'' + string + '\'';
            // char c = string.charAt(0);
            // if (c < 0x80 && UCharacter.isLetter(c)else if rn string.replace(c,'x');
            if (!allowedDateTimeCharacters.containsAll(string)) {
                throw new IllegalArgumentException("bad char in: " + string);
            }
            return string;
        }

        /**
         * Converts an OpenOffice time format string. The first {@code AM/PM} marker,
         * if any, becomes {@code a} and switches hour fields to {@code h}; otherwise
         * hour fields become {@code H}.
         *
         * @param source the OpenOffice format; double quotes are rewritten to single
         *               quotes
         * @param locale the locale ID (passed through to the field handler)
         * @return the converted pattern, or {@code ""} for an empty source
         * @throws IllegalArgumentException if a converted field contains a character
         *                                  outside the allowed set
         */
        public String convertOOTime(String source, String locale) {
            if (source.length() == 0) {
                return "";
            }
            source = source.replace('"', '\''); // fix quoting convention
            int amPmIndex = source.indexOf("AM/PM");
            boolean hasAmPm = amPmIndex >= 0;
            if (hasAmPm) {
                // 5 == "AM/PM".length()
                source = source.substring(0, amPmIndex) + "a" + source.substring(amPmIndex + 5);
            }
            StringBuilder buffer = new StringBuilder();
            fp.set(source);
            for (Object item : fp.getItems()) {
                if (item instanceof VariableField) {
                    buffer.append(handleOOTime(item.toString(), locale, hasAmPm));
                } else {
                    buffer.append(item);
                }
            }
            return buffer.toString();
        }

        /**
         * Converts a single OpenOffice time field (a run of one letter).
         *
         * @param string the field text; must be non-empty
         * @param locale the locale ID (currently unused)
         * @param isAM   whether the enclosing format contained an {@code AM/PM} marker
         * @return the replacement field text
         * @throws IllegalArgumentException if the result contains a character outside
         *                                  the allowed set
         */
        private String handleOOTime(String string, String locale, boolean isAM) {
            char c = string.charAt(0);
            switch (c) {
            case 'h':
            case 'H':
            case 't':
            case 'T':
            case 'u':
            case 'U':
                string = string.replace(c, isAM ? 'h' : 'H');
                break;
            case 'M':
            case 'S':
                string = string.toLowerCase();
                break;
            case '0':
                string = string.replace('0', 'S');
                break; // ought to be more sophisticated, but this should work for normal stuff.
            // case 'a': case 's': case 'm': return string; // ok as is
            // default: return "x"; // cause error
            }
            if (!allowedDateTimeCharacters.containsAll(string)) {
                throw new IllegalArgumentException("bad char in: " + string);
            }
            return string;
        }
    }

    /**
     * Sample date used when printing debug output: year 104 + 1900 = 2004,
     * month index 8 = September, day 13, 23:58:59 in the default time zone.
     */
    static Date TEST_DATE = new Date(104, 8, 13, 23, 58, 59);

    /**
     * Orders collections of strings with larger collections first; collections of
     * equal size are compared element by element in reverse natural string order.
     */
    static Comparator<Collection<String>> VariableFieldComparator = new Comparator<Collection<String>>() {
        public int compare(Collection<String> a, Collection<String> b) {
            if (a.size() != b.size()) {
                return a.size() < b.size() ? 1 : -1;
            }
            Iterator<String> itb = b.iterator();
            for (String aa : a) {
                String bb = itb.next();
                int result = -aa.compareTo(bb);
                if (result != 0) {
                    return result;
                }
            }
            return 0;
        }
    };

    /** The only characters a converted pattern field may contain. */
    public static UnicodeSet allowedDateTimeCharacters = new UnicodeSet(
        "[A a c D d E e F G g h H K k L m M q Q s S u v W w Y y z Z]");

    /**
     * Collects the Gregorian date and time patterns of a locale from OpenOffice
     * data, converted and then normalized through {@link SimpleDateFormat}.
     * Patterns that contain {@code '['}, that cannot be converted, or that
     * {@link SimpleDateFormat} rejects are skipped and noted in {@link #log} when
     * a log writer is installed.
     *
     * @param cldrFactory the factory to load the locale's file from
     * @param locale      the locale ID to load
     * @return the converted patterns, in file iteration order
     */
    static Collection<String> getOOData(Factory cldrFactory, String locale) {
        List<String> result = new ArrayList<String>();
        XPathParts parts = new XPathParts(null, null);
        OOConverter ooConverter = new OOConverter();
        if (SHOW_OO) {
            System.out.println();
        }
        CLDRFile item = cldrFactory.make(locale, false);
        for (String xpath : item) {
            if (!isGregorianPattern(xpath, parts)) {
                continue;
            }
            String formatKind = parts.getElement(4);
            boolean isDate = formatKind.equals("dateFormats");
            boolean isTime = formatKind.equals("timeFormats");
            String value = item.getWinningValue(xpath);
            if (!isDate && !isTime) {
                logLine(locale + "\tSkipping datetime:\t" + xpath + "\t" + value);
                continue;
            }
            if (value.indexOf('[') >= 0) {
                logLine(locale + "\tSkipping [:\t" + xpath + "\t" + value);
                continue;
            }
            String pattern;
            try {
                pattern = isDate ? ooConverter.convertOODate(value, locale)
                    : ooConverter.convertOOTime(value, locale);
            } catch (RuntimeException e1) {
                logLine(locale + "\tSkipping unknown char:\t" + xpath + "\t" + value);
                continue;
            }

            // System.out.println(xpath + "\t" + pattern);
            if (SHOW2) {
                System.out.print("\t" + (isDate ? "Date" : "Time") + ": " + value + "\t" + pattern + "\t");
            }
            try {
                SimpleDateFormat d = new SimpleDateFormat(pattern);
                if (SHOW2) {
                    System.out.print(d.format(TEST_DATE));
                }
                result.add(d.toPattern());
                if (SHOW_OO) {
                    System.out.println(d.toPattern());
                }
            } catch (RuntimeException e) {
                // Best effort: a pattern the formatter rejects is dropped, but the
                // drop is recorded instead of being silently swallowed.
                if (SHOW2) {
                    System.out.print(e.getLocalizedMessage());
                }
                logLine(locale + "\tSkipping invalid pattern:\t" + xpath + "\t" + value + "\t" + pattern);
            }
            if (SHOW2) {
                System.out.println();
            }
        }
        return result;
    }
}