1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.BufferedReader; 12 import java.io.File; 13 import java.io.FileReader; 14 import java.io.IOException; 15 import java.io.InputStream; 16 import java.io.PrintWriter; 17 import java.lang.reflect.Constructor; 18 import java.lang.reflect.Method; 19 import java.nio.charset.Charset; 20 import java.util.ArrayList; 21 import java.util.Arrays; 22 import java.util.Calendar; 23 import java.util.Collection; 24 import java.util.Collections; 25 import java.util.Comparator; 26 import java.util.Date; 27 import java.util.EnumSet; 28 import java.util.HashMap; 29 import java.util.HashSet; 30 import java.util.Iterator; 31 import java.util.LinkedHashMap; 32 import java.util.LinkedHashSet; 33 import java.util.List; 34 import java.util.Map; 35 import java.util.Map.Entry; 36 import java.util.Objects; 37 import java.util.Set; 38 import java.util.SortedMap; 39 import java.util.SortedSet; 40 import java.util.TreeMap; 41 import java.util.TreeSet; 42 import java.util.concurrent.ConcurrentHashMap; 43 import java.util.regex.Matcher; 44 import java.util.regex.Pattern; 45 46 import org.unicode.cldr.draft.FileUtilities; 47 import org.unicode.cldr.util.RegexLookup.Finder; 48 49 import com.google.common.base.Splitter; 50 import com.ibm.icu.dev.test.TestFmwk; 51 import com.ibm.icu.impl.Utility; 52 import com.ibm.icu.text.DateFormat; 53 import com.ibm.icu.text.SimpleDateFormat; 54 import com.ibm.icu.text.Transform; 55 import com.ibm.icu.text.Transliterator; 56 import com.ibm.icu.text.UTF16; 57 import com.ibm.icu.text.UnicodeSet; 58 import com.ibm.icu.text.UnicodeSetIterator; 59 import com.ibm.icu.util.Freezable; 60 import com.ibm.icu.util.Output; 61 import com.ibm.icu.util.TimeZone; 62 63 public class CldrUtility { 64 65 public static final Charset UTF8 = Charset.forName("utf-8"); 66 public static final boolean BETA = false; 67 68 public static final String LINE_SEPARATOR = "\n"; 69 public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*"); 70 71 private static final boolean HANDLEFILE_SHOW_SKIP = false; 72 // Constant for "∅∅∅". Indicates that a child locale has no value for a 73 // path even though a parent does. 74 public static final String NO_INHERITANCE_MARKER = new String(new char[] { 0x2205, 0x2205, 0x2205 }); 75 76 /** 77 * Define the constant INHERITANCE_MARKER for "↑↑↑", used by Survey Tool to indicate a "passthru" vote to the parent locale. 78 * If CLDRFile ever finds this value in a data field, writing of the field should be suppressed. 79 */ 80 public static final String INHERITANCE_MARKER = new String(new char[] { 0x2191, 0x2191, 0x2191 }); 81 82 public static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 83 84 /** 85 * Very simple class, used to replace variables in a string. For example 86 * <p> 87 * 88 * <pre> 89 * static VariableReplacer langTag = new VariableReplacer() 90 * .add("$alpha", "[a-zA-Z]") 91 * .add("$digit", "[0-9]") 92 * .add("$alphanum", "[a-zA-Z0-9]") 93 * .add("$x", "[xX]"); 94 * ... 95 * String langTagPattern = langTag.replace(...); 96 * </pre> 97 */ 98 public static class VariableReplacer { 99 // simple implementation for now 100 private Map<String, String> m = new TreeMap<String, String>(Collections.reverseOrder()); 101 add(String variable, String value)102 public VariableReplacer add(String variable, String value) { 103 m.put(variable, value); 104 return this; 105 } 106 replace(String source)107 public String replace(String source) { 108 String oldSource; 109 do { 110 oldSource = source; 111 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) { 112 String variable = it.next(); 113 String value = m.get(variable); 114 source = replaceAll(source, variable, value); 115 } 116 } while (!source.equals(oldSource)); 117 return source; 118 } 119 replaceAll(String source, String key, String value)120 public String replaceAll(String source, String key, String value) { 121 while (true) { 122 int pos = source.indexOf(key); 123 if (pos < 0) return source; 124 source = source.substring(0, pos) + value + source.substring(pos + key.length()); 125 } 126 } 127 } 128 129 public interface LineHandler { 130 /** 131 * Return false if line was skipped 132 * 133 * @param line 134 * @return 135 */ handle(String line)136 boolean handle(String line) throws Exception; 137 } 138 getPath(String path, String filename)139 public static String getPath(String path, String filename) { 140 if (path == null) { 141 return null; 142 } 143 final File file = filename == null ? new File(path) 144 : new File(path, filename); 145 try { 146 return file.getCanonicalPath() + File.separatorChar; 147 } catch (IOException e) { 148 return file.getPath() + File.separatorChar; 149 } 150 } 151 getPath(String path)152 static String getPath(String path) { 153 return getPath(path, null); 154 } 155 156 public static final String ANALYTICS = "<script type=\"text/javascript\">\n" 157 + "var gaJsHost = ((\"https:\" == document.location.protocol) ? \"https://ssl.\" : \"http://www.\");\n" 158 + "document.write(unescape(\"%3Cscript src='\" + gaJsHost + \"google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E\"));\n" 159 + "</script>\n" 160 + "<script type=\"text/javascript\">\n" 161 + "try {\n" 162 + "var pageTracker = _gat._getTracker(\"UA-7672775-1\");\n" 163 + "pageTracker._trackPageview();\n" 164 + "} catch(err) {}</script>"; 165 166 public static final List<String> MINIMUM_LANGUAGES = Arrays.asList(new String[] { "ar", "en", "de", "fr", "hi", 167 "it", "es", "pt", "ru", "zh", "ja" }); // plus language itself 168 public static final List<String> MINIMUM_TERRITORIES = Arrays.asList(new String[] { "US", "GB", "DE", "FR", "IT", 169 "JP", "CN", "IN", "RU", "BR" }); 170 171 public interface LineComparer { 172 static final int LINES_DIFFERENT = -1, LINES_SAME = 0, SKIP_FIRST = 1, SKIP_SECOND = 2; 173 174 /** 175 * Returns LINES_DIFFERENT, LINES_SAME, or if one of the lines is ignorable, SKIP_FIRST or SKIP_SECOND 176 * 177 * @param line1 178 * @param line2 179 * @return 180 */ compare(String line1, String line2)181 int compare(String line1, String line2); 182 } 183 184 public static class SimpleLineComparator implements LineComparer { 185 public static final int TRIM = 1, SKIP_SPACES = 2, SKIP_EMPTY = 4, SKIP_CVS_TAGS = 8; 186 StringIterator si1 = new StringIterator(); 187 StringIterator si2 = new StringIterator(); 188 int flags; 189 SimpleLineComparator(int flags)190 public SimpleLineComparator(int flags) { 191 this.flags = flags; 192 } 193 compare(String line1, String line2)194 public int compare(String line1, String line2) { 195 // first, see if we want to skip one or the other lines 196 int skipper = 0; 197 if (line1 == null) { 198 skipper = SKIP_FIRST; 199 } else { 200 if ((flags & TRIM) != 0) line1 = line1.trim(); 201 if ((flags & SKIP_EMPTY) != 0 && line1.length() == 0) skipper = SKIP_FIRST; 202 } 203 if (line2 == null) { 204 skipper = SKIP_SECOND; 205 } else { 206 if ((flags & TRIM) != 0) line2 = line2.trim(); 207 if ((flags & SKIP_EMPTY) != 0 && line2.length() == 0) skipper += SKIP_SECOND; 208 } 209 if (skipper != 0) { 210 if (skipper == SKIP_FIRST + SKIP_SECOND) return LINES_SAME; // ok, don't skip both 211 return skipper; 212 } 213 214 // check for null 215 if (line1 == null) { 216 if (line2 == null) return LINES_SAME; 217 return LINES_DIFFERENT; 218 } 219 if (line2 == null) { 220 return LINES_DIFFERENT; 221 } 222 223 // now check equality 224 if (line1.equals(line2)) return LINES_SAME; 225 226 // if not equal, see if we are skipping spaces 227 if ((flags & SKIP_CVS_TAGS) != 0) { 228 if (line1.indexOf('$') >= 0 && line2.indexOf('$') >= 0) { 229 line1 = stripTags(line1); 230 line2 = stripTags(line2); 231 if (line1.equals(line2)) return LINES_SAME; 232 } else if (line1.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/") 233 && line2.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")) { 234 return LINES_SAME; 235 } 236 } 237 if ((flags & SKIP_SPACES) != 0 && si1.set(line1).matches(si2.set(line2))) return LINES_SAME; 238 return LINES_DIFFERENT; 239 } 240 241 // private Matcher dtdMatcher = PatternCache.get( 242 // "\\Q<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/\\E.*\\Q/ldml.dtd\">\\E").matcher(""); 243 244 private String[] CVS_TAGS = { "Revision", "Date" }; 245 stripTags(String line)246 private String stripTags(String line) { 247 // $ 248 // Revision: 8994 $ 249 // $ 250 // Date: 2013-07-03 21:31:17 +0200 (Wed, 03 Jul 2013) $ 251 int pos = line.indexOf('$'); 252 if (pos < 0) return line; 253 pos++; 254 int endpos = line.indexOf('$', pos); 255 if (endpos < 0) return line; 256 for (int i = 0; i < CVS_TAGS.length; ++i) { 257 if (!line.startsWith(CVS_TAGS[i], pos)) continue; 258 line = line.substring(0, pos + CVS_TAGS[i].length()) + line.substring(endpos); 259 } 260 return line; 261 } 262 263 } 264 265 /** 266 * 267 * @param file1 268 * @param file2 269 * @param failureLines 270 * on input, String[2], on output, failing lines 271 * @param lineComparer 272 * @return 273 * @throws IOException 274 */ areFileIdentical(String file1, String file2, String[] failureLines, LineComparer lineComparer)275 public static boolean areFileIdentical(String file1, String file2, String[] failureLines, 276 LineComparer lineComparer) throws IOException { 277 try (BufferedReader br1 = new BufferedReader(new FileReader(file1), 32 * 1024); 278 BufferedReader br2 = new BufferedReader(new FileReader(file2), 32 * 1024);) { 279 String line1 = ""; 280 String line2 = ""; 281 int skip = 0; 282 283 while (true) { 284 if ((skip & LineComparer.SKIP_FIRST) == 0) line1 = br1.readLine(); 285 if ((skip & LineComparer.SKIP_SECOND) == 0) line2 = br2.readLine(); 286 if (line1 == null && line2 == null) return true; 287 if (line1 == null || line2 == null) { 288 // System.out.println("debug"); 289 } 290 skip = lineComparer.compare(line1, line2); 291 if (skip == LineComparer.LINES_DIFFERENT) { 292 break; 293 } 294 } 295 failureLines[0] = line1 != null ? line1 : "<end of file>"; 296 failureLines[1] = line2 != null ? line2 : "<end of file>"; 297 return false; 298 } 299 } 300 301 /* 302 * static String getLineWithoutFluff(BufferedReader br1, boolean first, int flags) throws IOException { 303 * while (true) { 304 * String line1 = br1.readLine(); 305 * if (line1 == null) return line1; 306 * if ((flags & TRIM)!= 0) line1 = line1.trim(); 307 * if ((flags & SKIP_EMPTY)!= 0 && line1.length() == 0) continue; 308 * return line1; 309 * } 310 * } 311 */ 312 313 public final static class StringIterator { 314 String string; 315 int position = 0; 316 next()317 char next() { 318 while (true) { 319 if (position >= string.length()) return '\uFFFF'; 320 char ch = string.charAt(position++); 321 if (ch != ' ' && ch != '\t') return ch; 322 } 323 } 324 reset()325 StringIterator reset() { 326 position = 0; 327 return this; 328 } 329 set(String string)330 StringIterator set(String string) { 331 this.string = string; 332 position = 0; 333 return this; 334 } 335 matches(StringIterator other)336 boolean matches(StringIterator other) { 337 while (true) { 338 char c1 = next(); 339 char c2 = other.next(); 340 if (c1 != c2) return false; 341 if (c1 == '\uFFFF') return true; 342 } 343 } 344 345 /** 346 * @return Returns the position. 347 */ getPosition()348 public int getPosition() { 349 return position; 350 } 351 } 352 splitArray(String source, char separator)353 public static String[] splitArray(String source, char separator) { 354 return splitArray(source, separator, false); 355 } 356 splitArray(String source, char separator, boolean trim)357 public static String[] splitArray(String source, char separator, boolean trim) { 358 List<String> piecesList = splitList(source, separator, trim); 359 String[] pieces = new String[piecesList.size()]; 360 piecesList.toArray(pieces); 361 return pieces; 362 } 363 splitCommaSeparated(String line)364 public static String[] splitCommaSeparated(String line) { 365 // items are separated by ',' 366 // each item is of the form abc... 367 // or "..." (required if a comma or quote is contained) 368 // " in a field is represented by "" 369 List<String> result = new ArrayList<String>(); 370 StringBuilder item = new StringBuilder(); 371 boolean inQuote = false; 372 for (int i = 0; i < line.length(); ++i) { 373 char ch = line.charAt(i); // don't worry about supplementaries 374 switch (ch) { 375 case '"': 376 inQuote = !inQuote; 377 // at start or end, that's enough 378 // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote 379 if (inQuote && item.length() != 0) { 380 item.append('"'); 381 inQuote = true; 382 } 383 break; 384 case ',': 385 if (!inQuote) { 386 result.add(item.toString()); 387 item.setLength(0); 388 } else { 389 item.append(ch); 390 } 391 break; 392 default: 393 item.append(ch); 394 break; 395 } 396 } 397 result.add(item.toString()); 398 return result.toArray(new String[result.size()]); 399 } 400 splitList(String source, char separator)401 public static List<String> splitList(String source, char separator) { 402 return splitList(source, separator, false, null); 403 } 404 splitList(String source, char separator, boolean trim)405 public static List<String> splitList(String source, char separator, boolean trim) { 406 return splitList(source, separator, trim, null); 407 } 408 splitList(String source, char separator, boolean trim, List<String> output)409 public static List<String> splitList(String source, char separator, boolean trim, List<String> output) { 410 return splitList(source, Character.toString(separator), trim, output); 411 } 412 splitList(String source, String separator)413 public static List<String> splitList(String source, String separator) { 414 return splitList(source, separator, false, null); 415 } 416 splitList(String source, String separator, boolean trim)417 public static List<String> splitList(String source, String separator, boolean trim) { 418 return splitList(source, separator, trim, null); 419 } 420 splitList(String source, String separator, boolean trim, List<String> output)421 public static List<String> splitList(String source, String separator, boolean trim, List<String> output) { 422 if (output == null) output = new ArrayList<String>(); 423 if (source.length() == 0) return output; 424 int pos = 0; 425 do { 426 int npos = source.indexOf(separator, pos); 427 if (npos < 0) npos = source.length(); 428 String piece = source.substring(pos, npos); 429 if (trim) piece = piece.trim(); 430 output.add(piece); 431 pos = npos + 1; 432 } while (pos < source.length()); 433 return output; 434 } 435 436 /** 437 * Protect a collection (as much as Java lets us!) from modification. 438 * Really, really ugly code, since Java doesn't let us do better. 439 */ 440 @SuppressWarnings({ "rawtypes", "unchecked" }) protectCollection(T source)441 public static <T> T protectCollection(T source) { 442 // TODO - exclude UnmodifiableMap, Set, ... 443 if (source instanceof Map) { 444 Map sourceMap = (Map) source; 445 Map resultMap = clone(sourceMap); 446 if (resultMap == null) return (T) sourceMap; // failed 447 resultMap.clear(); 448 for (Object key : sourceMap.keySet()) { 449 resultMap.put(protectCollection(key), protectCollection(sourceMap.get(key))); 450 } 451 return resultMap instanceof SortedMap ? (T) Collections.unmodifiableSortedMap((SortedMap) resultMap) 452 : (T) Collections.unmodifiableMap(resultMap); 453 } else if (source instanceof Collection) { 454 Collection sourceCollection = (Collection) source; 455 Collection<Object> resultCollection = clone(sourceCollection); 456 if (resultCollection == null) return (T) sourceCollection; // failed 457 resultCollection.clear(); 458 459 for (Object item : sourceCollection) { 460 resultCollection.add(protectCollection(item)); 461 } 462 463 return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection) 464 : sourceCollection instanceof SortedSet ? (T) Collections 465 .unmodifiableSortedSet((SortedSet) sourceCollection) 466 : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection) 467 : (T) Collections.unmodifiableCollection(sourceCollection); 468 } else if (source instanceof Freezable) { 469 Freezable freezableSource = (Freezable) source; 470 if (freezableSource.isFrozen()) return source; 471 return (T) ((Freezable) (freezableSource.cloneAsThawed())).freeze(); 472 } else { 473 return source; // can't protect 474 } 475 } 476 477 /** 478 * Protect a collections where we don't need to clone. 479 * @param source 480 * @return 481 */ 482 @SuppressWarnings({ "rawtypes", "unchecked" }) protectCollectionX(T source)483 public static <T> T protectCollectionX(T source) { 484 // TODO - exclude UnmodifiableMap, Set, ... 485 if (isImmutable(source)) { 486 return source; 487 } 488 if (source instanceof Map) { 489 Map sourceMap = (Map) source; 490 // recurse 491 LinkedHashMap tempMap = new LinkedHashMap<>(sourceMap); // copy contents 492 sourceMap.clear(); 493 for (Object key : tempMap.keySet()) { 494 sourceMap.put(protectCollection(key), protectCollectionX(tempMap.get(key))); 495 } 496 return sourceMap instanceof SortedMap ? (T) Collections.unmodifiableSortedMap((SortedMap) sourceMap) 497 : (T) Collections.unmodifiableMap(sourceMap); 498 } else if (source instanceof Collection) { 499 Collection sourceCollection = (Collection) source; 500 LinkedHashSet tempSet = new LinkedHashSet<>(sourceCollection); // copy contents 501 502 sourceCollection.clear(); 503 for (Object item : tempSet) { 504 sourceCollection.add(protectCollectionX(item)); 505 } 506 507 return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection) 508 : sourceCollection instanceof SortedSet ? (T) Collections 509 .unmodifiableSortedSet((SortedSet) sourceCollection) 510 : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection) 511 : (T) Collections.unmodifiableCollection(sourceCollection); 512 } else if (source instanceof Freezable) { 513 Freezable freezableSource = (Freezable) source; 514 return (T) freezableSource.freeze(); 515 } else { 516 throw new IllegalArgumentException("Can’t protect: " + source.getClass().toString()); 517 } 518 } 519 520 private static final Set<Object> KNOWN_IMMUTABLES = new HashSet<Object>(Arrays.asList( 521 String.class)); 522 isImmutable(Object source)523 public static boolean isImmutable(Object source) { 524 return source == null 525 || source instanceof Enum 526 || source instanceof Number 527 || KNOWN_IMMUTABLES.contains(source.getClass()); 528 } 529 530 /** 531 * Clones T if we can; otherwise returns null. 532 * 533 * @param <T> 534 * @param source 535 * @return 536 */ 537 @SuppressWarnings("unchecked") clone(T source)538 private static <T> T clone(T source) { 539 final Class<? extends Object> class1 = source.getClass(); 540 try { 541 final Method declaredMethod = class1.getDeclaredMethod("clone", (Class<?>) null); 542 return (T) declaredMethod.invoke(source, (Object) null); 543 } catch (Exception e) { 544 } 545 try { 546 final Constructor<? extends Object> declaredMethod = class1.getConstructor((Class<?>) null); 547 return (T) declaredMethod.newInstance((Object) null); 548 } catch (Exception e) { 549 } 550 return null; // uncloneable 551 } 552 553 /** 554 * Appends two strings, inserting separator if either is empty 555 */ joinWithSeparation(String a, String separator, String b)556 public static String joinWithSeparation(String a, String separator, String b) { 557 if (a.length() == 0) return b; 558 if (b.length() == 0) return a; 559 return a + separator + b; 560 } 561 562 /** 563 * Appends two strings, inserting separator if either is empty. Modifies first map 564 */ joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b)565 public static Map<String, String> joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b) { 566 for (Iterator<String> it = b.keySet().iterator(); it.hasNext();) { 567 String key = it.next(); 568 String bvalue = b.get(key); 569 String avalue = a.get(key); 570 if (avalue != null) { 571 if (avalue.trim().equals(bvalue.trim())) continue; 572 bvalue = joinWithSeparation(avalue, separator, bvalue); 573 } 574 a.put(key, bvalue); 575 } 576 return a; 577 } 578 join(Collection<T> c, String separator)579 public static <T> String join(Collection<T> c, String separator) { 580 return join(c, separator, null); 581 } 582 join(Object[] c, String separator)583 public static String join(Object[] c, String separator) { 584 return join(c, separator, null); 585 } 586 join(Collection<T> c, String separator, Transform<T, String> transform)587 public static <T> String join(Collection<T> c, String separator, Transform<T, String> transform) { 588 StringBuffer output = new StringBuffer(); 589 boolean isFirst = true; 590 for (T item : c) { 591 if (isFirst) { 592 isFirst = false; 593 } else { 594 output.append(separator); 595 } 596 output.append(transform != null ? transform.transform(item) : item == null ? item : item.toString()); 597 } 598 return output.toString(); 599 } 600 join(T[] c, String separator, Transform<T, String> transform)601 public static <T> String join(T[] c, String separator, Transform<T, String> transform) { 602 return join(Arrays.asList(c), separator, transform); 603 } 604 605 /** 606 * Utility like Arrays.asList() 607 */ 608 @SuppressWarnings("unchecked") asMap(Object[][] source, Map<K, V> target, boolean reverse)609 public static <K, V> Map<K, V> asMap(Object[][] source, Map<K, V> target, boolean reverse) { 610 int from = 0, to = 1; 611 if (reverse) { 612 from = 1; 613 to = 0; 614 } 615 for (int i = 0; i < source.length; ++i) { 616 if (source[i].length != 2) { 617 throw new IllegalArgumentException("Source must be array of pairs of strings: " 618 + Arrays.asList(source[i])); 619 } 620 target.put((K) source[i][from], (V) source[i][to]); 621 } 622 return target; 623 } 624 asMap(Object[][] source)625 public static <K, V> Map<K, V> asMap(Object[][] source) { 626 return asMap(source, new HashMap<K, V>(), false); 627 } 628 629 /** 630 * Returns the canonical name for a file. 631 */ getCanonicalName(String file)632 public static String getCanonicalName(String file) { 633 try { 634 return new File(file).getCanonicalPath(); 635 } catch (Exception e) { 636 return file; 637 } 638 } 639 640 /** 641 * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that are in the UnicodeSet, 642 * Supplementary ranges, and escaping 643 * 644 * @param source 645 * The source set 646 * @param escaper 647 * A transliterator that is used to escape the characters according to the requirements of the regex. 648 * @return 649 */ toRegex(UnicodeSet source)650 public static String toRegex(UnicodeSet source) { 651 return toRegex(source, null, false); 652 } 653 654 private static final Transliterator DEFAULT_REGEX_ESCAPER = Transliterator.createFromRules( 655 "foo", 656 "([ \\- \\\\ \\[ \\] ]) > '\\' $1 ;" 657 // + " ([:c:]) > &hex($1);" 658 + " ([[:control:][[:z:]&[:ascii:]]]) > &hex($1);", 659 Transliterator.FORWARD); 660 661 /** 662 * Convert a UnicodeSet into a string that can be embedded into a Regex. 663 * Handles strings that are in the UnicodeSet, Supplementary ranges, and 664 * escaping 665 * 666 * @param source 667 * The source set 668 * @param escaper 669 * A transliterator that is used to escape the characters according 670 * to the requirements of the regex. The default puts a \\ before [, -, 671 * \, and ], and converts controls and Ascii whitespace to hex. 672 * Alternatives can be supplied. Note that some Regex engines, 673 * including Java 1.5, don't really deal with escaped supplementaries 674 * well. 675 * @param onlyBmp 676 * Set to true if the Regex only accepts BMP characters. In that 677 * case, ranges of supplementary characters are converted to lists of 678 * ranges. For example, [\uFFF0-\U0010000F \U0010100F-\U0010300F] 679 * converts into: 680 * 681 * <pre> 682 * [\uD800][\uDC00-\uDFFF] 683 * [\uD801-\uDBBF][\uDC00-\uDFFF] 684 * [\uDBC0][\uDC00-\uDC0F] 685 * </pre> 686 * 687 * and 688 * 689 * <pre> 690 * [\uDBC4][\uDC0F-\uDFFF] 691 * [\uDBC5-\uDBCB][\uDC00-\uDFFF] 692 * [\uDBCC][\uDC00-\uDC0F] 693 * </pre> 694 * 695 * These are then coalesced into a list of alternatives by sharing 696 * parts where feasible. For example, the above turns into 3 pairs of ranges: 697 * 698 * <pre> 699 * [\uDBC0\uDBCC][\uDC00-\uDC0F]|\uDBC4[\uDC0F-\uDFFF]|[\uD800-\uDBBF\uDBC5-\uDBCB][\uDC00-\uDFFF] 700 * </pre> 701 * 702 * @return escaped string. Something like [a-z] or (?:[a-m]|{zh}) if there is 703 * a string zh in the set, or a more complicated case for 704 * supplementaries. <br> 705 * Special cases: [] returns "", single item returns a string 706 * (escaped), like [a] => "a", or [{abc}] => "abc"<br> 707 * Supplementaries are handled specially, as described under onlyBmp. 708 */ toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp)709 public static String toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp) { 710 if (escaper == null) { 711 escaper = DEFAULT_REGEX_ESCAPER; 712 } 713 UnicodeSetIterator it = new UnicodeSetIterator(source); 714 // if there is only one item, return it 715 if (source.size() == 0) { 716 return ""; 717 } 718 if (source.size() == 1) { 719 it.next(); 720 return escaper.transliterate(it.getString()); 721 } 722 // otherwise, we figure out what is in the set, and will return 723 StringBuilder base = new StringBuilder("["); 724 StringBuilder alternates = new StringBuilder(); 725 Map<UnicodeSet, UnicodeSet> lastToFirst = new TreeMap<UnicodeSet, UnicodeSet>(new UnicodeSetComparator()); 726 int alternateCount = 0; 727 while (it.nextRange()) { 728 if (it.codepoint == UnicodeSetIterator.IS_STRING) { 729 ++alternateCount; 730 alternates.append('|').append(escaper.transliterate(it.string)); 731 } else if (!onlyBmp || it.codepointEnd <= 0xFFFF) { // BMP 732 addBmpRange(it.codepoint, it.codepointEnd, escaper, base); 733 } else { // supplementary 734 if (it.codepoint <= 0xFFFF) { 735 addBmpRange(it.codepoint, 0xFFFF, escaper, base); 736 it.codepoint = 0x10000; // reset the range 737 } 738 // this gets a bit ugly; we are trying to minimize the extra ranges for supplementaries 739 // we do this by breaking up X-Y based on the Lead and Trail values for X and Y 740 // Lx [Tx - Ty]) (if Lx == Ly) 741 // Lx [Tx - DFFF] | Ly [DC00-Ty] (if Lx == Ly - 1) 742 // Lx [Tx - DFFF] | [Lx+1 - Ly-1][DC00-DFFF] | Ly [DC00-Ty] (otherwise) 743 int leadX = UTF16.getLeadSurrogate(it.codepoint); 744 int trailX = UTF16.getTrailSurrogate(it.codepoint); 745 int leadY = UTF16.getLeadSurrogate(it.codepointEnd); 746 int trailY = UTF16.getTrailSurrogate(it.codepointEnd); 747 if (leadX == leadY) { 748 addSupplementalRange(leadX, leadX, trailX, trailY, escaper, lastToFirst); 749 } else { 750 addSupplementalRange(leadX, leadX, trailX, 0xDFFF, escaper, lastToFirst); 751 if (leadX != leadY - 1) { 752 addSupplementalRange(leadX + 1, leadY - 1, 0xDC00, 0xDFFF, escaper, lastToFirst); 753 } 754 addSupplementalRange(leadY, leadY, 0xDC00, trailY, escaper, lastToFirst); 755 } 756 } 757 } 758 // add in the supplementary ranges 759 if (lastToFirst.size() != 0) { 760 for (UnicodeSet last : lastToFirst.keySet()) { 761 ++alternateCount; 762 alternates.append('|').append(toRegex(lastToFirst.get(last), escaper, onlyBmp)) 763 .append(toRegex(last, escaper, onlyBmp)); 764 } 765 } 766 // Return the output. We separate cases in order to get the minimal extra apparatus 767 base.append("]"); 768 if (alternateCount == 0) { 769 return base.toString(); 770 } else if (base.length() > 2) { 771 return "(?:" + base + "|" + alternates.substring(1) + ")"; 772 } else if (alternateCount == 1) { 773 return alternates.substring(1); 774 } else { 775 return "(?:" + alternates.substring(1) + ")"; 776 } 777 } 778 addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper, Map<UnicodeSet, UnicodeSet> lastToFirst)779 private static void addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper, 780 Map<UnicodeSet, UnicodeSet> lastToFirst) { 781 System.out.println("\tadding: " + new UnicodeSet(leadX, leadY) + "\t" + new UnicodeSet(trailX, trailY)); 782 UnicodeSet last = new UnicodeSet(trailX, trailY); 783 UnicodeSet first = lastToFirst.get(last); 784 if (first == null) { 785 lastToFirst.put(last, first = new UnicodeSet()); 786 } 787 first.add(leadX, leadY); 788 } 789 addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base)790 private static void addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base) { 791 base.append(escaper.transliterate(UTF16.valueOf(start))); 792 if (start != limit) { 793 base.append("-").append(escaper.transliterate(UTF16.valueOf(limit))); 794 } 795 } 796 797 public static class UnicodeSetComparator implements Comparator<UnicodeSet> { compare(UnicodeSet o1, UnicodeSet o2)798 public int compare(UnicodeSet o1, UnicodeSet o2) { 799 return o1.compareTo(o2); 800 } 801 } 802 803 public static class CollectionComparator<T extends Comparable<T>> implements Comparator<Collection<T>> { compare(Collection<T> o1, Collection<T> o2)804 public int compare(Collection<T> o1, Collection<T> o2) { 805 return UnicodeSet.compare(o1, o2, UnicodeSet.ComparisonStyle.SHORTER_FIRST); 806 } 807 } 808 809 public static class ComparableComparator<T extends Comparable<T>> implements Comparator<T> { compare(T arg0, T arg1)810 public int compare(T arg0, T arg1) { 811 return Utility.checkCompare(arg0, arg1); 812 } 813 } 814 815 @SuppressWarnings({ "rawtypes", "unchecked" }) addTreeMapChain(Map coverageData, Object... objects)816 public static void addTreeMapChain(Map coverageData, Object... objects) { 817 Map<Object, Object> base = coverageData; 818 for (int i = 0; i < objects.length - 2; ++i) { 819 Map<Object, Object> nextOne = (Map<Object, Object>) base.get(objects[i]); 820 if (nextOne == null) base.put(objects[i], nextOne = new TreeMap<Object, Object>()); 821 base = nextOne; 822 } 823 base.put(objects[objects.length - 2], objects[objects.length - 1]); 824 } 825 826 public static abstract class CollectionTransform<S, T> implements Transform<S, T> { transform(S source)827 public abstract T transform(S source); 828 transform(Collection<S> input, Collection<T> output)829 public Collection<T> transform(Collection<S> input, Collection<T> output) { 830 return CldrUtility.transform(input, this, output); 831 } 832 transform(Collection<S> input)833 public Collection<T> transform(Collection<S> input) { 834 return transform(input, new ArrayList<T>()); 835 } 836 } 837 transform(SC source, Transform<S, T> transform, TC target)838 public static <S, T, SC extends Collection<S>, TC extends Collection<T>> TC transform(SC source, Transform<S, T> transform, TC target) { 839 for (S sourceItem : source) { 840 T targetItem = transform.transform(sourceItem); 841 if (targetItem != null) { 842 target.add(targetItem); 843 } 844 } 845 return target; 846 } 847 transform( SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target)848 public static <SK, SV, TK, TV, SM extends Map<SK, SV>, TM extends Map<TK, TV>> TM transform( 849 SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target) { 850 for (Entry<SK, SV> sourceEntry : source.entrySet()) { 851 TK targetKey = transformKey.transform(sourceEntry.getKey()); 852 TV targetValue = transformValue.transform(sourceEntry.getValue()); 853 if (targetKey != null && targetValue != null) { 854 target.put(targetKey, targetValue); 855 } 856 } 857 return target; 858 } 859 860 public static abstract class Apply<T> { apply(T item)861 public abstract void apply(T item); 862 applyTo(U collection)863 public <U extends Collection<T>> void applyTo(U collection) { 864 for (T item : collection) { 865 apply(item); 866 } 867 } 868 } 869 870 public static abstract class Filter<T> { 871 contains(T item)872 public abstract boolean contains(T item); 873 retainAll(U c)874 public <U extends Collection<T>> U retainAll(U c) { 875 for (Iterator<T> it = c.iterator(); it.hasNext();) { 876 if (!contains(it.next())) it.remove(); 877 } 878 return c; 879 } 880 extractMatches(U c, U target)881 public <U extends Collection<T>> U extractMatches(U c, U target) { 882 for (Iterator<T> it = c.iterator(); it.hasNext();) { 883 T item = it.next(); 884 if (contains(item)) { 885 target.add(item); 886 } 887 } 888 return target; 889 } 890 removeAll(U c)891 public <U extends Collection<T>> U removeAll(U c) { 892 for (Iterator<T> it = c.iterator(); it.hasNext();) { 893 if (contains(it.next())) it.remove(); 894 } 895 return c; 896 } 897 extractNonMatches(U c, U target)898 public <U extends Collection<T>> U extractNonMatches(U c, U target) { 899 for (Iterator<T> it = c.iterator(); it.hasNext();) { 900 T item = it.next(); 901 if (!contains(item)) { 902 target.add(item); 903 } 904 } 905 return target; 906 } 907 } 908 909 public static class MatcherFilter<T> extends Filter<T> { 910 private Matcher matcher; 911 MatcherFilter(String pattern)912 public MatcherFilter(String pattern) { 913 this.matcher = PatternCache.get(pattern).matcher(""); 914 } 915 MatcherFilter(Matcher matcher)916 public MatcherFilter(Matcher matcher) { 917 this.matcher = matcher; 918 } 919 set(Matcher matcher)920 public MatcherFilter<T> set(Matcher matcher) { 921 this.matcher = matcher; 922 return this; 923 } 924 set(String pattern)925 public MatcherFilter<T> set(String pattern) { 926 this.matcher = PatternCache.get(pattern).matcher(""); 927 return this; 928 } 929 contains(T o)930 public boolean contains(T o) { 931 return matcher.reset(o.toString()).matches(); 932 } 933 } 934 935 // static final class HandlingTransform implements Transform<String, Handling> { 936 // @Override 937 // public Handling transform(String source) { 938 // return Handling.valueOf(source); 939 // } 940 // } 941 942 public static final class PairComparator<K extends Comparable<K>, V extends Comparable<V>> implements java.util.Comparator<Pair<K, V>> { 943 944 private Comparator<K> comp1; 945 private Comparator<V> comp2; 946 PairComparator(Comparator<K> comp1, Comparator<V> comp2)947 public PairComparator(Comparator<K> comp1, Comparator<V> comp2) { 948 this.comp1 = comp1; 949 this.comp2 = comp2; 950 } 951 952 @Override compare(Pair<K, V> o1, Pair<K, V> o2)953 public int compare(Pair<K, V> o1, Pair<K, V> o2) { 954 { 955 K o1First = o1.getFirst(); 956 K o2First = o2.getFirst(); 957 int diff = o1First == null ? (o2First == null ? 0 : -1) 958 : o2First == null ? 1 959 : comp1 == null ? o1First.compareTo(o2First) 960 : comp1.compare(o1First, o2First); 961 if (diff != 0) { 962 return diff; 963 } 964 } 965 V o1Second = o1.getSecond(); 966 V o2Second = o2.getSecond(); 967 return o1Second == null ? (o2Second == null ? 0 : -1) 968 : o2Second == null ? 1 969 : comp2 == null ? o1Second.compareTo(o2Second) 970 : comp2.compare(o1Second, o2Second); 971 } 972 973 } 974 975 /** 976 * Fetch data from jar 977 * 978 * @param name 979 * a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break. 980 */ getUTF8Data(String name)981 public static BufferedReader getUTF8Data(String name) { 982 if (new File(name).isAbsolute()) { 983 throw new IllegalArgumentException( 984 "Path must be relative to org/unicode/cldr/util/data such as 'file.txt' or 'casing/file.txt', but got '" 985 + name + "'."); 986 } 987 988 return FileReaders.openFile(CldrUtility.class, "data/" + name); 989 } 990 991 /** 992 * Fetch data from jar 993 * 994 * @param name 995 * a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break. 996 */ getInputStream(String name)997 public static InputStream getInputStream(String name) { 998 if (new File(name).isAbsolute()) { 999 throw new IllegalArgumentException( 1000 "Path must be relative to org/unicode/cldr/util/data such as 'file.txt' or 'casing/file.txt', but got '" 1001 + name + "'."); 1002 } 1003 return getInputStream(CldrUtility.class, "data/" + name); 1004 } 1005 1006 @SuppressWarnings("resource") getInputStream(Class<?> callingClass, String relativePath)1007 public static InputStream getInputStream(Class<?> callingClass, String relativePath) { 1008 InputStream is = callingClass.getResourceAsStream(relativePath); 1009 // add buffering 1010 return InputStreamFactory.buffer(is); 1011 } 1012 1013 /** 1014 * Takes a Map that goes from Object to Set, and fills in the transpose 1015 * 1016 * @param source_key_valueSet 1017 * @param output_value_key 1018 */ putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key)1019 public static void putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key) { 1020 for (Iterator<Object> it = source_key_valueSet.keySet().iterator(); it.hasNext();) { 1021 Object key = it.next(); 1022 Set<Object> values = source_key_valueSet.get(key); 1023 for (Iterator<Object> it2 = values.iterator(); it2.hasNext();) { 1024 Object value = it2.next(); 1025 output_value_key.put(value, key); 1026 } 1027 } 1028 } 1029 countInstances(String source, String substring)1030 public static int countInstances(String source, String substring) { 1031 int count = 0; 1032 int pos = 0; 1033 while (true) { 1034 pos = source.indexOf(substring, pos) + 1; 1035 if (pos <= 0) break; 1036 count++; 1037 } 1038 return count; 1039 } 1040 registerTransliteratorFromFile(String id, String dir, String filename)1041 public static void registerTransliteratorFromFile(String id, String dir, String filename) { 1042 registerTransliteratorFromFile(id, dir, filename, Transliterator.FORWARD, true); 1043 registerTransliteratorFromFile(id, dir, filename, Transliterator.REVERSE, true); 1044 } 1045 registerTransliteratorFromFile(String id, String dir, String filename, int direction, boolean reverseID)1046 public static void registerTransliteratorFromFile(String id, String dir, String filename, int direction, 1047 boolean reverseID) { 1048 if (filename == null) { 1049 filename = id.replace('-', '_'); 1050 filename = filename.replace('/', '_'); 1051 filename += ".txt"; 1052 } 1053 String rules = getText(dir, filename); 1054 Transliterator t; 1055 int pos = id.indexOf('-'); 1056 String rid; 1057 if (pos < 0) { 1058 rid = id + "-Any"; 1059 id = "Any-" + id; 1060 } else { 1061 rid = id.substring(pos + 1) + "-" + id.substring(0, pos); 1062 } 1063 if (!reverseID) rid = id; 1064 1065 if (direction == Transliterator.FORWARD) { 1066 Transliterator.unregister(id); 1067 t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 1068 Transliterator.registerInstance(t); 1069 System.out.println("Registered new Transliterator: " + id); 1070 } 1071 1072 /* 1073 * String test = "\u049A\u0430\u0437\u0430\u049B"; 1074 * System.out.println(t.transliterate(test)); 1075 * t = Transliterator.getInstance(id); 1076 * System.out.println(t.transliterate(test)); 1077 */ 1078 1079 if (direction == Transliterator.REVERSE) { 1080 Transliterator.unregister(rid); 1081 t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE); 1082 Transliterator.registerInstance(t); 1083 System.out.println("Registered new Transliterator: " + rid); 1084 } 1085 } 1086 getText(String dir, String filename)1087 public static String getText(String dir, String filename) { 1088 try { 1089 BufferedReader br = FileUtilities.openUTF8Reader(dir, filename); 1090 StringBuffer buffer = new StringBuffer(); 1091 while (true) { 1092 String line = br.readLine(); 1093 if (line == null) break; 1094 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1); 1095 if (line.startsWith("//")) continue; 1096 buffer.append(line).append(CldrUtility.LINE_SEPARATOR); 1097 } 1098 br.close(); 1099 String rules = buffer.toString(); 1100 return rules; 1101 } catch (IOException e) { 1102 throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + filename) 1103 .initCause(e); 1104 } 1105 } 1106 callMethod(String methodNames, Class<?> cls)1107 public static void callMethod(String methodNames, Class<?> cls) { 1108 for (String methodName : methodNames.split(",")) { 1109 try { 1110 Method method; 1111 try { 1112 method = cls.getMethod(methodName, (Class[]) null); 1113 try { 1114 method.invoke(null, (Object[]) null); 1115 } catch (Exception e) { 1116 e.printStackTrace(); 1117 } 1118 } catch (Exception e) { 1119 System.out.println("No such method: " + methodName); 1120 showMethods(cls); 1121 } 1122 } catch (ClassNotFoundException e) { 1123 e.printStackTrace(); 1124 } 1125 } 1126 } 1127 showMethods(Class<?> cls)1128 public static void showMethods(Class<?> cls) throws ClassNotFoundException { 1129 System.out.println("Possible methods of " + cls.getCanonicalName() + " are: "); 1130 Method[] methods = cls.getMethods(); 1131 Set<String> names = new TreeSet<String>(); 1132 for (int i = 0; i < methods.length; ++i) { 1133 if (methods[i].getGenericParameterTypes().length != 0) continue; 1134 //int mods = methods[i].getModifiers(); 1135 // if (!Modifier.isStatic(mods)) continue; 1136 String name = methods[i].getName(); 1137 names.add(name); 1138 } 1139 for (Iterator<String> it = names.iterator(); it.hasNext();) { 1140 System.out.println("\t" + it.next()); 1141 } 1142 } 1143 1144 /** 1145 * Breaks lines if they are too long, or if matcher.group(1) != last. Only breaks just before matcher. 1146 * 1147 * @param input 1148 * @param separator 1149 * @param matcher 1150 * must match each possible item. The first group is significant; if different, will cause break 1151 * @return 1152 */ breakLines(CharSequence input, String separator, Matcher matcher, int width)1153 static public String breakLines(CharSequence input, String separator, Matcher matcher, int width) { 1154 StringBuffer output = new StringBuffer(); 1155 String lastPrefix = ""; 1156 int lastEnd = 0; 1157 int lastBreakPos = 0; 1158 matcher.reset(input); 1159 while (true) { 1160 boolean match = matcher.find(); 1161 if (!match) { 1162 output.append(input.subSequence(lastEnd, input.length())); 1163 break; 1164 } 1165 String prefix = matcher.group(1); 1166 if (!prefix.equalsIgnoreCase(lastPrefix) || matcher.end() - lastBreakPos > width) { // break before? 1167 output.append(separator); 1168 lastBreakPos = lastEnd; 1169 } else if (lastEnd != 0) { 1170 output.append(' '); 1171 } 1172 output.append(input.subSequence(lastEnd, matcher.end()).toString().trim()); 1173 lastEnd = matcher.end(); 1174 lastPrefix = prefix; 1175 } 1176 return output.toString(); 1177 } 1178 showOptions(String[] args)1179 public static void showOptions(String[] args) { 1180 // Properties props = System.getProperties(); 1181 System.out.println("Arguments: " + join(args, " ")); // + (props == null ? "" : " " + props)); 1182 } 1183 roundToDecimals(double input, int places)1184 public static double roundToDecimals(double input, int places) { 1185 double log10 = Math.log10(input); // 15000 => 4.xxx 1186 double intLog10 = Math.floor(log10); 1187 double scale = Math.pow(10, intLog10 - places + 1); 1188 double factored = Math.round(input / scale) * scale; 1189 // System.out.println("###\t" +input + "\t" + factored); 1190 return factored; 1191 } 1192 1193 /** 1194 * Get a property value, returning the value if there is one (eg -Dkey=value), 1195 * otherwise the default value (for either empty or null). 1196 * 1197 * @param key 1198 * @param valueIfNull 1199 * @param valueIfEmpty 1200 * @return 1201 */ getProperty(String key, String defaultValue)1202 public static String getProperty(String key, String defaultValue) { 1203 return getProperty(key, defaultValue, defaultValue); 1204 } 1205 1206 /** 1207 * Get a property value, returning the value if there is one, otherwise null. 1208 */ getProperty(String key)1209 public static String getProperty(String key) { 1210 return getProperty(key, null, null); 1211 } 1212 1213 /** 1214 * Get a property value, returning the value if there is one (eg -Dkey=value), 1215 * the valueIfEmpty if there is one with no value (eg -Dkey) and the valueIfNull 1216 * if there is no property. 1217 * 1218 * @param key 1219 * @param valueIfNull 1220 * @param valueIfEmpty 1221 * @return 1222 */ getProperty(String key, String valueIfNull, String valueIfEmpty)1223 public static String getProperty(String key, String valueIfNull, String valueIfEmpty) { 1224 String result = CLDRConfig.getInstance().getProperty(key); 1225 if (result == null) { 1226 result = valueIfNull; 1227 } else if (result.length() == 0) { 1228 result = valueIfEmpty; 1229 } 1230 return result; 1231 } 1232 hex(byte[] bytes, int start, int end, String separator)1233 public static String hex(byte[] bytes, int start, int end, String separator) { 1234 StringBuilder result = new StringBuilder(); 1235 for (int i = 0; i < end; ++i) { 1236 if (result.length() != 0) { 1237 result.append(separator); 1238 } 1239 result.append(Utility.hex(bytes[i] & 0xFF, 2)); 1240 } 1241 return result.toString(); 1242 } 1243 getProperty(String string, boolean b)1244 public static boolean getProperty(String string, boolean b) { 1245 return getProperty(string, b ? "true" : "false", "true").matches("(?i)T|TRUE"); 1246 } 1247 checkValidDirectory(String sourceDirectory)1248 public static String checkValidDirectory(String sourceDirectory) { 1249 return checkValidFile(sourceDirectory, true, null); 1250 } 1251 checkValidDirectory(String sourceDirectory, String correction)1252 public static String checkValidDirectory(String sourceDirectory, String correction) { 1253 return checkValidFile(sourceDirectory, true, correction); 1254 } 1255 checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction)1256 public static String checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction) { 1257 File file = null; 1258 String canonicalPath = null; 1259 try { 1260 file = new File(sourceDirectory); 1261 canonicalPath = file.getCanonicalPath() + File.separatorChar; 1262 } catch (Exception e) { 1263 } 1264 if (file == null || canonicalPath == null || checkForDirectory && !file.isDirectory()) { 1265 throw new RuntimeException("Directory not found: " + sourceDirectory 1266 + (canonicalPath == null ? "" : " => " + canonicalPath) 1267 + (correction == null ? "" : CldrUtility.LINE_SEPARATOR + correction)); 1268 } 1269 return canonicalPath; 1270 } 1271 1272 /** 1273 * Copy up to matching line (not included). If output is null, then just skip until. 1274 * 1275 * @param oldFile 1276 * file to copy 1277 * @param readUntilPattern 1278 * pattern to search for. If null, goes to end of file. 1279 * @param output 1280 * into to copy into. If null, just skips in the input. 1281 * @param includeMatchingLine 1282 * inclde the matching line when copying. 1283 * @throws IOException 1284 */ copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern, final PrintWriter output, boolean includeMatchingLine)1285 public static void copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern, 1286 final PrintWriter output, boolean includeMatchingLine) throws IOException { 1287 Matcher readUntil = readUntilPattern == null ? null : readUntilPattern.matcher(""); 1288 while (true) { 1289 String line = oldFile.readLine(); 1290 if (line == null) { 1291 break; 1292 } 1293 if (line.startsWith("\uFEFF")) { 1294 line = line.substring(1); 1295 } 1296 if (readUntil != null && readUntil.reset(line).matches()) { 1297 if (includeMatchingLine && output != null) { 1298 output.println(line); 1299 } 1300 break; 1301 } 1302 if (output != null) { 1303 output.println(line); 1304 } 1305 } 1306 } 1307 1308 private static DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'GMT'"); 1309 private static DateFormat DATE_ONLY = new SimpleDateFormat("yyyy-MM-dd"); 1310 static { 1311 df.setTimeZone(TimeZone.getTimeZone("GMT")); 1312 DATE_ONLY.setTimeZone(TimeZone.getTimeZone("GMT")); 1313 } 1314 isoFormat(Date date)1315 public static String isoFormat(Date date) { 1316 synchronized (df) { 1317 return df.format(date); 1318 } 1319 } 1320 isoFormatDateOnly(Date date)1321 public static String isoFormatDateOnly(Date date) { 1322 synchronized (DATE_ONLY) { 1323 return DATE_ONLY.format(date); 1324 } 1325 } 1326 newConcurrentHashMap()1327 public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap() { 1328 // http://ria101.wordpress.com/2011/12/12/concurrenthashmap-avoid-a-common-misuse/ 1329 return new ConcurrentHashMap<K, V>(4, 0.9f, 1); 1330 } 1331 newConcurrentHashMap(Map<K, V> source)1332 public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap(Map<K, V> source) { 1333 ConcurrentHashMap<K, V> result = newConcurrentHashMap(); 1334 result.putAll(source); 1335 return result; 1336 } 1337 equals(Object a, Object b)1338 public static boolean equals(Object a, Object b) { 1339 return a == b ? true 1340 : a == null || b == null ? false 1341 : a.equals(b); 1342 } 1343 getDoubleLink(String code)1344 public static String getDoubleLink(String code) { 1345 final String anchorSafe = TransliteratorUtilities.toHTML.transliterate(code).replace(" ", "_"); 1346 return "<a name='" + anchorSafe + "' href='#" + anchorSafe + "'>"; 1347 } 1348 getDoubleLinkedText(String anchor, String anchorText)1349 public static String getDoubleLinkedText(String anchor, String anchorText) { 1350 return getDoubleLink(anchor) + TransliteratorUtilities.toHTML.transliterate(anchorText).replace("_", " ") 1351 + "</a>"; 1352 } 1353 getDoubleLinkedText(String anchor)1354 public static String getDoubleLinkedText(String anchor) { 1355 return getDoubleLinkedText(anchor, anchor); 1356 } 1357 getDoubleLinkMsg()1358 public static String getDoubleLinkMsg() { 1359 return "<a name=''{0}'' href=''#{0}''>{0}</a>"; 1360 } 1361 getDoubleLinkMsg2()1362 public static String getDoubleLinkMsg2() { 1363 return "<a name=''{0}{1}'' href=''#{0}{1}''>{0}</a>"; 1364 } 1365 getCopyrightString()1366 public static String getCopyrightString() { 1367 // now do the rest 1368 return "Copyright \u00A9 1991-" + Calendar.getInstance().get(Calendar.YEAR) + " Unicode, Inc." + CldrUtility.LINE_SEPARATOR 1369 + "For terms of use, see http://www.unicode.org/copyright.html" + CldrUtility.LINE_SEPARATOR 1370 + "Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries." + CldrUtility.LINE_SEPARATOR 1371 + "CLDR data files are interpreted according to the LDML specification " + "(http://unicode.org/reports/tr35/)"; 1372 } 1373 1374 // TODO Move to collection utilities 1375 /** 1376 * Type-safe get 1377 * @param map 1378 * @param key 1379 * @return value 1380 */ get(M map, K key)1381 public static <K, V, M extends Map<K, V>> V get(M map, K key) { 1382 return map.get(key); 1383 } 1384 1385 /** 1386 * Type-safe contains 1387 * @param map 1388 * @param key 1389 * @return value 1390 */ contains(C collection, K key)1391 public static <K, C extends Collection<K>> boolean contains(C collection, K key) { 1392 return collection.contains(key); 1393 } 1394 toEnumSet(Class<E> classValue, Collection<String> stringValues)1395 public static <E extends Enum<E>> EnumSet<E> toEnumSet(Class<E> classValue, Collection<String> stringValues) { 1396 EnumSet<E> result = EnumSet.noneOf(classValue); 1397 for (String s : stringValues) { 1398 result.add(Enum.valueOf(classValue, s)); 1399 } 1400 return result; 1401 } 1402 putNew(M map, K key, V value)1403 public static <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) { 1404 if (!map.containsKey(key)) { 1405 map.put(key, value); 1406 } 1407 return map; 1408 } 1409 cleanSemiFields(String line)1410 public static String[] cleanSemiFields(String line) { 1411 line = cleanLine(line); 1412 return line.isEmpty() ? null : SEMI_SPLIT.split(line); 1413 } 1414 cleanLine(String line)1415 private static String cleanLine(String line) { 1416 int comment = line.indexOf("#"); 1417 if (comment >= 0) { 1418 line = line.substring(0, comment); 1419 } 1420 if (line.startsWith("\uFEFF")) { 1421 line = line.substring(1); 1422 } 1423 return line.trim(); 1424 } 1425 handleFile(String filename, LineHandler handler)1426 public static void handleFile(String filename, LineHandler handler) throws IOException { 1427 try (BufferedReader in = getUTF8Data(filename);) { 1428 String line = null; 1429 while ((line = in.readLine()) != null) { 1430 // String line = in.readLine(); 1431 // if (line == null) { 1432 // break; 1433 // } 1434 try { 1435 if (!handler.handle(line)) { 1436 if (HANDLEFILE_SHOW_SKIP) { 1437 System.out.println("Skipping line: " + line); 1438 } 1439 } 1440 } catch (Exception e) { 1441 throw (RuntimeException) new IllegalArgumentException("Problem with line: " + line) 1442 .initCause(e); 1443 } 1444 } 1445 } 1446 // in.close(); 1447 } 1448 ifNull(T x, T y)1449 public static <T> T ifNull(T x, T y) { 1450 return x == null 1451 ? y 1452 : x; 1453 } 1454 ifSame(T source, T replaceIfSame, T replacement)1455 public static <T> T ifSame(T source, T replaceIfSame, T replacement) { 1456 return source == replaceIfSame ? replacement : source; 1457 } 1458 ifEqual(T source, T replaceIfSame, T replacement)1459 public static <T> T ifEqual(T source, T replaceIfSame, T replacement) { 1460 return Objects.equals(source, replaceIfSame) ? replacement : source; 1461 } 1462 intersect(Set<T> a, Collection<T> b)1463 public static <T> Set<T> intersect(Set<T> a, Collection<T> b) { 1464 Set<T> result = new LinkedHashSet<>(a); 1465 result.retainAll(b); 1466 return result; 1467 } 1468 subtract(Set<T> a, Collection<T> b)1469 public static <T> Set<T> subtract(Set<T> a, Collection<T> b) { 1470 Set<T> result = new LinkedHashSet<>(a); 1471 result.removeAll(b); 1472 return result; 1473 } 1474 logRegexLookup(TestFmwk testFramework, RegexLookup<T> lookup, String toLookup)1475 public static <T> void logRegexLookup(TestFmwk testFramework, RegexLookup<T> lookup, String toLookup) { 1476 Output<String[]> arguments = new Output<>(); 1477 Output<Finder> matcherFound = new Output<>(); 1478 List<String> failures = new ArrayList<String>(); 1479 lookup.get(toLookup, null, arguments, matcherFound, failures); 1480 testFramework.logln("lookup arguments: " + (arguments.value == null ? "null" : Arrays.asList(arguments.value))); 1481 testFramework.logln("lookup matcherFound: " + matcherFound); 1482 for (String s : failures) { 1483 testFramework.logln(s); 1484 } 1485 } 1486 deepEquals(Object... pairs)1487 public static boolean deepEquals(Object... pairs) { 1488 for (int item = 0; item < pairs.length;) { 1489 if (!Objects.deepEquals(pairs[item++], pairs[item++])) { 1490 return false; 1491 } 1492 } 1493 return true; 1494 } 1495 array(Splitter splitter, String source)1496 public static String[] array(Splitter splitter, String source) { 1497 List<String> list = splitter.splitToList(source); 1498 return list.toArray(new String[list.size()]); 1499 } 1500 toHex(String in, boolean javaStyle)1501 public static String toHex(String in, boolean javaStyle) { 1502 StringBuilder result = new StringBuilder(); 1503 for (int i = 0; i < in.length(); ++i) { 1504 result.append(toHex(in.charAt(i), javaStyle)); 1505 } 1506 return result.toString(); 1507 } 1508 toHex(int j, boolean javaStyle)1509 public static String toHex(int j, boolean javaStyle) { 1510 if (j == '\"') { 1511 return "\\\""; 1512 } else if (j == '\\') { 1513 return "\\\\"; 1514 } else if (0x20 < j && j < 0x7F) { 1515 return String.valueOf((char) j); 1516 } 1517 final String hexString = Integer.toHexString(j).toUpperCase(); 1518 int gap = 4 - hexString.length(); 1519 if (gap < 0) { 1520 gap = 0; 1521 } 1522 String prefix = javaStyle ? "\\u" : "U+"; 1523 return prefix + "000".substring(0, gap) + hexString; 1524 } 1525 1526 /** 1527 * get string format for debugging, since Java has a useless display for many items 1528 * @param item 1529 * @return 1530 */ toString(Object item)1531 public static String toString(Object item) { 1532 if (item instanceof Object[]) { 1533 return toString(Arrays.asList((Object[]) item)); 1534 } else if (item instanceof Entry) { 1535 return toString(((Entry) item).getKey()) + "≔" + toString(((Entry) item).getValue()); 1536 } else if (item instanceof Map) { 1537 return "{" + toString(((Map) item).entrySet()) + "}"; 1538 } else if (item instanceof Collection) { 1539 List<String> result = new ArrayList<>(); 1540 for (Object subitem : (Collection) item) { 1541 result.add(toString(subitem)); 1542 } 1543 return result.toString(); 1544 } 1545 return item.toString(); 1546 } 1547 } 1548