1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.BufferedReader; 12 import java.io.File; 13 import java.io.FileReader; 14 import java.io.IOException; 15 import java.io.InputStream; 16 import java.io.InputStreamReader; 17 import java.io.PrintWriter; 18 import java.lang.reflect.Constructor; 19 import java.lang.reflect.Method; 20 import java.nio.charset.Charset; 21 import java.nio.file.Files; 22 import java.nio.file.Path; 23 import java.nio.file.Paths; 24 import java.util.ArrayList; 25 import java.util.Arrays; 26 import java.util.Calendar; 27 import java.util.Collection; 28 import java.util.Collections; 29 import java.util.Comparator; 30 import java.util.Date; 31 import java.util.EnumSet; 32 import java.util.HashMap; 33 import java.util.HashSet; 34 import java.util.Iterator; 35 import java.util.LinkedHashMap; 36 import java.util.LinkedHashSet; 37 import java.util.List; 38 import java.util.Map; 39 import java.util.Map.Entry; 40 import java.util.Objects; 41 import java.util.Set; 42 import java.util.SortedMap; 43 import java.util.SortedSet; 44 import java.util.TreeMap; 45 import java.util.TreeSet; 46 import java.util.concurrent.ConcurrentHashMap; 47 import java.util.regex.Matcher; 48 import java.util.regex.Pattern; 49 50 import org.unicode.cldr.draft.FileUtilities; 51 52 import com.google.common.base.Splitter; 53 import com.google.common.collect.ImmutableMap; 54 import com.google.common.collect.ImmutableMultimap; 55 import com.google.common.collect.Multimap; 56 import com.ibm.icu.impl.Utility; 57 import com.ibm.icu.text.DateFormat; 58 import com.ibm.icu.text.SimpleDateFormat; 59 import 
com.ibm.icu.text.Transform; 60 import com.ibm.icu.text.Transliterator; 61 import com.ibm.icu.text.UTF16; 62 import com.ibm.icu.text.UnicodeSet; 63 import com.ibm.icu.text.UnicodeSetIterator; 64 import com.ibm.icu.util.Freezable; 65 import com.ibm.icu.util.TimeZone; 66 67 public class CldrUtility { 68 69 public static final boolean DEBUG_MISSING_DIRECTORIES = false; 70 71 public static final Charset UTF8 = Charset.forName("utf-8"); 72 public static final boolean BETA = false; 73 74 public static final String LINE_SEPARATOR = "\n"; 75 public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*"); 76 77 private static final boolean HANDLEFILE_SHOW_SKIP = false; 78 // Constant for "∅∅∅". Indicates that a child locale has no value for a 79 // path even though a parent does. 80 public static final String NO_INHERITANCE_MARKER = new String(new char[] { 0x2205, 0x2205, 0x2205 }); 81 82 /** 83 * Define the constant INHERITANCE_MARKER for "↑↑↑", used by Survey Tool to indicate a "passthru" vote to the parent locale. 84 * If CLDRFile ever finds this value in a data field, writing of the field should be suppressed. 85 */ 86 public static final String INHERITANCE_MARKER = new String(new char[] { 0x2191, 0x2191, 0x2191 }); 87 88 public static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 89 90 /** 91 * Very simple class, used to replace variables in a string. For example 92 * <p> 93 * 94 * <pre> 95 * static VariableReplacer langTag = new VariableReplacer() 96 * .add("$alpha", "[a-zA-Z]") 97 * .add("$digit", "[0-9]") 98 * .add("$alphanum", "[a-zA-Z0-9]") 99 * .add("$x", "[xX]"); 100 * ... 
101 * String langTagPattern = langTag.replace(...); 102 * </pre> 103 */ 104 public static class VariableReplacer { 105 // simple implementation for now 106 private Map<String, String> m = new TreeMap<>(Collections.reverseOrder()); 107 add(String variable, String value)108 public VariableReplacer add(String variable, String value) { 109 m.put(variable, value); 110 return this; 111 } 112 replace(String source)113 public String replace(String source) { 114 String oldSource; 115 do { 116 oldSource = source; 117 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) { 118 String variable = it.next(); 119 String value = m.get(variable); 120 source = replaceAll(source, variable, value); 121 } 122 } while (!source.equals(oldSource)); 123 return source; 124 } 125 replaceAll(String source, String key, String value)126 public String replaceAll(String source, String key, String value) { 127 while (true) { 128 int pos = source.indexOf(key); 129 if (pos < 0) return source; 130 source = source.substring(0, pos) + value + source.substring(pos + key.length()); 131 } 132 } 133 } 134 135 public interface LineHandler { 136 /** 137 * Return false if line was skipped 138 * 139 * @param line 140 * @return 141 */ handle(String line)142 boolean handle(String line) throws Exception; 143 } 144 getPath(String fileOrDir, String filename)145 public static String getPath(String fileOrDir, String filename) { 146 // Required for cases where a system property is read but not default is given. 147 // TODO: Fix callers to not fail silently if properties are missing. 
/**
 * LineComparer whose tolerance is controlled by a bit mask of the flags
 * TRIM, SKIP_SPACES, SKIP_EMPTY and SKIP_CVS_TAGS.
 * <p>
 * The two StringIterator fields are scratch objects that are reset and reused on
 * every call to compare, so an instance carries mutable state between calls.
 */
public static class SimpleLineComparator implements LineComparer {
    // TRIM: trim each non-null line before any other check.
    // SKIP_SPACES: unequal lines still count as the same if they match once spaces and tabs are ignored.
    // SKIP_EMPTY: a line of length 0 (after the optional trim) is reported as skippable.
    // SKIP_CVS_TAGS: ignore the expanded content of $Revision...$ / $Date...$ tags, and treat two
    // lines that both start with the relative ldml DOCTYPE prefix as the same.
    public static final int TRIM = 1, SKIP_SPACES = 2, SKIP_EMPTY = 4, SKIP_CVS_TAGS = 8;
    // Reused scratch iterators for the SKIP_SPACES comparison.
    StringIterator si1 = new StringIterator();
    StringIterator si2 = new StringIterator();
    // Bit mask of the flag constants above.
    int flags;

    /**
     * @param flags
     *            bitwise OR of TRIM, SKIP_SPACES, SKIP_EMPTY, SKIP_CVS_TAGS
     */
    public SimpleLineComparator(int flags) {
        this.flags = flags;
    }

    /**
     * Compares two lines according to the flags.
     *
     * @param line1
     *            line from the first source, or null
     * @param line2
     *            line from the second source, or null
     * @return LINES_SAME, LINES_DIFFERENT, SKIP_FIRST or SKIP_SECOND
     */
    @Override
    public int compare(String line1, String line2) {
        // first, see if we want to skip one or the other lines
        int skipper = 0;
        if (line1 == null) {
            skipper = SKIP_FIRST;
        } else {
            if ((flags & TRIM) != 0) line1 = line1.trim();
            if ((flags & SKIP_EMPTY) != 0 && line1.length() == 0) skipper = SKIP_FIRST;
        }
        if (line2 == null) {
            // NOTE(review): this is an assignment, not "+=", so it discards a SKIP_FIRST set above.
            // With both lines null the result is therefore SKIP_SECOND, not LINES_SAME. Confirm intent.
            skipper = SKIP_SECOND;
        } else {
            if ((flags & TRIM) != 0) line2 = line2.trim();
            if ((flags & SKIP_EMPTY) != 0 && line2.length() == 0) skipper += SKIP_SECOND;
        }
        if (skipper != 0) {
            if (skipper == SKIP_FIRST + SKIP_SECOND) return LINES_SAME; // ok, don't skip both
            // NOTE(review): areFileIdentical in this file keeps line1 and re-reads only line2 when
            // SKIP_FIRST is returned. Confirm that this is the intended meaning for an empty line1.
            return skipper;
        }

        // check for null
        // NOTE(review): unreachable as written; a null line always sets skipper above, so the
        // method has already returned by the time either line could be null here.
        if (line1 == null) {
            if (line2 == null) return LINES_SAME;
            return LINES_DIFFERENT;
        }
        if (line2 == null) {
            return LINES_DIFFERENT;
        }

        // now check equality
        if (line1.equals(line2)) return LINES_SAME;

        // if not equal, see if the difference is only in CVS tag content or the DOCTYPE line
        if ((flags & SKIP_CVS_TAGS) != 0) {
            if (line1.indexOf('$') >= 0 && line2.indexOf('$') >= 0) {
                line1 = stripTags(line1);
                line2 = stripTags(line2);
                if (line1.equals(line2)) return LINES_SAME;
            } else if (line1.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")
                && line2.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")) {
                return LINES_SAME;
            }
        }
        // finally, compare ignoring spaces and tabs if requested
        if ((flags & SKIP_SPACES) != 0 && si1.set(line1).matches(si2.set(line2))) return LINES_SAME;
        return LINES_DIFFERENT;
    }

    // private Matcher dtdMatcher = PatternCache.get(
    // "\\Q<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/\\E.*\\Q/ldml.dtd\">\\E").matcher("");

    // Tag names whose expanded content is removed by stripTags.
    private String[] CVS_TAGS = { "Revision", "Date" };

    /**
     * Removes the expanded content of a CVS tag, keeping the tag name and both '$' delimiters.
     * Only the first '$'...'$' pair on the line is examined; a line without such a pair, or whose
     * pair does not start with one of CVS_TAGS, is returned unchanged.
     */
    private String stripTags(String line) {
        // Examples of the expanded tags this handles (split so that they are not expanded here):
        // $
        // Revision: 8994 $
        // $
        // Date: 2013-07-03 21:31:17 +0200 (Wed, 03 Jul 2013) $
        int pos = line.indexOf('$');
        if (pos < 0) return line;
        pos++; // first character after the opening '$'
        int endpos = line.indexOf('$', pos);
        if (endpos < 0) return line;
        for (int i = 0; i < CVS_TAGS.length; ++i) {
            if (!line.startsWith(CVS_TAGS[i], pos)) continue;
            // keep "...$" + tag name, drop everything up to (not including) the closing '$'
            line = line.substring(0, pos + CVS_TAGS[i].length()) + line.substring(endpos);
        }
        return line;
    }

}
line2 : "<end of file>"; 307 return false; 308 } 309 } 310 311 /* 312 * static String getLineWithoutFluff(BufferedReader br1, boolean first, int flags) throws IOException { 313 * while (true) { 314 * String line1 = br1.readLine(); 315 * if (line1 == null) return line1; 316 * if ((flags & TRIM)!= 0) line1 = line1.trim(); 317 * if ((flags & SKIP_EMPTY)!= 0 && line1.length() == 0) continue; 318 * return line1; 319 * } 320 * } 321 */ 322 323 public final static class StringIterator { 324 String string; 325 int position = 0; 326 next()327 char next() { 328 while (true) { 329 if (position >= string.length()) return '\uFFFF'; 330 char ch = string.charAt(position++); 331 if (ch != ' ' && ch != '\t') return ch; 332 } 333 } 334 reset()335 StringIterator reset() { 336 position = 0; 337 return this; 338 } 339 set(String string)340 StringIterator set(String string) { 341 this.string = string; 342 position = 0; 343 return this; 344 } 345 matches(StringIterator other)346 boolean matches(StringIterator other) { 347 while (true) { 348 char c1 = next(); 349 char c2 = other.next(); 350 if (c1 != c2) return false; 351 if (c1 == '\uFFFF') return true; 352 } 353 } 354 355 /** 356 * @return Returns the position. 357 */ getPosition()358 public int getPosition() { 359 return position; 360 } 361 } 362 splitArray(String source, char separator)363 public static String[] splitArray(String source, char separator) { 364 return splitArray(source, separator, false); 365 } 366 splitArray(String source, char separator, boolean trim)367 public static String[] splitArray(String source, char separator, boolean trim) { 368 List<String> piecesList = splitList(source, separator, trim); 369 String[] pieces = new String[piecesList.size()]; 370 piecesList.toArray(pieces); 371 return pieces; 372 } 373 splitCommaSeparated(String line)374 public static String[] splitCommaSeparated(String line) { 375 // items are separated by ',' 376 // each item is of the form abc... 377 // or "..." 
(required if a comma or quote is contained) 378 // " in a field is represented by "" 379 List<String> result = new ArrayList<>(); 380 StringBuilder item = new StringBuilder(); 381 boolean inQuote = false; 382 for (int i = 0; i < line.length(); ++i) { 383 char ch = line.charAt(i); // don't worry about supplementaries 384 switch (ch) { 385 case '"': 386 inQuote = !inQuote; 387 // at start or end, that's enough 388 // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote 389 if (inQuote && item.length() != 0) { 390 item.append('"'); 391 inQuote = true; 392 } 393 break; 394 case ',': 395 if (!inQuote) { 396 result.add(item.toString()); 397 item.setLength(0); 398 } else { 399 item.append(ch); 400 } 401 break; 402 default: 403 item.append(ch); 404 break; 405 } 406 } 407 result.add(item.toString()); 408 return result.toArray(new String[result.size()]); 409 } 410 splitList(String source, char separator)411 public static List<String> splitList(String source, char separator) { 412 return splitList(source, separator, false, null); 413 } 414 splitList(String source, char separator, boolean trim)415 public static List<String> splitList(String source, char separator, boolean trim) { 416 return splitList(source, separator, trim, null); 417 } 418 splitList(String source, char separator, boolean trim, List<String> output)419 public static List<String> splitList(String source, char separator, boolean trim, List<String> output) { 420 return splitList(source, Character.toString(separator), trim, output); 421 } 422 splitList(String source, String separator)423 public static List<String> splitList(String source, String separator) { 424 return splitList(source, separator, false, null); 425 } 426 splitList(String source, String separator, boolean trim)427 public static List<String> splitList(String source, String separator, boolean trim) { 428 return splitList(source, separator, trim, null); 429 } 430 splitList(String source, String separator, 
boolean trim, List<String> output)431 public static List<String> splitList(String source, String separator, boolean trim, List<String> output) { 432 if (output == null) output = new ArrayList<>(); 433 if (source.length() == 0) return output; 434 int pos = 0; 435 do { 436 int npos = source.indexOf(separator, pos); 437 if (npos < 0) npos = source.length(); 438 String piece = source.substring(pos, npos); 439 if (trim) piece = piece.trim(); 440 output.add(piece); 441 pos = npos + 1; 442 } while (pos < source.length()); 443 return output; 444 } 445 446 /** 447 * Protect a collection (as much as Java lets us!) from modification. 448 * Really, really ugly code, since Java doesn't let us do better. 449 */ 450 @SuppressWarnings({ "rawtypes", "unchecked" }) protectCollection(T source)451 public static <T> T protectCollection(T source) { 452 // TODO - exclude UnmodifiableMap, Set, ... 453 if (source instanceof Map) { 454 Map<Object,Object> sourceMap = (Map) source; 455 ImmutableMap.Builder<Object,Object> builder = ImmutableMap.builder(); 456 for (Entry<Object,Object> entry : sourceMap.entrySet()) { 457 final Object key = entry.getKey(); 458 final Object value = entry.getValue(); 459 builder.put(protectCollection(key), protectCollection(value)); 460 } 461 return (T) builder.build(); 462 } else if (source instanceof Multimap) { 463 Multimap<Object,Object> sourceMap = (Multimap) source; 464 ImmutableMultimap.Builder<Object,Object> builder = ImmutableMultimap.builder(); 465 for (Entry<Object,Object> entry : sourceMap.entries()) { 466 builder.put(protectCollection(entry.getKey()), protectCollection(entry.getValue())); 467 } 468 return (T) builder.build(); 469 } else if (source instanceof Collection) { 470 // TODO use ImmutableSet, List, ... 
471 Collection sourceCollection = (Collection) source; 472 Collection<Object> resultCollection = clone(sourceCollection); 473 if (resultCollection == null) return (T) sourceCollection; // failed 474 resultCollection.clear(); 475 476 for (Object item : sourceCollection) { 477 resultCollection.add(protectCollection(item)); 478 } 479 480 return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection) 481 : sourceCollection instanceof SortedSet ? (T) Collections 482 .unmodifiableSortedSet((SortedSet) sourceCollection) 483 : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection) 484 : (T) Collections.unmodifiableCollection(sourceCollection); 485 } else if (source instanceof Freezable) { 486 Freezable freezableSource = (Freezable) source; 487 if (freezableSource.isFrozen()) return source; 488 return (T) ((Freezable) (freezableSource.cloneAsThawed())).freeze(); 489 } else { 490 return source; // can't protect 491 } 492 } 493 494 /** 495 * Protect a collections where we don't need to clone. 496 * @param source 497 * @return 498 */ 499 @SuppressWarnings({ "rawtypes", "unchecked" }) protectCollectionX(T source)500 public static <T> T protectCollectionX(T source) { 501 // TODO - exclude UnmodifiableMap, Set, ... 502 if (isImmutable(source)) { 503 return source; 504 } 505 if (source instanceof Map) { 506 Map sourceMap = (Map) source; 507 // recurse 508 LinkedHashMap tempMap = new LinkedHashMap<>(sourceMap); // copy contents 509 sourceMap.clear(); 510 for (Object key : tempMap.keySet()) { 511 sourceMap.put(protectCollection(key), protectCollectionX(tempMap.get(key))); 512 } 513 return sourceMap instanceof SortedMap ? 
(T) Collections.unmodifiableSortedMap((SortedMap) sourceMap) 514 : (T) Collections.unmodifiableMap(sourceMap); 515 } else if (source instanceof Collection) { 516 Collection sourceCollection = (Collection) source; 517 LinkedHashSet tempSet = new LinkedHashSet<>(sourceCollection); // copy contents 518 519 sourceCollection.clear(); 520 for (Object item : tempSet) { 521 sourceCollection.add(protectCollectionX(item)); 522 } 523 524 return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection) 525 : sourceCollection instanceof SortedSet ? (T) Collections 526 .unmodifiableSortedSet((SortedSet) sourceCollection) 527 : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection) 528 : (T) Collections.unmodifiableCollection(sourceCollection); 529 } else if (source instanceof Freezable) { 530 Freezable freezableSource = (Freezable) source; 531 return (T) freezableSource.freeze(); 532 } else { 533 throw new IllegalArgumentException("Can’t protect: " + source.getClass().toString()); 534 } 535 } 536 537 private static final Set<Object> KNOWN_IMMUTABLES = new HashSet<>(Arrays.asList( 538 String.class)); 539 isImmutable(Object source)540 public static boolean isImmutable(Object source) { 541 return source == null 542 || source instanceof Enum 543 || source instanceof Number 544 || KNOWN_IMMUTABLES.contains(source.getClass()); 545 } 546 547 /** 548 * Clones T if we can; otherwise returns null. 549 * 550 * @param <T> 551 * @param source 552 * @return 553 */ 554 @SuppressWarnings("unchecked") clone(T source)555 private static <T> T clone(T source) { 556 final Class<? extends Object> class1 = source.getClass(); 557 try { 558 final Method declaredMethod = class1.getDeclaredMethod("clone", (Class<?>) null); 559 return (T) declaredMethod.invoke(source, (Object) null); 560 } catch (Exception e) { 561 } 562 try { 563 final Constructor<? 
extends Object> declaredMethod = class1.getConstructor((Class<?>) null); 564 return (T) declaredMethod.newInstance((Object) null); 565 } catch (Exception e) { 566 } 567 return null; // uncloneable 568 } 569 570 /** 571 * Appends two strings, inserting separator if either is empty 572 */ joinWithSeparation(String a, String separator, String b)573 public static String joinWithSeparation(String a, String separator, String b) { 574 if (a.length() == 0) return b; 575 if (b.length() == 0) return a; 576 return a + separator + b; 577 } 578 579 /** 580 * Appends two strings, inserting separator if either is empty. Modifies first map 581 */ joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b)582 public static Map<String, String> joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b) { 583 for (Iterator<String> it = b.keySet().iterator(); it.hasNext();) { 584 String key = it.next(); 585 String bvalue = b.get(key); 586 String avalue = a.get(key); 587 if (avalue != null) { 588 if (avalue.trim().equals(bvalue.trim())) continue; 589 bvalue = joinWithSeparation(avalue, separator, bvalue); 590 } 591 a.put(key, bvalue); 592 } 593 return a; 594 } 595 join(Collection<T> c, String separator)596 public static <T> String join(Collection<T> c, String separator) { 597 return join(c, separator, null); 598 } 599 join(Object[] c, String separator)600 public static String join(Object[] c, String separator) { 601 return join(c, separator, null); 602 } 603 join(Collection<T> c, String separator, Transform<T, String> transform)604 public static <T> String join(Collection<T> c, String separator, Transform<T, String> transform) { 605 StringBuffer output = new StringBuffer(); 606 boolean isFirst = true; 607 for (T item : c) { 608 if (isFirst) { 609 isFirst = false; 610 } else { 611 output.append(separator); 612 } 613 output.append(transform != null ? 
transform.transform(item) : item); 614 } 615 return output.toString(); 616 } 617 join(T[] c, String separator, Transform<T, String> transform)618 public static <T> String join(T[] c, String separator, Transform<T, String> transform) { 619 return join(Arrays.asList(c), separator, transform); 620 } 621 622 /** 623 * Utility like Arrays.asList() 624 */ 625 @SuppressWarnings("unchecked") asMap(Object[][] source, Map<K, V> target, boolean reverse)626 public static <K, V> Map<K, V> asMap(Object[][] source, Map<K, V> target, boolean reverse) { 627 int from = 0, to = 1; 628 if (reverse) { 629 from = 1; 630 to = 0; 631 } 632 for (int i = 0; i < source.length; ++i) { 633 if (source[i].length != 2) { 634 throw new IllegalArgumentException("Source must be array of pairs of strings: " 635 + Arrays.asList(source[i])); 636 } 637 target.put((K) source[i][from], (V) source[i][to]); 638 } 639 return target; 640 } 641 asMap(Object[][] source)642 public static <K, V> Map<K, V> asMap(Object[][] source) { 643 return asMap(source, new HashMap<K, V>(), false); 644 } 645 646 /** 647 * Returns the canonical name for a file. 648 */ getCanonicalName(String file)649 public static String getCanonicalName(String file) { 650 try { 651 return PathUtilities.getNormalizedPathString(file); 652 } catch (Exception e) { 653 return file; 654 } 655 } 656 657 /** 658 * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that are in the UnicodeSet, 659 * Supplementary ranges, and escaping 660 * 661 * @param source 662 * The source set 663 * @param escaper 664 * A transliterator that is used to escape the characters according to the requirements of the regex. 
/**
 * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that are
 * in the UnicodeSet, supplementary ranges, and escaping.
 * <p>
 * Convenience overload of toRegex(UnicodeSet, Transliterator, boolean) that passes a null escaper
 * (so the default regex escaper is used) and false for onlyBmp.
 *
 * @param source
 *            The source set
 * @return the regex string for the set
 */
public static String toRegex(UnicodeSet source) {
    return toRegex(source, null, false);
}
Something like [a-z] or (?:[a-m]|{zh}) if there is 720 * a string zh in the set, or a more complicated case for 721 * supplementaries. <br> 722 * Special cases: [] returns "", single item returns a string 723 * (escaped), like [a] => "a", or [{abc}] => "abc"<br> 724 * Supplementaries are handled specially, as described under onlyBmp. 725 */ toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp)726 public static String toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp) { 727 if (escaper == null) { 728 escaper = DEFAULT_REGEX_ESCAPER; 729 } 730 UnicodeSetIterator it = new UnicodeSetIterator(source); 731 // if there is only one item, return it 732 if (source.size() == 0) { 733 return ""; 734 } 735 if (source.size() == 1) { 736 it.next(); 737 return escaper.transliterate(it.getString()); 738 } 739 // otherwise, we figure out what is in the set, and will return 740 StringBuilder base = new StringBuilder("["); 741 StringBuilder alternates = new StringBuilder(); 742 Map<UnicodeSet, UnicodeSet> lastToFirst = new TreeMap<>(new UnicodeSetComparator()); 743 int alternateCount = 0; 744 while (it.nextRange()) { 745 if (it.codepoint == UnicodeSetIterator.IS_STRING) { 746 ++alternateCount; 747 alternates.append('|').append(escaper.transliterate(it.string)); 748 } else if (!onlyBmp || it.codepointEnd <= 0xFFFF) { // BMP 749 addBmpRange(it.codepoint, it.codepointEnd, escaper, base); 750 } else { // supplementary 751 if (it.codepoint <= 0xFFFF) { 752 addBmpRange(it.codepoint, 0xFFFF, escaper, base); 753 it.codepoint = 0x10000; // reset the range 754 } 755 // this gets a bit ugly; we are trying to minimize the extra ranges for supplementaries 756 // we do this by breaking up X-Y based on the Lead and Trail values for X and Y 757 // Lx [Tx - Ty]) (if Lx == Ly) 758 // Lx [Tx - DFFF] | Ly [DC00-Ty] (if Lx == Ly - 1) 759 // Lx [Tx - DFFF] | [Lx+1 - Ly-1][DC00-DFFF] | Ly [DC00-Ty] (otherwise) 760 int leadX = UTF16.getLeadSurrogate(it.codepoint); 761 int 
trailX = UTF16.getTrailSurrogate(it.codepoint); 762 int leadY = UTF16.getLeadSurrogate(it.codepointEnd); 763 int trailY = UTF16.getTrailSurrogate(it.codepointEnd); 764 if (leadX == leadY) { 765 addSupplementalRange(leadX, leadX, trailX, trailY, escaper, lastToFirst); 766 } else { 767 addSupplementalRange(leadX, leadX, trailX, 0xDFFF, escaper, lastToFirst); 768 if (leadX != leadY - 1) { 769 addSupplementalRange(leadX + 1, leadY - 1, 0xDC00, 0xDFFF, escaper, lastToFirst); 770 } 771 addSupplementalRange(leadY, leadY, 0xDC00, trailY, escaper, lastToFirst); 772 } 773 } 774 } 775 // add in the supplementary ranges 776 if (lastToFirst.size() != 0) { 777 for (UnicodeSet last : lastToFirst.keySet()) { 778 ++alternateCount; 779 alternates.append('|').append(toRegex(lastToFirst.get(last), escaper, onlyBmp)) 780 .append(toRegex(last, escaper, onlyBmp)); 781 } 782 } 783 // Return the output. We separate cases in order to get the minimal extra apparatus 784 base.append("]"); 785 if (alternateCount == 0) { 786 return base.toString(); 787 } else if (base.length() > 2) { 788 return "(?:" + base + "|" + alternates.substring(1) + ")"; 789 } else if (alternateCount == 1) { 790 return alternates.substring(1); 791 } else { 792 return "(?:" + alternates.substring(1) + ")"; 793 } 794 } 795 addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper, Map<UnicodeSet, UnicodeSet> lastToFirst)796 private static void addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper, 797 Map<UnicodeSet, UnicodeSet> lastToFirst) { 798 System.out.println("\tadding: " + new UnicodeSet(leadX, leadY) + "\t" + new UnicodeSet(trailX, trailY)); 799 UnicodeSet last = new UnicodeSet(trailX, trailY); 800 UnicodeSet first = lastToFirst.get(last); 801 if (first == null) { 802 lastToFirst.put(last, first = new UnicodeSet()); 803 } 804 first.add(leadX, leadY); 805 } 806 addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base)807 
private static void addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base) { 808 base.append(escaper.transliterate(UTF16.valueOf(start))); 809 if (start != limit) { 810 base.append("-").append(escaper.transliterate(UTF16.valueOf(limit))); 811 } 812 } 813 814 public static class UnicodeSetComparator implements Comparator<UnicodeSet> { 815 @Override compare(UnicodeSet o1, UnicodeSet o2)816 public int compare(UnicodeSet o1, UnicodeSet o2) { 817 return o1.compareTo(o2); 818 } 819 } 820 821 public static class CollectionComparator<T extends Comparable<T>> implements Comparator<Collection<T>> { 822 @Override compare(Collection<T> o1, Collection<T> o2)823 public int compare(Collection<T> o1, Collection<T> o2) { 824 return UnicodeSet.compare(o1, o2, UnicodeSet.ComparisonStyle.SHORTER_FIRST); 825 } 826 } 827 828 public static class ComparableComparator<T extends Comparable<T>> implements Comparator<T> { 829 @Override compare(T arg0, T arg1)830 public int compare(T arg0, T arg1) { 831 return Utility.checkCompare(arg0, arg1); 832 } 833 } 834 835 @SuppressWarnings({ "rawtypes", "unchecked" }) addTreeMapChain(Map coverageData, Object... objects)836 public static void addTreeMapChain(Map coverageData, Object... 
objects) { 837 Map<Object, Object> base = coverageData; 838 for (int i = 0; i < objects.length - 2; ++i) { 839 Map<Object, Object> nextOne = (Map<Object, Object>) base.get(objects[i]); 840 if (nextOne == null) base.put(objects[i], nextOne = new TreeMap<>()); 841 base = nextOne; 842 } 843 base.put(objects[objects.length - 2], objects[objects.length - 1]); 844 } 845 846 public static abstract class CollectionTransform<S, T> implements Transform<S, T> { 847 @Override transform(S source)848 public abstract T transform(S source); 849 transform(Collection<S> input, Collection<T> output)850 public Collection<T> transform(Collection<S> input, Collection<T> output) { 851 return CldrUtility.transform(input, this, output); 852 } 853 transform(Collection<S> input)854 public Collection<T> transform(Collection<S> input) { 855 return transform(input, new ArrayList<T>()); 856 } 857 } 858 transform(SC source, Transform<S, T> transform, TC target)859 public static <S, T, SC extends Collection<S>, TC extends Collection<T>> TC transform(SC source, Transform<S, T> transform, TC target) { 860 for (S sourceItem : source) { 861 T targetItem = transform.transform(sourceItem); 862 if (targetItem != null) { 863 target.add(targetItem); 864 } 865 } 866 return target; 867 } 868 transform( SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target)869 public static <SK, SV, TK, TV, SM extends Map<SK, SV>, TM extends Map<TK, TV>> TM transform( 870 SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target) { 871 for (Entry<SK, SV> sourceEntry : source.entrySet()) { 872 TK targetKey = transformKey.transform(sourceEntry.getKey()); 873 TV targetValue = transformValue.transform(sourceEntry.getValue()); 874 if (targetKey != null && targetValue != null) { 875 target.put(targetKey, targetValue); 876 } 877 } 878 return target; 879 } 880 881 public static abstract class Apply<T> { apply(T item)882 public abstract void apply(T item); 883 applyTo(U 
collection)884 public <U extends Collection<T>> void applyTo(U collection) { 885 for (T item : collection) { 886 apply(item); 887 } 888 } 889 } 890 891 public static abstract class Filter<T> { 892 contains(T item)893 public abstract boolean contains(T item); 894 retainAll(U c)895 public <U extends Collection<T>> U retainAll(U c) { 896 for (Iterator<T> it = c.iterator(); it.hasNext();) { 897 if (!contains(it.next())) it.remove(); 898 } 899 return c; 900 } 901 extractMatches(U c, U target)902 public <U extends Collection<T>> U extractMatches(U c, U target) { 903 for (Iterator<T> it = c.iterator(); it.hasNext();) { 904 T item = it.next(); 905 if (contains(item)) { 906 target.add(item); 907 } 908 } 909 return target; 910 } 911 removeAll(U c)912 public <U extends Collection<T>> U removeAll(U c) { 913 for (Iterator<T> it = c.iterator(); it.hasNext();) { 914 if (contains(it.next())) it.remove(); 915 } 916 return c; 917 } 918 extractNonMatches(U c, U target)919 public <U extends Collection<T>> U extractNonMatches(U c, U target) { 920 for (Iterator<T> it = c.iterator(); it.hasNext();) { 921 T item = it.next(); 922 if (!contains(item)) { 923 target.add(item); 924 } 925 } 926 return target; 927 } 928 } 929 930 public static class MatcherFilter<T> extends Filter<T> { 931 private Matcher matcher; 932 MatcherFilter(String pattern)933 public MatcherFilter(String pattern) { 934 this.matcher = PatternCache.get(pattern).matcher(""); 935 } 936 MatcherFilter(Matcher matcher)937 public MatcherFilter(Matcher matcher) { 938 this.matcher = matcher; 939 } 940 set(Matcher matcher)941 public MatcherFilter<T> set(Matcher matcher) { 942 this.matcher = matcher; 943 return this; 944 } 945 set(String pattern)946 public MatcherFilter<T> set(String pattern) { 947 this.matcher = PatternCache.get(pattern).matcher(""); 948 return this; 949 } 950 951 @Override contains(T o)952 public boolean contains(T o) { 953 return matcher.reset(o.toString()).matches(); 954 } 955 } 956 957 // static final class 
HandlingTransform implements Transform<String, Handling> { 958 // @Override 959 // public Handling transform(String source) { 960 // return Handling.valueOf(source); 961 // } 962 // } 963 964 public static final class PairComparator<K extends Comparable<K>, V extends Comparable<V>> implements java.util.Comparator<Pair<K, V>> { 965 966 private Comparator<K> comp1; 967 private Comparator<V> comp2; 968 PairComparator(Comparator<K> comp1, Comparator<V> comp2)969 public PairComparator(Comparator<K> comp1, Comparator<V> comp2) { 970 this.comp1 = comp1; 971 this.comp2 = comp2; 972 } 973 974 @Override compare(Pair<K, V> o1, Pair<K, V> o2)975 public int compare(Pair<K, V> o1, Pair<K, V> o2) { 976 { 977 K o1First = o1.getFirst(); 978 K o2First = o2.getFirst(); 979 int diff = o1First == null ? (o2First == null ? 0 : -1) 980 : o2First == null ? 1 981 : comp1 == null ? o1First.compareTo(o2First) 982 : comp1.compare(o1First, o2First); 983 if (diff != 0) { 984 return diff; 985 } 986 } 987 V o1Second = o1.getSecond(); 988 V o2Second = o2.getSecond(); 989 return o1Second == null ? (o2Second == null ? 0 : -1) 990 : o2Second == null ? 1 991 : comp2 == null ? o1Second.compareTo(o2Second) 992 : comp2.compare(o1Second, o2Second); 993 } 994 995 } 996 997 /** 998 * Fetch data from jar 999 * 1000 * @param name 1001 * a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break. 1002 */ getUTF8Data(String name)1003 public static BufferedReader getUTF8Data(String name) { 1004 if (new File(name).isAbsolute()) { 1005 throw new IllegalArgumentException( 1006 "Path must be relative to org/unicode/cldr/util/data such as 'file.txt' or 'casing/file.txt', but got '" 1007 + name + "'."); 1008 } 1009 return FileReaders.openFile(CldrUtility.class, "data/" + name); 1010 } 1011 1012 /** 1013 * Fetch data from jar 1014 * 1015 * @param name 1016 * a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break. 
1017 */ getInputStream(String name)1018 public static InputStream getInputStream(String name) { 1019 if (new File(name).isAbsolute()) { 1020 throw new IllegalArgumentException( 1021 "Path must be relative to org/unicode/cldr/util/data such as 'file.txt' or 'casing/file.txt', but got '" 1022 + name + "'."); 1023 } 1024 return getInputStream(CldrUtility.class, "data/" + name); 1025 } 1026 getInputStream(Class<?> callingClass, String relativePath)1027 public static InputStream getInputStream(Class<?> callingClass, String relativePath) { 1028 InputStream is = callingClass.getResourceAsStream(relativePath); 1029 // add buffering 1030 return InputStreamFactory.buffer(is); 1031 } 1032 1033 /** 1034 * Takes a Map that goes from Object to Set, and fills in the transpose 1035 * 1036 * @param source_key_valueSet 1037 * @param output_value_key 1038 */ putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key)1039 public static void putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key) { 1040 for (Iterator<Object> it = source_key_valueSet.keySet().iterator(); it.hasNext();) { 1041 Object key = it.next(); 1042 Set<Object> values = source_key_valueSet.get(key); 1043 for (Iterator<Object> it2 = values.iterator(); it2.hasNext();) { 1044 Object value = it2.next(); 1045 output_value_key.put(value, key); 1046 } 1047 } 1048 } 1049 countInstances(String source, String substring)1050 public static int countInstances(String source, String substring) { 1051 int count = 0; 1052 int pos = 0; 1053 while (true) { 1054 pos = source.indexOf(substring, pos) + 1; 1055 if (pos <= 0) break; 1056 count++; 1057 } 1058 return count; 1059 } 1060 registerTransliteratorFromFile(String id, String dir, String filename)1061 public static void registerTransliteratorFromFile(String id, String dir, String filename) { 1062 registerTransliteratorFromFile(id, dir, filename, Transliterator.FORWARD, true); 1063 
registerTransliteratorFromFile(id, dir, filename, Transliterator.REVERSE, true); 1064 } 1065 registerTransliteratorFromFile(String id, String dir, String filename, int direction, boolean reverseID)1066 public static void registerTransliteratorFromFile(String id, String dir, String filename, int direction, 1067 boolean reverseID) { 1068 if (filename == null) { 1069 filename = id.replace('-', '_'); 1070 filename = filename.replace('/', '_'); 1071 filename += ".txt"; 1072 } 1073 String rules = getText(dir, filename); 1074 Transliterator t; 1075 int pos = id.indexOf('-'); 1076 String rid; 1077 if (pos < 0) { 1078 rid = id + "-Any"; 1079 id = "Any-" + id; 1080 } else { 1081 rid = id.substring(pos + 1) + "-" + id.substring(0, pos); 1082 } 1083 if (!reverseID) rid = id; 1084 1085 if (direction == Transliterator.FORWARD) { 1086 Transliterator.unregister(id); 1087 t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 1088 Transliterator.registerInstance(t); 1089 System.out.println("Registered new Transliterator: " + id); 1090 } 1091 1092 /* 1093 * String test = "\u049A\u0430\u0437\u0430\u049B"; 1094 * System.out.println(t.transliterate(test)); 1095 * t = Transliterator.getInstance(id); 1096 * System.out.println(t.transliterate(test)); 1097 */ 1098 1099 if (direction == Transliterator.REVERSE) { 1100 Transliterator.unregister(rid); 1101 t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE); 1102 Transliterator.registerInstance(t); 1103 System.out.println("Registered new Transliterator: " + rid); 1104 } 1105 } 1106 getText(String dir, String filename)1107 public static String getText(String dir, String filename) { 1108 try { 1109 BufferedReader br = FileUtilities.openUTF8Reader(dir, filename); 1110 StringBuffer buffer = new StringBuffer(); 1111 while (true) { 1112 String line = br.readLine(); 1113 if (line == null) break; 1114 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1); 1115 if (line.startsWith("//")) 
continue; 1116 buffer.append(line).append(CldrUtility.LINE_SEPARATOR); 1117 } 1118 br.close(); 1119 String rules = buffer.toString(); 1120 return rules; 1121 } catch (IOException e) { 1122 throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + filename) 1123 .initCause(e); 1124 } 1125 } 1126 callMethod(String methodNames, Class<?> cls)1127 public static void callMethod(String methodNames, Class<?> cls) { 1128 for (String methodName : methodNames.split(",")) { 1129 try { 1130 Method method; 1131 try { 1132 method = cls.getMethod(methodName, (Class[]) null); 1133 try { 1134 method.invoke(null, (Object[]) null); 1135 } catch (Exception e) { 1136 e.printStackTrace(); 1137 } 1138 } catch (Exception e) { 1139 System.out.println("No such method: " + methodName); 1140 showMethods(cls); 1141 } 1142 } catch (ClassNotFoundException e) { 1143 e.printStackTrace(); 1144 } 1145 } 1146 } 1147 showMethods(Class<?> cls)1148 public static void showMethods(Class<?> cls) throws ClassNotFoundException { 1149 System.out.println("Possible methods of " + cls.getCanonicalName() + " are: "); 1150 Method[] methods = cls.getMethods(); 1151 Set<String> names = new TreeSet<>(); 1152 for (int i = 0; i < methods.length; ++i) { 1153 if (methods[i].getGenericParameterTypes().length != 0) continue; 1154 //int mods = methods[i].getModifiers(); 1155 // if (!Modifier.isStatic(mods)) continue; 1156 String name = methods[i].getName(); 1157 names.add(name); 1158 } 1159 for (Iterator<String> it = names.iterator(); it.hasNext();) { 1160 System.out.println("\t" + it.next()); 1161 } 1162 } 1163 1164 /** 1165 * Breaks lines if they are too long, or if matcher.group(1) != last. Only breaks just before matcher. 1166 * 1167 * @param input 1168 * @param separator 1169 * @param matcher 1170 * must match each possible item. 
The first group is significant; if different, will cause break 1171 * @return 1172 */ breakLines(CharSequence input, String separator, Matcher matcher, int width)1173 static public String breakLines(CharSequence input, String separator, Matcher matcher, int width) { 1174 StringBuffer output = new StringBuffer(); 1175 String lastPrefix = ""; 1176 int lastEnd = 0; 1177 int lastBreakPos = 0; 1178 matcher.reset(input); 1179 while (true) { 1180 boolean match = matcher.find(); 1181 if (!match) { 1182 output.append(input.subSequence(lastEnd, input.length())); 1183 break; 1184 } 1185 String prefix = matcher.group(1); 1186 if (!prefix.equalsIgnoreCase(lastPrefix) || matcher.end() - lastBreakPos > width) { // break before? 1187 output.append(separator); 1188 lastBreakPos = lastEnd; 1189 } else if (lastEnd != 0) { 1190 output.append(' '); 1191 } 1192 output.append(input.subSequence(lastEnd, matcher.end()).toString().trim()); 1193 lastEnd = matcher.end(); 1194 lastPrefix = prefix; 1195 } 1196 return output.toString(); 1197 } 1198 showOptions(String[] args)1199 public static void showOptions(String[] args) { 1200 // Properties props = System.getProperties(); 1201 System.out.println("Arguments: " + join(args, " ")); // + (props == null ? "" : " " + props)); 1202 } 1203 roundToDecimals(double input, int places)1204 public static double roundToDecimals(double input, int places) { 1205 double log10 = Math.log10(input); // 15000 => 4.xxx 1206 double intLog10 = Math.floor(log10); 1207 double scale = Math.pow(10, intLog10 - places + 1); 1208 double factored = Math.round(input / scale) * scale; 1209 // System.out.println("###\t" +input + "\t" + factored); 1210 return factored; 1211 } 1212 1213 /** 1214 * Get a property value, returning the value if there is one (eg -Dkey=value), 1215 * otherwise the default value (for either empty or null). 
1216 * 1217 * @param key 1218 * @param valueIfNull 1219 * @param valueIfEmpty 1220 * @return 1221 */ getProperty(String key, String defaultValue)1222 public static String getProperty(String key, String defaultValue) { 1223 return getProperty(key, defaultValue, defaultValue); 1224 } 1225 1226 /** 1227 * Get a property value, returning the value if there is one, otherwise null. 1228 */ getProperty(String key)1229 public static String getProperty(String key) { 1230 return getProperty(key, null, null); 1231 } 1232 1233 /** 1234 * Get a property value, returning the value if there is one (eg -Dkey=value), 1235 * the valueIfEmpty if there is one with no value (eg -Dkey) and the valueIfNull 1236 * if there is no property. 1237 * 1238 * @param key 1239 * @param valueIfNull 1240 * @param valueIfEmpty 1241 * @return 1242 */ getProperty(String key, String valueIfNull, String valueIfEmpty)1243 public static String getProperty(String key, String valueIfNull, String valueIfEmpty) { 1244 String result = CLDRConfig.getInstance().getProperty(key); 1245 if (result == null) { 1246 result = valueIfNull; 1247 } else if (result.length() == 0) { 1248 result = valueIfEmpty; 1249 } 1250 return result; 1251 } 1252 hex(byte[] bytes, int start, int end, String separator)1253 public static String hex(byte[] bytes, int start, int end, String separator) { 1254 StringBuilder result = new StringBuilder(); 1255 for (int i = 0; i < end; ++i) { 1256 if (result.length() != 0) { 1257 result.append(separator); 1258 } 1259 result.append(Utility.hex(bytes[i] & 0xFF, 2)); 1260 } 1261 return result.toString(); 1262 } 1263 getProperty(String string, boolean b)1264 public static boolean getProperty(String string, boolean b) { 1265 return getProperty(string, b ? 
"true" : "false", "true").matches("(?i)T|TRUE"); 1266 } 1267 checkValidDirectory(String sourceDirectory)1268 public static String checkValidDirectory(String sourceDirectory) { 1269 return checkValidFile(sourceDirectory, true, null); 1270 } 1271 checkValidDirectory(String sourceDirectory, String correction)1272 public static String checkValidDirectory(String sourceDirectory, String correction) { 1273 return checkValidFile(sourceDirectory, true, correction); 1274 } 1275 checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction)1276 public static String checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction) { 1277 File file = null; 1278 String normalizedPath = null; 1279 try { 1280 file = new File(sourceDirectory); 1281 normalizedPath = PathUtilities.getNormalizedPathString(file) + File.separatorChar; 1282 } catch (Exception e) { 1283 } 1284 if (file == null || normalizedPath == null || checkForDirectory && !file.isDirectory()) { 1285 throw new RuntimeException("Directory not found: " + sourceDirectory 1286 + (normalizedPath == null ? "" : " => " + normalizedPath) 1287 + (correction == null ? "" : CldrUtility.LINE_SEPARATOR + correction)); 1288 } 1289 return normalizedPath; 1290 } 1291 1292 /** 1293 * Copy up to matching line (not included). If output is null, then just skip until. 1294 * 1295 * @param oldFile 1296 * file to copy 1297 * @param readUntilPattern 1298 * pattern to search for. If null, goes to end of file. 1299 * @param output 1300 * into to copy into. If null, just skips in the input. 1301 * @param includeMatchingLine 1302 * inclde the matching line when copying. 
1303 * @throws IOException 1304 */ copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern, final PrintWriter output, boolean includeMatchingLine)1305 public static void copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern, 1306 final PrintWriter output, boolean includeMatchingLine) throws IOException { 1307 Matcher readUntil = readUntilPattern == null ? null : readUntilPattern.matcher(""); 1308 while (true) { 1309 String line = oldFile.readLine(); 1310 if (line == null) { 1311 break; 1312 } 1313 if (line.startsWith("\uFEFF")) { 1314 line = line.substring(1); 1315 } 1316 if (readUntil != null && readUntil.reset(line).matches()) { 1317 if (includeMatchingLine && output != null) { 1318 output.println(line); 1319 } 1320 break; 1321 } 1322 if (output != null) { 1323 output.println(line); 1324 } 1325 } 1326 } 1327 1328 private static DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'GMT'"); 1329 private static DateFormat DATE_ONLY = new SimpleDateFormat("yyyy-MM-dd"); 1330 static { 1331 df.setTimeZone(TimeZone.getTimeZone("GMT")); 1332 DATE_ONLY.setTimeZone(TimeZone.getTimeZone("GMT")); 1333 } 1334 isoFormat(Date date)1335 public static String isoFormat(Date date) { 1336 synchronized (df) { 1337 return df.format(date); 1338 } 1339 } 1340 isoFormatDateOnly(Date date)1341 public static String isoFormatDateOnly(Date date) { 1342 synchronized (DATE_ONLY) { 1343 return DATE_ONLY.format(date); 1344 } 1345 } 1346 newConcurrentHashMap()1347 public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap() { 1348 // http://ria101.wordpress.com/2011/12/12/concurrenthashmap-avoid-a-common-misuse/ 1349 return new ConcurrentHashMap<>(4, 0.9f, 1); 1350 } 1351 newConcurrentHashMap(Map<K, V> source)1352 public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap(Map<K, V> source) { 1353 ConcurrentHashMap<K, V> result = newConcurrentHashMap(); 1354 result.putAll(source); 1355 return result; 1356 } 1357 equals(Object a, Object b)1358 public static 
boolean equals(Object a, Object b) { 1359 return a == b ? true 1360 : a == null || b == null ? false 1361 : a.equals(b); 1362 } 1363 getDoubleLink(String code)1364 public static String getDoubleLink(String code) { 1365 final String anchorSafe = TransliteratorUtilities.toHTML.transliterate(code).replace(" ", "_"); 1366 return "<a name='" + anchorSafe + "' href='#" + anchorSafe + "'>"; 1367 } 1368 getDoubleLinkedText(String anchor, String anchorText)1369 public static String getDoubleLinkedText(String anchor, String anchorText) { 1370 return getDoubleLink(anchor) + TransliteratorUtilities.toHTML.transliterate(anchorText).replace("_", " ") 1371 + "</a>"; 1372 } 1373 getDoubleLinkedText(String anchor)1374 public static String getDoubleLinkedText(String anchor) { 1375 return getDoubleLinkedText(anchor, anchor); 1376 } 1377 getDoubleLinkMsg()1378 public static String getDoubleLinkMsg() { 1379 return "<a name=''{0}'' href=''#{0}''>{0}</a>"; 1380 } 1381 getDoubleLinkMsg2()1382 public static String getDoubleLinkMsg2() { 1383 return "<a name=''{0}{1}'' href=''#{0}{1}''>{0}</a>"; 1384 } 1385 getCopyrightString()1386 public static String getCopyrightString() { 1387 return getCopyrightString(""); 1388 } 1389 getCopyrightString(String linePrefix)1390 public static String getCopyrightString(String linePrefix) { 1391 // now do the rest 1392 return linePrefix + "Copyright \u00A9 1991-" + Calendar.getInstance().get(Calendar.YEAR) + " Unicode, Inc." + CldrUtility.LINE_SEPARATOR 1393 + linePrefix + "For terms of use, see http://www.unicode.org/copyright.html" + CldrUtility.LINE_SEPARATOR 1394 + linePrefix + "Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries." 
+ CldrUtility.LINE_SEPARATOR 1395 + linePrefix + "CLDR data files are interpreted according to the LDML specification " + "(http://unicode.org/reports/tr35/)"; 1396 } 1397 1398 // TODO Move to collection utilities 1399 /** 1400 * Type-safe get 1401 * @param map 1402 * @param key 1403 * @return value 1404 */ get(M map, K key)1405 public static <K, V, M extends Map<K, V>> V get(M map, K key) { 1406 return map.get(key); 1407 } 1408 1409 /** 1410 * Type-safe contains 1411 * @param map 1412 * @param key 1413 * @return value 1414 */ contains(C collection, K key)1415 public static <K, C extends Collection<K>> boolean contains(C collection, K key) { 1416 return collection.contains(key); 1417 } 1418 toEnumSet(Class<E> classValue, Collection<String> stringValues)1419 public static <E extends Enum<E>> EnumSet<E> toEnumSet(Class<E> classValue, Collection<String> stringValues) { 1420 EnumSet<E> result = EnumSet.noneOf(classValue); 1421 for (String s : stringValues) { 1422 result.add(Enum.valueOf(classValue, s)); 1423 } 1424 return result; 1425 } 1426 putNew(M map, K key, V value)1427 public static <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) { 1428 if (!map.containsKey(key)) { 1429 map.put(key, value); 1430 } 1431 return map; 1432 } 1433 cleanSemiFields(String line)1434 public static String[] cleanSemiFields(String line) { 1435 line = cleanLine(line); 1436 return line.isEmpty() ? 
null : SEMI_SPLIT.split(line); 1437 } 1438 cleanLine(String line)1439 private static String cleanLine(String line) { 1440 int comment = line.indexOf("#"); 1441 if (comment >= 0) { 1442 line = line.substring(0, comment); 1443 } 1444 if (line.startsWith("\uFEFF")) { 1445 line = line.substring(1); 1446 } 1447 return line.trim(); 1448 } 1449 handleFile(String filename, LineHandler handler)1450 public static void handleFile(String filename, LineHandler handler) throws IOException { 1451 try (BufferedReader in = getUTF8Data(filename);) { 1452 String line = null; 1453 while ((line = in.readLine()) != null) { 1454 // String line = in.readLine(); 1455 // if (line == null) { 1456 // break; 1457 // } 1458 try { 1459 if (!handler.handle(line)) { 1460 if (HANDLEFILE_SHOW_SKIP) { 1461 System.out.println("Skipping line: " + line); 1462 } 1463 } 1464 } catch (Exception e) { 1465 throw (RuntimeException) new IllegalArgumentException("Problem with line: " + line) 1466 .initCause(e); 1467 } 1468 } 1469 } 1470 // in.close(); 1471 } 1472 ifNull(T x, T y)1473 public static <T> T ifNull(T x, T y) { 1474 return x == null 1475 ? y 1476 : x; 1477 } 1478 ifSame(T source, T replaceIfSame, T replacement)1479 public static <T> T ifSame(T source, T replaceIfSame, T replacement) { 1480 return source == replaceIfSame ? replacement : source; 1481 } 1482 ifEqual(T source, T replaceIfSame, T replacement)1483 public static <T> T ifEqual(T source, T replaceIfSame, T replacement) { 1484 return Objects.equals(source, replaceIfSame) ? replacement : source; 1485 } 1486 intersect(Set<T> a, Collection<T> b)1487 public static <T> Set<T> intersect(Set<T> a, Collection<T> b) { 1488 Set<T> result = new LinkedHashSet<>(a); 1489 result.retainAll(b); 1490 return result; 1491 } 1492 subtract(Set<T> a, Collection<T> b)1493 public static <T> Set<T> subtract(Set<T> a, Collection<T> b) { 1494 Set<T> result = new LinkedHashSet<>(a); 1495 result.removeAll(b); 1496 return result; 1497 } 1498 deepEquals(Object... 
pairs)1499 public static boolean deepEquals(Object... pairs) { 1500 for (int item = 0; item < pairs.length;) { 1501 if (!Objects.deepEquals(pairs[item++], pairs[item++])) { 1502 return false; 1503 } 1504 } 1505 return true; 1506 } 1507 array(Splitter splitter, String source)1508 public static String[] array(Splitter splitter, String source) { 1509 List<String> list = splitter.splitToList(source); 1510 return list.toArray(new String[list.size()]); 1511 } 1512 toHex(String in, boolean javaStyle)1513 public static String toHex(String in, boolean javaStyle) { 1514 StringBuilder result = new StringBuilder(); 1515 for (int i = 0; i < in.length(); ++i) { 1516 result.append(toHex(in.charAt(i), javaStyle)); 1517 } 1518 return result.toString(); 1519 } 1520 toHex(int j, boolean javaStyle)1521 public static String toHex(int j, boolean javaStyle) { 1522 if (j == '\"') { 1523 return "\\\""; 1524 } else if (j == '\\') { 1525 return "\\\\"; 1526 } else if (0x20 < j && j < 0x7F) { 1527 return String.valueOf((char) j); 1528 } 1529 final String hexString = Integer.toHexString(j).toUpperCase(); 1530 int gap = 4 - hexString.length(); 1531 if (gap < 0) { 1532 gap = 0; 1533 } 1534 String prefix = javaStyle ? 
"\\u" : "U+"; 1535 return prefix + "000".substring(0, gap) + hexString; 1536 } 1537 1538 /** 1539 * get string format for debugging, since Java has a useless display for many items 1540 * @param item 1541 * @return 1542 */ toString(Object item)1543 public static String toString(Object item) { 1544 if (item instanceof Object[]) { 1545 return toString(Arrays.asList((Object[]) item)); 1546 } else if (item instanceof Entry) { 1547 return toString(((Entry) item).getKey()) + "≔" + toString(((Entry) item).getValue()); 1548 } else if (item instanceof Map) { 1549 return "{" + toString(((Map) item).entrySet()) + "}"; 1550 } else if (item instanceof Collection) { 1551 List<String> result = new ArrayList<>(); 1552 for (Object subitem : (Collection) item) { 1553 result.add(toString(subitem)); 1554 } 1555 return result.toString(); 1556 } 1557 return item.toString(); 1558 } 1559 1560 /** 1561 * Return the git hash for the CLDR base directory. 1562 * 1563 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1564 */ getCldrBaseDirHash()1565 public static String getCldrBaseDirHash() { 1566 final File baseDir = CLDRConfig.getInstance().getCldrBaseDirectory(); 1567 return getGitHashForDir(baseDir.toString()); 1568 } 1569 1570 /** 1571 * Return the git hash for a directory. 
1572 * 1573 * @param dir the directory name 1574 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1575 */ getGitHashForDir(String dir)1576 public final static String getGitHashForDir(String dir) { 1577 final String GIT_HASH_COMMANDS[] = { "git", "rev-parse", "HEAD" }; 1578 try { 1579 if (dir == null) { 1580 return CLDRURLS.UNKNOWN_REVISION; // no dir 1581 } 1582 File f = new File(dir); 1583 if (!f.isDirectory()) { 1584 return CLDRURLS.UNKNOWN_REVISION; // does not exist 1585 } 1586 Process p = Runtime.getRuntime().exec(GIT_HASH_COMMANDS, null, f); 1587 try (BufferedReader is = new BufferedReader(new InputStreamReader(p.getInputStream()))) { 1588 String str = is.readLine(); 1589 if (str.length() == 0) { 1590 throw new Exception("git returned empty"); 1591 } 1592 return str; 1593 } 1594 } catch(Throwable t) { 1595 // We do not expect this to be called frequently. 1596 System.err.println("While trying to get 'git' hash for " + dir + " : " + t.getMessage()); 1597 t.printStackTrace(); 1598 return CLDRURLS.UNKNOWN_REVISION; 1599 } 1600 } 1601 } 1602