1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.BufferedReader; 12 import java.io.File; 13 import java.io.FileReader; 14 import java.io.IOException; 15 import java.io.InputStream; 16 import java.io.InputStreamReader; 17 import java.io.PrintWriter; 18 import java.lang.reflect.Constructor; 19 import java.lang.reflect.Method; 20 import java.nio.file.Files; 21 import java.nio.file.Path; 22 import java.nio.file.Paths; 23 import java.util.ArrayList; 24 import java.util.Arrays; 25 import java.util.Calendar; 26 import java.util.Collection; 27 import java.util.Collections; 28 import java.util.Comparator; 29 import java.util.Date; 30 import java.util.EnumSet; 31 import java.util.HashMap; 32 import java.util.HashSet; 33 import java.util.Iterator; 34 import java.util.LinkedHashMap; 35 import java.util.LinkedHashSet; 36 import java.util.List; 37 import java.util.Map; 38 import java.util.Map.Entry; 39 import java.util.Objects; 40 import java.util.Set; 41 import java.util.SortedMap; 42 import java.util.SortedSet; 43 import java.util.TreeMap; 44 import java.util.TreeSet; 45 import java.util.concurrent.ConcurrentHashMap; 46 import java.util.regex.Matcher; 47 import java.util.regex.Pattern; 48 49 import org.unicode.cldr.draft.FileUtilities; 50 51 import com.google.common.base.Splitter; 52 import com.google.common.collect.ImmutableMap; 53 import com.google.common.collect.ImmutableMultimap; 54 import com.google.common.collect.Multimap; 55 import com.ibm.icu.impl.Utility; 56 import com.ibm.icu.text.DateFormat; 57 import com.ibm.icu.text.SimpleDateFormat; 58 import com.ibm.icu.text.Transform; 59 import com.ibm.icu.text.Transliterator; 60 import com.ibm.icu.text.UTF16; 61 import com.ibm.icu.text.UnicodeSet; 62 import com.ibm.icu.text.UnicodeSetIterator; 63 import com.ibm.icu.util.Freezable; 64 import com.ibm.icu.util.TimeZone; 65 66 public class CldrUtility { 67 68 public static final boolean DEBUG_MISSING_DIRECTORIES = false; 69 70 public static final boolean BETA = false; 71 72 public static final String LINE_SEPARATOR = "\n"; 73 public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*"); 74 75 private static final boolean HANDLEFILE_SHOW_SKIP = false; 76 // Constant for "∅∅∅". Indicates that a child locale has no value for a 77 // path even though a parent does. 78 public static final String NO_INHERITANCE_MARKER = new String(new char[] { 0x2205, 0x2205, 0x2205 }); 79 80 /** 81 * Define the constant INHERITANCE_MARKER for "↑↑↑", used by Survey Tool to indicate a "passthru" vote to the parent locale. 82 * If CLDRFile ever finds this value in a data field, writing of the field should be suppressed. 83 */ 84 public static final String INHERITANCE_MARKER = new String(new char[] { 0x2191, 0x2191, 0x2191 }); 85 86 public static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 87 88 /** 89 * Very simple class, used to replace variables in a string. For example 90 * <p> 91 * 92 * <pre> 93 * static VariableReplacer langTag = new VariableReplacer() 94 * .add("$alpha", "[a-zA-Z]") 95 * .add("$digit", "[0-9]") 96 * .add("$alphanum", "[a-zA-Z0-9]") 97 * .add("$x", "[xX]"); 98 * ... 99 * String langTagPattern = langTag.replace(...); 100 * </pre> 101 */ 102 public static class VariableReplacer { 103 // simple implementation for now 104 private Map<String, String> m = new TreeMap<>(Collections.reverseOrder()); 105 add(String variable, String value)106 public VariableReplacer add(String variable, String value) { 107 m.put(variable, value); 108 return this; 109 } 110 replace(String source)111 public String replace(String source) { 112 String oldSource; 113 do { 114 oldSource = source; 115 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) { 116 String variable = it.next(); 117 String value = m.get(variable); 118 source = replaceAll(source, variable, value); 119 } 120 } while (!source.equals(oldSource)); 121 return source; 122 } 123 replaceAll(String source, String key, String value)124 public String replaceAll(String source, String key, String value) { 125 while (true) { 126 int pos = source.indexOf(key); 127 if (pos < 0) return source; 128 source = source.substring(0, pos) + value + source.substring(pos + key.length()); 129 } 130 } 131 } 132 133 public interface LineHandler { 134 /** 135 * Return false if line was skipped 136 * 137 * @param line 138 * @return 139 */ handle(String line)140 boolean handle(String line) throws Exception; 141 } 142 getPath(String fileOrDir, String filename)143 public static String getPath(String fileOrDir, String filename) { 144 // Required for cases where a system property is read but not default is given. 145 // TODO: Fix callers to not fail silently if properties are missing. 146 if (fileOrDir == null) { 147 return null; 148 } 149 Path path = Paths.get(fileOrDir); 150 if (filename != null) { 151 path = path.resolve(filename); 152 } 153 if (DEBUG_MISSING_DIRECTORIES && !Files.exists(path)) { 154 System.err.println("Warning: directory doesn't exist: " + path); 155 } 156 return PathUtilities.getNormalizedPathString(path) + File.separatorChar; 157 } 158 getPath(String path)159 public static String getPath(String path) { 160 return getPath(path, null); 161 } 162 163 public static final String ANALYTICS = "<script>\n" 164 + "var gaJsHost = ((\"https:\" == document.location.protocol) ? \"https://ssl.\" : \"http://www.\");\n" 165 + "document.write(unescape(\"%3Cscript src='\" + gaJsHost + \"google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E\"));\n" 166 + "</script>\n" 167 + "<script>\n" 168 + "try {\n" 169 + "var pageTracker = _gat._getTracker(\"UA-7672775-1\");\n" 170 + "pageTracker._trackPageview();\n" 171 + "} catch(err) {}</script>"; 172 173 public static final List<String> MINIMUM_LANGUAGES = Arrays.asList(new String[] { "ar", "en", "de", "fr", "hi", 174 "it", "es", "pt", "ru", "zh", "ja" }); // plus language itself 175 public static final List<String> MINIMUM_TERRITORIES = Arrays.asList(new String[] { "US", "GB", "DE", "FR", "IT", 176 "JP", "CN", "IN", "RU", "BR" }); 177 178 public interface LineComparer { 179 static final int LINES_DIFFERENT = -1, LINES_SAME = 0, SKIP_FIRST = 1, SKIP_SECOND = 2; 180 181 /** 182 * Returns LINES_DIFFERENT, LINES_SAME, or if one of the lines is ignorable, SKIP_FIRST or SKIP_SECOND 183 * 184 * @param line1 185 * @param line2 186 * @return 187 */ compare(String line1, String line2)188 int compare(String line1, String line2); 189 } 190 191 public static class SimpleLineComparator implements LineComparer { 192 public static final int TRIM = 1, SKIP_SPACES = 2, SKIP_EMPTY = 4, SKIP_CVS_TAGS = 8; 193 StringIterator si1 = new StringIterator(); 194 StringIterator si2 = new StringIterator(); 195 int flags; 196 SimpleLineComparator(int flags)197 public SimpleLineComparator(int flags) { 198 this.flags = flags; 199 } 200 201 @Override compare(String line1, String line2)202 public int compare(String line1, String line2) { 203 // first, see if we want to skip one or the other lines 204 int skipper = 0; 205 if (line1 == null) { 206 skipper = SKIP_FIRST; 207 } else { 208 if ((flags & TRIM) != 0) line1 = line1.trim(); 209 if ((flags & SKIP_EMPTY) != 0 && line1.length() == 0) skipper = SKIP_FIRST; 210 } 211 if (line2 == null) { 212 skipper = SKIP_SECOND; 213 } else { 214 if ((flags & TRIM) != 0) line2 = line2.trim(); 215 if ((flags & SKIP_EMPTY) != 0 && line2.length() == 0) skipper += SKIP_SECOND; 216 } 217 if (skipper != 0) { 218 if (skipper == SKIP_FIRST + SKIP_SECOND) return LINES_SAME; // ok, don't skip both 219 return skipper; 220 } 221 222 // check for null 223 if (line1 == null) { 224 if (line2 == null) return LINES_SAME; 225 return LINES_DIFFERENT; 226 } 227 if (line2 == null) { 228 return LINES_DIFFERENT; 229 } 230 231 // now check equality 232 if (line1.equals(line2)) return LINES_SAME; 233 234 // if not equal, see if we are skipping spaces 235 if ((flags & SKIP_CVS_TAGS) != 0) { 236 if (line1.indexOf('$') >= 0 && line2.indexOf('$') >= 0) { 237 line1 = stripTags(line1); 238 line2 = stripTags(line2); 239 if (line1.equals(line2)) return LINES_SAME; 240 } else if (line1.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/") 241 && line2.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")) { 242 return LINES_SAME; 243 } 244 } 245 if ((flags & SKIP_SPACES) != 0 && si1.set(line1).matches(si2.set(line2))) return LINES_SAME; 246 return LINES_DIFFERENT; 247 } 248 249 // private Matcher dtdMatcher = PatternCache.get( 250 // "\\Q<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/\\E.*\\Q/ldml.dtd\">\\E").matcher(""); 251 252 private String[] CVS_TAGS = { "Revision", "Date" }; 253 stripTags(String line)254 private String stripTags(String line) { 255 // $ 256 // Revision: 8994 $ 257 // $ 258 // Date: 2013-07-03 21:31:17 +0200 (Wed, 03 Jul 2013) $ 259 int pos = line.indexOf('$'); 260 if (pos < 0) return line; 261 pos++; 262 int endpos = line.indexOf('$', pos); 263 if (endpos < 0) return line; 264 for (int i = 0; i < CVS_TAGS.length; ++i) { 265 if (!line.startsWith(CVS_TAGS[i], pos)) continue; 266 line = line.substring(0, pos + CVS_TAGS[i].length()) + line.substring(endpos); 267 } 268 return line; 269 } 270 271 } 272 273 /** 274 * 275 * @param file1 276 * @param file2 277 * @param failureLines 278 * on input, String[2], on output, failing lines 279 * @param lineComparer 280 * @return 281 * @throws IOException 282 */ areFileIdentical(String file1, String file2, String[] failureLines, LineComparer lineComparer)283 public static boolean areFileIdentical(String file1, String file2, String[] failureLines, 284 LineComparer lineComparer) throws IOException { 285 try (BufferedReader br1 = new BufferedReader(new FileReader(file1), 32 * 1024); 286 BufferedReader br2 = new BufferedReader(new FileReader(file2), 32 * 1024);) { 287 String line1 = ""; 288 String line2 = ""; 289 int skip = 0; 290 291 while (true) { 292 if ((skip & LineComparer.SKIP_FIRST) == 0) line1 = br1.readLine(); 293 if ((skip & LineComparer.SKIP_SECOND) == 0) line2 = br2.readLine(); 294 if (line1 == null && line2 == null) return true; 295 if (line1 == null || line2 == null) { 296 // System.out.println("debug"); 297 } 298 skip = lineComparer.compare(line1, line2); 299 if (skip == LineComparer.LINES_DIFFERENT) { 300 break; 301 } 302 } 303 failureLines[0] = line1 != null ? line1 : "<end of file>"; 304 failureLines[1] = line2 != null ? line2 : "<end of file>"; 305 return false; 306 } 307 } 308 309 /* 310 * static String getLineWithoutFluff(BufferedReader br1, boolean first, int flags) throws IOException { 311 * while (true) { 312 * String line1 = br1.readLine(); 313 * if (line1 == null) return line1; 314 * if ((flags & TRIM)!= 0) line1 = line1.trim(); 315 * if ((flags & SKIP_EMPTY)!= 0 && line1.length() == 0) continue; 316 * return line1; 317 * } 318 * } 319 */ 320 321 public final static class StringIterator { 322 String string; 323 int position = 0; 324 next()325 char next() { 326 while (true) { 327 if (position >= string.length()) return '\uFFFF'; 328 char ch = string.charAt(position++); 329 if (ch != ' ' && ch != '\t') return ch; 330 } 331 } 332 reset()333 StringIterator reset() { 334 position = 0; 335 return this; 336 } 337 set(String string)338 StringIterator set(String string) { 339 this.string = string; 340 position = 0; 341 return this; 342 } 343 matches(StringIterator other)344 boolean matches(StringIterator other) { 345 while (true) { 346 char c1 = next(); 347 char c2 = other.next(); 348 if (c1 != c2) return false; 349 if (c1 == '\uFFFF') return true; 350 } 351 } 352 353 /** 354 * @return Returns the position. 355 */ getPosition()356 public int getPosition() { 357 return position; 358 } 359 } 360 splitArray(String source, char separator)361 public static String[] splitArray(String source, char separator) { 362 return splitArray(source, separator, false); 363 } 364 splitArray(String source, char separator, boolean trim)365 public static String[] splitArray(String source, char separator, boolean trim) { 366 List<String> piecesList = splitList(source, separator, trim); 367 String[] pieces = new String[piecesList.size()]; 368 piecesList.toArray(pieces); 369 return pieces; 370 } 371 splitCommaSeparated(String line)372 public static String[] splitCommaSeparated(String line) { 373 // items are separated by ',' 374 // each item is of the form abc... 375 // or "..." (required if a comma or quote is contained) 376 // " in a field is represented by "" 377 List<String> result = new ArrayList<>(); 378 StringBuilder item = new StringBuilder(); 379 boolean inQuote = false; 380 for (int i = 0; i < line.length(); ++i) { 381 char ch = line.charAt(i); // don't worry about supplementaries 382 switch (ch) { 383 case '"': 384 inQuote = !inQuote; 385 // at start or end, that's enough 386 // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote 387 if (inQuote && item.length() != 0) { 388 item.append('"'); 389 inQuote = true; 390 } 391 break; 392 case ',': 393 if (!inQuote) { 394 result.add(item.toString()); 395 item.setLength(0); 396 } else { 397 item.append(ch); 398 } 399 break; 400 default: 401 item.append(ch); 402 break; 403 } 404 } 405 result.add(item.toString()); 406 return result.toArray(new String[result.size()]); 407 } 408 splitList(String source, char separator)409 public static List<String> splitList(String source, char separator) { 410 return splitList(source, separator, false, null); 411 } 412 splitList(String source, char separator, boolean trim)413 public static List<String> splitList(String source, char separator, boolean trim) { 414 return splitList(source, separator, trim, null); 415 } 416 splitList(String source, char separator, boolean trim, List<String> output)417 public static List<String> splitList(String source, char separator, boolean trim, List<String> output) { 418 return splitList(source, Character.toString(separator), trim, output); 419 } 420 splitList(String source, String separator)421 public static List<String> splitList(String source, String separator) { 422 return splitList(source, separator, false, null); 423 } 424 splitList(String source, String separator, boolean trim)425 public static List<String> splitList(String source, String separator, boolean trim) { 426 return splitList(source, separator, trim, null); 427 } 428 splitList(String source, String separator, boolean trim, List<String> output)429 public static List<String> splitList(String source, String separator, boolean trim, List<String> output) { 430 if (output == null) output = new ArrayList<>(); 431 if (source.length() == 0) return output; 432 int pos = 0; 433 do { 434 int npos = source.indexOf(separator, pos); 435 if (npos < 0) npos = source.length(); 436 String piece = source.substring(pos, npos); 437 if (trim) piece = piece.trim(); 438 output.add(piece); 439 pos = npos + 1; 440 } while (pos < source.length()); 441 return output; 442 } 443 444 /** 445 * Protect a collection (as much as Java lets us!) from modification. 446 * Really, really ugly code, since Java doesn't let us do better. 447 */ 448 @SuppressWarnings({ "rawtypes", "unchecked" }) protectCollection(T source)449 public static <T> T protectCollection(T source) { 450 // TODO - exclude UnmodifiableMap, Set, ... 451 if (source instanceof Map) { 452 Map<Object,Object> sourceMap = (Map) source; 453 ImmutableMap.Builder<Object,Object> builder = ImmutableMap.builder(); 454 for (Entry<Object,Object> entry : sourceMap.entrySet()) { 455 final Object key = entry.getKey(); 456 final Object value = entry.getValue(); 457 builder.put(protectCollection(key), protectCollection(value)); 458 } 459 return (T) builder.build(); 460 } else if (source instanceof Multimap) { 461 Multimap<Object,Object> sourceMap = (Multimap) source; 462 ImmutableMultimap.Builder<Object,Object> builder = ImmutableMultimap.builder(); 463 for (Entry<Object,Object> entry : sourceMap.entries()) { 464 builder.put(protectCollection(entry.getKey()), protectCollection(entry.getValue())); 465 } 466 return (T) builder.build(); 467 } else if (source instanceof Collection) { 468 // TODO use ImmutableSet, List, ... 469 Collection sourceCollection = (Collection) source; 470 Collection<Object> resultCollection = clone(sourceCollection); 471 if (resultCollection == null) return (T) sourceCollection; // failed 472 resultCollection.clear(); 473 474 for (Object item : sourceCollection) { 475 resultCollection.add(protectCollection(item)); 476 } 477 478 return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection) 479 : sourceCollection instanceof SortedSet ? (T) Collections 480 .unmodifiableSortedSet((SortedSet) sourceCollection) 481 : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection) 482 : (T) Collections.unmodifiableCollection(sourceCollection); 483 } else if (source instanceof Freezable) { 484 Freezable freezableSource = (Freezable) source; 485 return (T) freezableSource.freeze(); 486 // if (freezableSource.isFrozen()) return source; 487 // return (T) ((Freezable) (freezableSource.cloneAsThawed())).freeze(); 488 } else { 489 return source; // can't protect 490 } 491 } 492 493 /** 494 * Protect a collections where we don't need to clone. 495 * @param source 496 * @return 497 */ 498 @SuppressWarnings({ "rawtypes", "unchecked" }) protectCollectionX(T source)499 public static <T> T protectCollectionX(T source) { 500 // TODO - exclude UnmodifiableMap, Set, ... 501 if (isImmutable(source)) { 502 return source; 503 } 504 if (source instanceof Map) { 505 Map sourceMap = (Map) source; 506 // recurse 507 LinkedHashMap tempMap = new LinkedHashMap<>(sourceMap); // copy contents 508 sourceMap.clear(); 509 for (Object key : tempMap.keySet()) { 510 sourceMap.put(protectCollection(key), protectCollectionX(tempMap.get(key))); 511 } 512 return sourceMap instanceof SortedMap ? (T) Collections.unmodifiableSortedMap((SortedMap) sourceMap) 513 : (T) Collections.unmodifiableMap(sourceMap); 514 } else if (source instanceof Collection) { 515 Collection sourceCollection = (Collection) source; 516 LinkedHashSet tempSet = new LinkedHashSet<>(sourceCollection); // copy contents 517 518 sourceCollection.clear(); 519 for (Object item : tempSet) { 520 sourceCollection.add(protectCollectionX(item)); 521 } 522 523 return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection) 524 : sourceCollection instanceof SortedSet ? (T) Collections 525 .unmodifiableSortedSet((SortedSet) sourceCollection) 526 : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection) 527 : (T) Collections.unmodifiableCollection(sourceCollection); 528 } else if (source instanceof Freezable) { 529 Freezable freezableSource = (Freezable) source; 530 return (T) freezableSource.freeze(); 531 } else { 532 throw new IllegalArgumentException("Can’t protect: " + source.getClass().toString()); 533 } 534 } 535 536 private static final Set<Object> KNOWN_IMMUTABLES = new HashSet<>(Arrays.asList( 537 String.class)); 538 isImmutable(Object source)539 public static boolean isImmutable(Object source) { 540 return source == null 541 || source instanceof Enum 542 || source instanceof Number 543 || KNOWN_IMMUTABLES.contains(source.getClass()); 544 } 545 546 /** 547 * Clones T if we can; otherwise returns null. 548 * 549 * @param <T> 550 * @param source 551 * @return 552 */ 553 @SuppressWarnings("unchecked") clone(T source)554 private static <T> T clone(T source) { 555 final Class<? extends Object> class1 = source.getClass(); 556 try { 557 final Method declaredMethod = class1.getDeclaredMethod("clone", (Class<?>) null); 558 return (T) declaredMethod.invoke(source, (Object) null); 559 } catch (Exception e) { 560 } 561 try { 562 final Constructor<? extends Object> declaredMethod = class1.getConstructor((Class<?>) null); 563 return (T) declaredMethod.newInstance((Object) null); 564 } catch (Exception e) { 565 } 566 return null; // uncloneable 567 } 568 569 /** 570 * Appends two strings, inserting separator if either is empty 571 */ joinWithSeparation(String a, String separator, String b)572 public static String joinWithSeparation(String a, String separator, String b) { 573 if (a.length() == 0) return b; 574 if (b.length() == 0) return a; 575 return a + separator + b; 576 } 577 578 /** 579 * Appends two strings, inserting separator if either is empty. Modifies first map 580 */ joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b)581 public static Map<String, String> joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b) { 582 for (Iterator<String> it = b.keySet().iterator(); it.hasNext();) { 583 String key = it.next(); 584 String bvalue = b.get(key); 585 String avalue = a.get(key); 586 if (avalue != null) { 587 if (avalue.trim().equals(bvalue.trim())) continue; 588 bvalue = joinWithSeparation(avalue, separator, bvalue); 589 } 590 a.put(key, bvalue); 591 } 592 return a; 593 } 594 join(Collection<T> c, String separator)595 public static <T> String join(Collection<T> c, String separator) { 596 return join(c, separator, null); 597 } 598 join(Object[] c, String separator)599 public static String join(Object[] c, String separator) { 600 return join(c, separator, null); 601 } 602 join(Collection<T> c, String separator, Transform<T, String> transform)603 public static <T> String join(Collection<T> c, String separator, Transform<T, String> transform) { 604 StringBuffer output = new StringBuffer(); 605 boolean isFirst = true; 606 for (T item : c) { 607 if (isFirst) { 608 isFirst = false; 609 } else { 610 output.append(separator); 611 } 612 output.append(transform != null ? transform.transform(item) : item); 613 } 614 return output.toString(); 615 } 616 join(T[] c, String separator, Transform<T, String> transform)617 public static <T> String join(T[] c, String separator, Transform<T, String> transform) { 618 return join(Arrays.asList(c), separator, transform); 619 } 620 621 /** 622 * Utility like Arrays.asList() 623 */ 624 @SuppressWarnings("unchecked") asMap(Object[][] source, Map<K, V> target, boolean reverse)625 public static <K, V> Map<K, V> asMap(Object[][] source, Map<K, V> target, boolean reverse) { 626 int from = 0, to = 1; 627 if (reverse) { 628 from = 1; 629 to = 0; 630 } 631 for (int i = 0; i < source.length; ++i) { 632 if (source[i].length != 2) { 633 throw new IllegalArgumentException("Source must be array of pairs of strings: " 634 + Arrays.asList(source[i])); 635 } 636 target.put((K) source[i][from], (V) source[i][to]); 637 } 638 return target; 639 } 640 asMap(Object[][] source)641 public static <K, V> Map<K, V> asMap(Object[][] source) { 642 return asMap(source, new HashMap<K, V>(), false); 643 } 644 645 /** 646 * Returns the canonical name for a file. 647 */ getCanonicalName(String file)648 public static String getCanonicalName(String file) { 649 try { 650 return PathUtilities.getNormalizedPathString(file); 651 } catch (Exception e) { 652 return file; 653 } 654 } 655 656 /** 657 * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that are in the UnicodeSet, 658 * Supplementary ranges, and escaping 659 * 660 * @param source 661 * The source set 662 * @param escaper 663 * A transliterator that is used to escape the characters according to the requirements of the regex. 664 * @return 665 */ toRegex(UnicodeSet source)666 public static String toRegex(UnicodeSet source) { 667 return toRegex(source, null, false); 668 } 669 670 private static final Transliterator DEFAULT_REGEX_ESCAPER = Transliterator.createFromRules( 671 "foo", 672 "([ \\- \\\\ \\[ \\] ]) > '\\' $1 ;" 673 // + " ([:c:]) > &hex($1);" 674 + " ([[:control:][[:z:]&[:ascii:]]]) > &hex($1);", 675 Transliterator.FORWARD); 676 677 /** 678 * Convert a UnicodeSet into a string that can be embedded into a Regex. 679 * Handles strings that are in the UnicodeSet, Supplementary ranges, and 680 * escaping 681 * 682 * @param source 683 * The source set 684 * @param escaper 685 * A transliterator that is used to escape the characters according 686 * to the requirements of the regex. The default puts a \\ before [, -, 687 * \, and ], and converts controls and Ascii whitespace to hex. 688 * Alternatives can be supplied. Note that some Regex engines, 689 * including Java 1.5, don't really deal with escaped supplementaries 690 * well. 691 * @param onlyBmp 692 * Set to true if the Regex only accepts BMP characters. In that 693 * case, ranges of supplementary characters are converted to lists of 694 * ranges. For example, [\uFFF0-\U0010000F \U0010100F-\U0010300F] 695 * converts into: 696 * 697 * <pre> 698 * [\uD800][\uDC00-\uDFFF] 699 * [\uD801-\uDBBF][\uDC00-\uDFFF] 700 * [\uDBC0][\uDC00-\uDC0F] 701 * </pre> 702 * 703 * and 704 * 705 * <pre> 706 * [\uDBC4][\uDC0F-\uDFFF] 707 * [\uDBC5-\uDBCB][\uDC00-\uDFFF] 708 * [\uDBCC][\uDC00-\uDC0F] 709 * </pre> 710 * 711 * These are then coalesced into a list of alternatives by sharing 712 * parts where feasible. For example, the above turns into 3 pairs of ranges: 713 * 714 * <pre> 715 * [\uDBC0\uDBCC][\uDC00-\uDC0F]|\uDBC4[\uDC0F-\uDFFF]|[\uD800-\uDBBF\uDBC5-\uDBCB][\uDC00-\uDFFF] 716 * </pre> 717 * 718 * @return escaped string. Something like [a-z] or (?:[a-m]|{zh}) if there is 719 * a string zh in the set, or a more complicated case for 720 * supplementaries. <br> 721 * Special cases: [] returns "", single item returns a string 722 * (escaped), like [a] => "a", or [{abc}] => "abc"<br> 723 * Supplementaries are handled specially, as described under onlyBmp. 724 */ toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp)725 public static String toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp) { 726 if (escaper == null) { 727 escaper = DEFAULT_REGEX_ESCAPER; 728 } 729 UnicodeSetIterator it = new UnicodeSetIterator(source); 730 // if there is only one item, return it 731 if (source.size() == 0) { 732 return ""; 733 } 734 if (source.size() == 1) { 735 it.next(); 736 return escaper.transliterate(it.getString()); 737 } 738 // otherwise, we figure out what is in the set, and will return 739 StringBuilder base = new StringBuilder("["); 740 StringBuilder alternates = new StringBuilder(); 741 Map<UnicodeSet, UnicodeSet> lastToFirst = new TreeMap<>(new UnicodeSetComparator()); 742 int alternateCount = 0; 743 while (it.nextRange()) { 744 if (it.codepoint == UnicodeSetIterator.IS_STRING) { 745 ++alternateCount; 746 alternates.append('|').append(escaper.transliterate(it.string)); 747 } else if (!onlyBmp || it.codepointEnd <= 0xFFFF) { // BMP 748 addBmpRange(it.codepoint, it.codepointEnd, escaper, base); 749 } else { // supplementary 750 if (it.codepoint <= 0xFFFF) { 751 addBmpRange(it.codepoint, 0xFFFF, escaper, base); 752 it.codepoint = 0x10000; // reset the range 753 } 754 // this gets a bit ugly; we are trying to minimize the extra ranges for supplementaries 755 // we do this by breaking up X-Y based on the Lead and Trail values for X and Y 756 // Lx [Tx - Ty]) (if Lx == Ly) 757 // Lx [Tx - DFFF] | Ly [DC00-Ty] (if Lx == Ly - 1) 758 // Lx [Tx - DFFF] | [Lx+1 - Ly-1][DC00-DFFF] | Ly [DC00-Ty] (otherwise) 759 int leadX = UTF16.getLeadSurrogate(it.codepoint); 760 int trailX = UTF16.getTrailSurrogate(it.codepoint); 761 int leadY = UTF16.getLeadSurrogate(it.codepointEnd); 762 int trailY = UTF16.getTrailSurrogate(it.codepointEnd); 763 if (leadX == leadY) { 764 addSupplementalRange(leadX, leadX, trailX, trailY, escaper, lastToFirst); 765 } else { 766 addSupplementalRange(leadX, leadX, trailX, 0xDFFF, escaper, lastToFirst); 767 if (leadX != leadY - 1) { 768 addSupplementalRange(leadX + 1, leadY - 1, 0xDC00, 0xDFFF, escaper, lastToFirst); 769 } 770 addSupplementalRange(leadY, leadY, 0xDC00, trailY, escaper, lastToFirst); 771 } 772 } 773 } 774 // add in the supplementary ranges 775 if (lastToFirst.size() != 0) { 776 for (UnicodeSet last : lastToFirst.keySet()) { 777 ++alternateCount; 778 alternates.append('|').append(toRegex(lastToFirst.get(last), escaper, onlyBmp)) 779 .append(toRegex(last, escaper, onlyBmp)); 780 } 781 } 782 // Return the output. We separate cases in order to get the minimal extra apparatus 783 base.append("]"); 784 if (alternateCount == 0) { 785 return base.toString(); 786 } else if (base.length() > 2) { 787 return "(?:" + base + "|" + alternates.substring(1) + ")"; 788 } else if (alternateCount == 1) { 789 return alternates.substring(1); 790 } else { 791 return "(?:" + alternates.substring(1) + ")"; 792 } 793 } 794 addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper, Map<UnicodeSet, UnicodeSet> lastToFirst)795 private static void addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper, 796 Map<UnicodeSet, UnicodeSet> lastToFirst) { 797 System.out.println("\tadding: " + new UnicodeSet(leadX, leadY) + "\t" + new UnicodeSet(trailX, trailY)); 798 UnicodeSet last = new UnicodeSet(trailX, trailY); 799 UnicodeSet first = lastToFirst.get(last); 800 if (first == null) { 801 lastToFirst.put(last, first = new UnicodeSet()); 802 } 803 first.add(leadX, leadY); 804 } 805 addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base)806 private static void addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base) { 807 base.append(escaper.transliterate(UTF16.valueOf(start))); 808 if (start != limit) { 809 base.append("-").append(escaper.transliterate(UTF16.valueOf(limit))); 810 } 811 } 812 813 public static class UnicodeSetComparator implements Comparator<UnicodeSet> { 814 @Override compare(UnicodeSet o1, UnicodeSet o2)815 public int compare(UnicodeSet o1, UnicodeSet o2) { 816 return o1.compareTo(o2); 817 } 818 } 819 820 public static class CollectionComparator<T extends Comparable<T>> implements Comparator<Collection<T>> { 821 @Override compare(Collection<T> o1, Collection<T> o2)822 public int compare(Collection<T> o1, Collection<T> o2) { 823 return UnicodeSet.compare(o1, o2, UnicodeSet.ComparisonStyle.SHORTER_FIRST); 824 } 825 } 826 827 public static class ComparableComparator<T extends Comparable<T>> implements Comparator<T> { 828 @Override compare(T arg0, T arg1)829 public int compare(T arg0, T arg1) { 830 return Utility.checkCompare(arg0, arg1); 831 } 832 } 833 834 @SuppressWarnings({ "rawtypes", "unchecked" }) addTreeMapChain(Map coverageData, Object... objects)835 public static void addTreeMapChain(Map coverageData, Object... objects) { 836 Map<Object, Object> base = coverageData; 837 for (int i = 0; i < objects.length - 2; ++i) { 838 Map<Object, Object> nextOne = (Map<Object, Object>) base.get(objects[i]); 839 if (nextOne == null) base.put(objects[i], nextOne = new TreeMap<>()); 840 base = nextOne; 841 } 842 base.put(objects[objects.length - 2], objects[objects.length - 1]); 843 } 844 845 public static abstract class CollectionTransform<S, T> implements Transform<S, T> { 846 @Override transform(S source)847 public abstract T transform(S source); 848 transform(Collection<S> input, Collection<T> output)849 public Collection<T> transform(Collection<S> input, Collection<T> output) { 850 return CldrUtility.transform(input, this, output); 851 } 852 transform(Collection<S> input)853 public Collection<T> transform(Collection<S> input) { 854 return transform(input, new ArrayList<T>()); 855 } 856 } 857 transform(SC source, Transform<S, T> transform, TC target)858 public static <S, T, SC extends Collection<S>, TC extends Collection<T>> TC transform(SC source, Transform<S, T> transform, TC target) { 859 for (S sourceItem : source) { 860 T targetItem = transform.transform(sourceItem); 861 if (targetItem != null) { 862 target.add(targetItem); 863 } 864 } 865 return target; 866 } 867 transform( SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target)868 public static <SK, SV, TK, TV, SM extends Map<SK, SV>, TM extends Map<TK, TV>> TM transform( 869 SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target) { 870 for (Entry<SK, SV> sourceEntry : source.entrySet()) { 871 TK targetKey = transformKey.transform(sourceEntry.getKey()); 872 TV targetValue = transformValue.transform(sourceEntry.getValue()); 873 if (targetKey != null && targetValue != null) { 874 target.put(targetKey, targetValue); 875 } 876 } 877 return target; 878 } 879 880 public static abstract class Apply<T> { apply(T item)881 public abstract void apply(T item); 882 applyTo(U collection)883 public <U extends Collection<T>> void applyTo(U collection) { 884 for (T item : collection) { 885 apply(item); 886 } 887 } 888 } 889 890 public static abstract class Filter<T> { 891 contains(T item)892 public abstract boolean contains(T item); 893 retainAll(U c)894 public <U extends Collection<T>> U retainAll(U c) { 895 for (Iterator<T> it = c.iterator(); it.hasNext();) { 896 if (!contains(it.next())) it.remove(); 897 } 898 return c; 899 } 900 extractMatches(U c, U target)901 public <U extends Collection<T>> U extractMatches(U c, U target) { 902 for (Iterator<T> it = c.iterator(); it.hasNext();) { 903 T item = it.next(); 904 if (contains(item)) { 905 target.add(item); 906 } 907 } 908 return target; 909 } 910 removeAll(U c)911 public <U extends Collection<T>> U removeAll(U c) { 912 for (Iterator<T> it = c.iterator(); it.hasNext();) { 913 if (contains(it.next())) it.remove(); 914 } 915 return c; 916 } 917 extractNonMatches(U c, U target)918 public <U extends Collection<T>> U extractNonMatches(U c, U target) { 919 for (Iterator<T> it = c.iterator(); it.hasNext();) { 920 T item = it.next(); 921 if (!contains(item)) { 922 target.add(item); 923 } 924 } 925 return target; 926 } 927 } 928 929 public static class MatcherFilter<T> extends Filter<T> { 930 private Matcher matcher; 931 MatcherFilter(String pattern)932 public MatcherFilter(String pattern) { 933 this.matcher = PatternCache.get(pattern).matcher(""); 934 } 935 MatcherFilter(Matcher matcher)936 public MatcherFilter(Matcher matcher) { 937 this.matcher = matcher; 938 } 939 set(Matcher matcher)940 public MatcherFilter<T> set(Matcher matcher) { 941 this.matcher = matcher; 942 return this; 943 } 944 set(String pattern)945 public MatcherFilter<T> set(String pattern) { 946 this.matcher = PatternCache.get(pattern).matcher(""); 947 return this; 948 } 949 950 @Override contains(T o)951 public boolean contains(T o) { 952 return matcher.reset(o.toString()).matches(); 953 } 954 } 955 956 // static final class HandlingTransform implements Transform<String, Handling> { 957 // @Override 958 // public Handling transform(String source) { 959 // return Handling.valueOf(source); 960 // } 961 // } 962 963 public static final class PairComparator<K extends Comparable<K>, V extends Comparable<V>> implements java.util.Comparator<Pair<K, V>> { 964 965 private Comparator<K> comp1; 966 private Comparator<V> comp2; 967 PairComparator(Comparator<K> comp1, Comparator<V> comp2)968 public PairComparator(Comparator<K> comp1, Comparator<V> comp2) { 969 this.comp1 = comp1; 970 this.comp2 = comp2; 971 } 972 973 @Override compare(Pair<K, V> o1, Pair<K, V> o2)974 public int compare(Pair<K, V> o1, Pair<K, V> o2) { 975 { 976 K o1First = o1.getFirst(); 977 K o2First = o2.getFirst(); 978 int diff = o1First == null ? (o2First == null ? 0 : -1) 979 : o2First == null ? 1 980 : comp1 == null ? o1First.compareTo(o2First) 981 : comp1.compare(o1First, o2First); 982 if (diff != 0) { 983 return diff; 984 } 985 } 986 V o1Second = o1.getSecond(); 987 V o2Second = o2.getSecond(); 988 return o1Second == null ? (o2Second == null ? 0 : -1) 989 : o2Second == null ? 1 990 : comp2 == null ? o1Second.compareTo(o2Second) 991 : comp2.compare(o1Second, o2Second); 992 } 993 994 } 995 996 /** 997 * Fetch data from jar 998 * 999 * @param name 1000 * a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break. 1001 */ getUTF8Data(String name)1002 public static BufferedReader getUTF8Data(String name) { 1003 if (new File(name).isAbsolute()) { 1004 throw new IllegalArgumentException( 1005 "Path must be relative to org/unicode/cldr/util/data such as 'file.txt' or 'casing/file.txt', but got '" 1006 + name + "'."); 1007 } 1008 return FileReaders.openFile(CldrUtility.class, "data/" + name); 1009 } 1010 1011 /** 1012 * Fetch data from jar 1013 * 1014 * @param name 1015 * a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break. 1016 */ getInputStream(String name)1017 public static InputStream getInputStream(String name) { 1018 if (new File(name).isAbsolute()) { 1019 throw new IllegalArgumentException( 1020 "Path must be relative to org/unicode/cldr/util/data such as 'file.txt' or 'casing/file.txt', but got '" 1021 + name + "'."); 1022 } 1023 return getInputStream(CldrUtility.class, "data/" + name); 1024 } 1025 getInputStream(Class<?> callingClass, String relativePath)1026 public static InputStream getInputStream(Class<?> callingClass, String relativePath) { 1027 InputStream is = callingClass.getResourceAsStream(relativePath); 1028 // add buffering 1029 return InputStreamFactory.buffer(is); 1030 } 1031 1032 /** 1033 * Takes a Map that goes from Object to Set, and fills in the transpose 1034 * 1035 * @param source_key_valueSet 1036 * @param output_value_key 1037 */ putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key)1038 public static void putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key) { 1039 for (Iterator<Object> it = source_key_valueSet.keySet().iterator(); it.hasNext();) { 1040 Object key = it.next(); 1041 Set<Object> values = source_key_valueSet.get(key); 1042 for (Iterator<Object> it2 = values.iterator(); it2.hasNext();) { 1043 Object value = it2.next(); 1044 output_value_key.put(value, key); 1045 } 1046 } 1047 } 1048 countInstances(String source, String substring)1049 public static int countInstances(String source, String substring) { 1050 int count = 0; 1051 int pos = 0; 1052 while (true) { 1053 pos = source.indexOf(substring, pos) + 1; 1054 if (pos <= 0) break; 1055 count++; 1056 } 1057 return count; 1058 } 1059 registerTransliteratorFromFile(String id, String dir, String filename)1060 public static void registerTransliteratorFromFile(String id, String dir, String filename) { 1061 registerTransliteratorFromFile(id, dir, filename, Transliterator.FORWARD, true); 1062 registerTransliteratorFromFile(id, dir, filename, Transliterator.REVERSE, true); 1063 } 1064 registerTransliteratorFromFile(String id, String dir, String filename, int direction, boolean reverseID)1065 public static void registerTransliteratorFromFile(String id, String dir, String filename, int direction, 1066 boolean reverseID) { 1067 if (filename == null) { 1068 filename = id.replace('-', '_'); 1069 filename = filename.replace('/', '_'); 1070 filename += ".txt"; 1071 } 1072 String rules = getText(dir, filename); 1073 Transliterator t; 1074 int pos = id.indexOf('-'); 1075 String rid; 1076 if (pos < 0) { 1077 rid = id + "-Any"; 1078 id = "Any-" + id; 1079 } else { 1080 rid = id.substring(pos + 1) + "-" + id.substring(0, pos); 1081 } 1082 if (!reverseID) rid = id; 1083 1084 if (direction == Transliterator.FORWARD) { 1085 Transliterator.unregister(id); 1086 t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 1087 Transliterator.registerInstance(t); 1088 System.out.println("Registered new Transliterator: " + id); 1089 } 1090 1091 /* 1092 * String test = "\u049A\u0430\u0437\u0430\u049B"; 1093 * System.out.println(t.transliterate(test)); 1094 * t = Transliterator.getInstance(id); 1095 * System.out.println(t.transliterate(test)); 1096 */ 1097 1098 if (direction == Transliterator.REVERSE) { 1099 Transliterator.unregister(rid); 1100 t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE); 1101 Transliterator.registerInstance(t); 1102 System.out.println("Registered new Transliterator: " + rid); 1103 } 1104 } 1105 getText(String dir, String filename)1106 public static String getText(String dir, String filename) { 1107 try { 1108 BufferedReader br = FileUtilities.openUTF8Reader(dir, filename); 1109 StringBuffer buffer = new StringBuffer(); 1110 while (true) { 1111 String line = br.readLine(); 1112 if (line == null) break; 1113 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1); 1114 if (line.startsWith("//")) continue; 1115 buffer.append(line).append(CldrUtility.LINE_SEPARATOR); 1116 } 1117 br.close(); 1118 String rules = buffer.toString(); 1119 return rules; 1120 } catch (IOException e) { 1121 throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + filename) 1122 .initCause(e); 1123 } 1124 } 1125 callMethod(String methodNames, Class<?> cls)1126 public static void callMethod(String methodNames, Class<?> cls) { 1127 for (String methodName : methodNames.split(",")) { 1128 try { 1129 Method method; 1130 try { 1131 method = cls.getMethod(methodName, (Class[]) null); 1132 try { 1133 method.invoke(null, (Object[]) null); 1134 } catch (Exception e) { 1135 e.printStackTrace(); 1136 } 1137 } catch (Exception e) { 1138 System.out.println("No such method: " + methodName); 1139 showMethods(cls); 1140 } 1141 } catch (ClassNotFoundException e) { 1142 e.printStackTrace(); 1143 } 1144 } 1145 } 1146 showMethods(Class<?> cls)1147 public static void showMethods(Class<?> cls) throws ClassNotFoundException { 1148 System.out.println("Possible methods of " + cls.getCanonicalName() + " are: "); 1149 Method[] methods = cls.getMethods(); 1150 Set<String> names = new TreeSet<>(); 1151 for (int i = 0; i < methods.length; ++i) { 1152 if (methods[i].getGenericParameterTypes().length != 0) continue; 1153 //int mods = methods[i].getModifiers(); 1154 // if (!Modifier.isStatic(mods)) continue; 1155 String name = methods[i].getName(); 1156 names.add(name); 1157 } 1158 for (Iterator<String> it = names.iterator(); it.hasNext();) { 1159 System.out.println("\t" + it.next()); 1160 } 1161 } 1162 1163 /** 1164 * Breaks lines if they are too long, or if matcher.group(1) != last. Only breaks just before matcher. 1165 * 1166 * @param input 1167 * @param separator 1168 * @param matcher 1169 * must match each possible item. The first group is significant; if different, will cause break 1170 * @return 1171 */ breakLines(CharSequence input, String separator, Matcher matcher, int width)1172 static public String breakLines(CharSequence input, String separator, Matcher matcher, int width) { 1173 StringBuffer output = new StringBuffer(); 1174 String lastPrefix = ""; 1175 int lastEnd = 0; 1176 int lastBreakPos = 0; 1177 matcher.reset(input); 1178 while (true) { 1179 boolean match = matcher.find(); 1180 if (!match) { 1181 output.append(input.subSequence(lastEnd, input.length())); 1182 break; 1183 } 1184 String prefix = matcher.group(1); 1185 if (!prefix.equalsIgnoreCase(lastPrefix) || matcher.end() - lastBreakPos > width) { // break before? 1186 output.append(separator); 1187 lastBreakPos = lastEnd; 1188 } else if (lastEnd != 0) { 1189 output.append(' '); 1190 } 1191 output.append(input.subSequence(lastEnd, matcher.end()).toString().trim()); 1192 lastEnd = matcher.end(); 1193 lastPrefix = prefix; 1194 } 1195 return output.toString(); 1196 } 1197 showOptions(String[] args)1198 public static void showOptions(String[] args) { 1199 // Properties props = System.getProperties(); 1200 System.out.println("Arguments: " + join(args, " ")); // + (props == null ? "" : " " + props)); 1201 } 1202 roundToDecimals(double input, int places)1203 public static double roundToDecimals(double input, int places) { 1204 double log10 = Math.log10(input); // 15000 => 4.xxx 1205 double intLog10 = Math.floor(log10); 1206 double scale = Math.pow(10, intLog10 - places + 1); 1207 double factored = Math.round(input / scale) * scale; 1208 // System.out.println("###\t" +input + "\t" + factored); 1209 return factored; 1210 } 1211 1212 /** 1213 * Get a property value, returning the value if there is one (eg -Dkey=value), 1214 * otherwise the default value (for either empty or null). 1215 * 1216 * @param key 1217 * @param valueIfNull 1218 * @param valueIfEmpty 1219 * @return 1220 */ getProperty(String key, String defaultValue)1221 public static String getProperty(String key, String defaultValue) { 1222 return getProperty(key, defaultValue, defaultValue); 1223 } 1224 1225 /** 1226 * Get a property value, returning the value if there is one, otherwise null. 1227 */ getProperty(String key)1228 public static String getProperty(String key) { 1229 return getProperty(key, null, null); 1230 } 1231 1232 /** 1233 * Get a property value, returning the value if there is one (eg -Dkey=value), 1234 * the valueIfEmpty if there is one with no value (eg -Dkey) and the valueIfNull 1235 * if there is no property. 1236 * 1237 * @param key 1238 * @param valueIfNull 1239 * @param valueIfEmpty 1240 * @return 1241 */ getProperty(String key, String valueIfNull, String valueIfEmpty)1242 public static String getProperty(String key, String valueIfNull, String valueIfEmpty) { 1243 String result = CLDRConfig.getInstance().getProperty(key); 1244 if (result == null) { 1245 result = valueIfNull; 1246 } else if (result.length() == 0) { 1247 result = valueIfEmpty; 1248 } 1249 return result; 1250 } 1251 hex(byte[] bytes, int start, int end, String separator)1252 public static String hex(byte[] bytes, int start, int end, String separator) { 1253 StringBuilder result = new StringBuilder(); 1254 for (int i = 0; i < end; ++i) { 1255 if (result.length() != 0) { 1256 result.append(separator); 1257 } 1258 result.append(Utility.hex(bytes[i] & 0xFF, 2)); 1259 } 1260 return result.toString(); 1261 } 1262 getProperty(String string, boolean b)1263 public static boolean getProperty(String string, boolean b) { 1264 return getProperty(string, b ? "true" : "false", "true").matches("(?i)T|TRUE"); 1265 } 1266 checkValidDirectory(String sourceDirectory)1267 public static String checkValidDirectory(String sourceDirectory) { 1268 return checkValidFile(sourceDirectory, true, null); 1269 } 1270 checkValidDirectory(String sourceDirectory, String correction)1271 public static String checkValidDirectory(String sourceDirectory, String correction) { 1272 return checkValidFile(sourceDirectory, true, correction); 1273 } 1274 checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction)1275 public static String checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction) { 1276 File file = null; 1277 String normalizedPath = null; 1278 try { 1279 file = new File(sourceDirectory); 1280 normalizedPath = PathUtilities.getNormalizedPathString(file) + File.separatorChar; 1281 } catch (Exception e) { 1282 } 1283 if (file == null || normalizedPath == null || checkForDirectory && !file.isDirectory()) { 1284 throw new RuntimeException("Directory not found: " + sourceDirectory 1285 + (normalizedPath == null ? "" : " => " + normalizedPath) 1286 + (correction == null ? "" : CldrUtility.LINE_SEPARATOR + correction)); 1287 } 1288 return normalizedPath; 1289 } 1290 1291 /** 1292 * Copy up to matching line (not included). If output is null, then just skip until. 1293 * 1294 * @param oldFile 1295 * file to copy 1296 * @param readUntilPattern 1297 * pattern to search for. If null, goes to end of file. 1298 * @param output 1299 * into to copy into. If null, just skips in the input. 1300 * @param includeMatchingLine 1301 * inclde the matching line when copying. 1302 * @throws IOException 1303 */ copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern, final PrintWriter output, boolean includeMatchingLine)1304 public static void copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern, 1305 final PrintWriter output, boolean includeMatchingLine) throws IOException { 1306 Matcher readUntil = readUntilPattern == null ? null : readUntilPattern.matcher(""); 1307 while (true) { 1308 String line = oldFile.readLine(); 1309 if (line == null) { 1310 break; 1311 } 1312 if (line.startsWith("\uFEFF")) { 1313 line = line.substring(1); 1314 } 1315 if (readUntil != null && readUntil.reset(line).matches()) { 1316 if (includeMatchingLine && output != null) { 1317 output.println(line); 1318 } 1319 break; 1320 } 1321 if (output != null) { 1322 output.println(line); 1323 } 1324 } 1325 } 1326 1327 private static DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'GMT'"); 1328 private static DateFormat DATE_ONLY = new SimpleDateFormat("yyyy-MM-dd"); 1329 static { 1330 df.setTimeZone(TimeZone.getTimeZone("GMT")); 1331 DATE_ONLY.setTimeZone(TimeZone.getTimeZone("GMT")); 1332 } 1333 isoFormat(Date date)1334 public static String isoFormat(Date date) { 1335 synchronized (df) { 1336 return df.format(date); 1337 } 1338 } 1339 isoFormatDateOnly(Date date)1340 public static String isoFormatDateOnly(Date date) { 1341 synchronized (DATE_ONLY) { 1342 return DATE_ONLY.format(date); 1343 } 1344 } 1345 newConcurrentHashMap()1346 public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap() { 1347 // http://ria101.wordpress.com/2011/12/12/concurrenthashmap-avoid-a-common-misuse/ 1348 return new ConcurrentHashMap<>(4, 0.9f, 1); 1349 } 1350 newConcurrentHashMap(Map<K, V> source)1351 public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap(Map<K, V> source) { 1352 ConcurrentHashMap<K, V> result = newConcurrentHashMap(); 1353 result.putAll(source); 1354 return result; 1355 } 1356 equals(Object a, Object b)1357 public static boolean equals(Object a, Object b) { 1358 return a == b ? true 1359 : a == null || b == null ? false 1360 : a.equals(b); 1361 } 1362 getDoubleLink(String code)1363 public static String getDoubleLink(String code) { 1364 final String anchorSafe = TransliteratorUtilities.toHTML.transliterate(code).replace(" ", "_"); 1365 return "<a name='" + anchorSafe + "' href='#" + anchorSafe + "'>"; 1366 } 1367 getDoubleLinkedText(String anchor, String anchorText)1368 public static String getDoubleLinkedText(String anchor, String anchorText) { 1369 return getDoubleLink(anchor) + TransliteratorUtilities.toHTML.transliterate(anchorText).replace("_", " ") 1370 + "</a>"; 1371 } 1372 getDoubleLinkedText(String anchor)1373 public static String getDoubleLinkedText(String anchor) { 1374 return getDoubleLinkedText(anchor, anchor); 1375 } 1376 getDoubleLinkMsg()1377 public static String getDoubleLinkMsg() { 1378 return "<a name=''{0}'' href=''#{0}''>{0}</a>"; 1379 } 1380 getDoubleLinkMsg2()1381 public static String getDoubleLinkMsg2() { 1382 return "<a name=''{0}{1}'' href=''#{0}{1}''>{0}</a>"; 1383 } 1384 getCopyrightString()1385 public static String getCopyrightString() { 1386 return getCopyrightString(""); 1387 } 1388 getCopyrightString(String linePrefix)1389 public static String getCopyrightString(String linePrefix) { 1390 // now do the rest 1391 return linePrefix + "Copyright \u00A9 1991-" + Calendar.getInstance().get(Calendar.YEAR) + " Unicode, Inc." + CldrUtility.LINE_SEPARATOR 1392 + linePrefix + "For terms of use, see http://www.unicode.org/copyright.html" + CldrUtility.LINE_SEPARATOR 1393 + linePrefix + CLDRURLS.UNICODE_SPDX_HEADER + CldrUtility.LINE_SEPARATOR 1394 + linePrefix + "CLDR data files are interpreted according to the LDML specification " + "(http://unicode.org/reports/tr35/)"; 1395 } 1396 1397 // TODO Move to collection utilities 1398 /** 1399 * Type-safe get 1400 * @param map 1401 * @param key 1402 * @return value 1403 */ get(M map, K key)1404 public static <K, V, M extends Map<K, V>> V get(M map, K key) { 1405 return map.get(key); 1406 } 1407 1408 /** 1409 * Type-safe contains 1410 * @param map 1411 * @param key 1412 * @return value 1413 */ contains(C collection, K key)1414 public static <K, C extends Collection<K>> boolean contains(C collection, K key) { 1415 return collection.contains(key); 1416 } 1417 toEnumSet(Class<E> classValue, Collection<String> stringValues)1418 public static <E extends Enum<E>> EnumSet<E> toEnumSet(Class<E> classValue, Collection<String> stringValues) { 1419 EnumSet<E> result = EnumSet.noneOf(classValue); 1420 for (String s : stringValues) { 1421 result.add(Enum.valueOf(classValue, s)); 1422 } 1423 return result; 1424 } 1425 putNew(M map, K key, V value)1426 public static <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) { 1427 if (!map.containsKey(key)) { 1428 map.put(key, value); 1429 } 1430 return map; 1431 } 1432 cleanSemiFields(String line)1433 public static String[] cleanSemiFields(String line) { 1434 line = cleanLine(line); 1435 return line.isEmpty() ? null : SEMI_SPLIT.split(line); 1436 } 1437 cleanLine(String line)1438 private static String cleanLine(String line) { 1439 int comment = line.indexOf("#"); 1440 if (comment >= 0) { 1441 line = line.substring(0, comment); 1442 } 1443 if (line.startsWith("\uFEFF")) { 1444 line = line.substring(1); 1445 } 1446 return line.trim(); 1447 } 1448 handleFile(String filename, LineHandler handler)1449 public static void handleFile(String filename, LineHandler handler) throws IOException { 1450 try (BufferedReader in = getUTF8Data(filename);) { 1451 String line = null; 1452 while ((line = in.readLine()) != null) { 1453 // String line = in.readLine(); 1454 // if (line == null) { 1455 // break; 1456 // } 1457 try { 1458 if (!handler.handle(line)) { 1459 if (HANDLEFILE_SHOW_SKIP) { 1460 System.out.println("Skipping line: " + line); 1461 } 1462 } 1463 } catch (Exception e) { 1464 throw (RuntimeException) new IllegalArgumentException("Problem with line: " + line) 1465 .initCause(e); 1466 } 1467 } 1468 } 1469 // in.close(); 1470 } 1471 ifNull(T x, T y)1472 public static <T> T ifNull(T x, T y) { 1473 return x == null 1474 ? y 1475 : x; 1476 } 1477 ifSame(T source, T replaceIfSame, T replacement)1478 public static <T> T ifSame(T source, T replaceIfSame, T replacement) { 1479 return source == replaceIfSame ? replacement : source; 1480 } 1481 ifEqual(T source, T replaceIfSame, T replacement)1482 public static <T> T ifEqual(T source, T replaceIfSame, T replacement) { 1483 return Objects.equals(source, replaceIfSame) ? replacement : source; 1484 } 1485 intersect(Set<T> a, Collection<T> b)1486 public static <T> Set<T> intersect(Set<T> a, Collection<T> b) { 1487 Set<T> result = new LinkedHashSet<>(a); 1488 result.retainAll(b); 1489 return result; 1490 } 1491 subtract(Set<T> a, Collection<T> b)1492 public static <T> Set<T> subtract(Set<T> a, Collection<T> b) { 1493 Set<T> result = new LinkedHashSet<>(a); 1494 result.removeAll(b); 1495 return result; 1496 } 1497 deepEquals(Object... pairs)1498 public static boolean deepEquals(Object... pairs) { 1499 for (int item = 0; item < pairs.length;) { 1500 if (!Objects.deepEquals(pairs[item++], pairs[item++])) { 1501 return false; 1502 } 1503 } 1504 return true; 1505 } 1506 array(Splitter splitter, String source)1507 public static String[] array(Splitter splitter, String source) { 1508 List<String> list = splitter.splitToList(source); 1509 return list.toArray(new String[list.size()]); 1510 } 1511 toHex(String in, boolean javaStyle)1512 public static String toHex(String in, boolean javaStyle) { 1513 StringBuilder result = new StringBuilder(); 1514 for (int i = 0; i < in.length(); ++i) { 1515 result.append(toHex(in.charAt(i), javaStyle)); 1516 } 1517 return result.toString(); 1518 } 1519 toHex(int j, boolean javaStyle)1520 public static String toHex(int j, boolean javaStyle) { 1521 if (j == '\"') { 1522 return "\\\""; 1523 } else if (j == '\\') { 1524 return "\\\\"; 1525 } else if (0x20 < j && j < 0x7F) { 1526 return String.valueOf((char) j); 1527 } 1528 final String hexString = Integer.toHexString(j).toUpperCase(); 1529 int gap = 4 - hexString.length(); 1530 if (gap < 0) { 1531 gap = 0; 1532 } 1533 String prefix = javaStyle ? "\\u" : "U+"; 1534 return prefix + "000".substring(0, gap) + hexString; 1535 } 1536 1537 /** 1538 * get string format for debugging, since Java has a useless display for many items 1539 * @param item 1540 * @return 1541 */ toString(Object item)1542 public static String toString(Object item) { 1543 if (item instanceof Object[]) { 1544 return toString(Arrays.asList((Object[]) item)); 1545 } else if (item instanceof Entry) { 1546 return toString(((Entry) item).getKey()) + "≔" + toString(((Entry) item).getValue()); 1547 } else if (item instanceof Map) { 1548 return "{" + toString(((Map) item).entrySet()) + "}"; 1549 } else if (item instanceof Collection) { 1550 List<String> result = new ArrayList<>(); 1551 for (Object subitem : (Collection) item) { 1552 result.add(toString(subitem)); 1553 } 1554 return result.toString(); 1555 } 1556 return item.toString(); 1557 } 1558 1559 /** 1560 * Return the git hash for the CLDR base directory. 1561 * 1562 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1563 */ getCldrBaseDirHash()1564 public static String getCldrBaseDirHash() { 1565 final File baseDir = CLDRConfig.getInstance().getCldrBaseDirectory(); 1566 return getGitHashForDir(baseDir.toString()); 1567 } 1568 1569 /** 1570 * Return the git hash for a directory. 1571 * 1572 * @param dir the directory name 1573 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1574 */ getGitHashForDir(String dir)1575 public final static String getGitHashForDir(String dir) { 1576 final String GIT_HASH_COMMANDS[] = { "git", "rev-parse", "HEAD" }; 1577 try { 1578 if (dir == null) { 1579 return CLDRURLS.UNKNOWN_REVISION; // no dir 1580 } 1581 File f = new File(dir); 1582 if (!f.isDirectory()) { 1583 return CLDRURLS.UNKNOWN_REVISION; // does not exist 1584 } 1585 Process p = Runtime.getRuntime().exec(GIT_HASH_COMMANDS, null, f); 1586 try (BufferedReader is = new BufferedReader(new InputStreamReader(p.getInputStream()))) { 1587 String str = is.readLine(); 1588 if (str.length() == 0) { 1589 throw new Exception("git returned empty"); 1590 } 1591 return str; 1592 } 1593 } catch(Throwable t) { 1594 // We do not expect this to be called frequently. 1595 System.err.println("While trying to get 'git' hash for " + dir + " : " + t.getMessage()); 1596 t.printStackTrace(); 1597 return CLDRURLS.UNKNOWN_REVISION; 1598 } 1599 } 1600 } 1601