1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import java.io.BufferedReader; 12 import java.io.File; 13 import java.io.FileReader; 14 import java.io.IOException; 15 import java.io.InputStream; 16 import java.io.InputStreamReader; 17 import java.io.PrintWriter; 18 import java.lang.reflect.Constructor; 19 import java.lang.reflect.Method; 20 import java.nio.file.Files; 21 import java.nio.file.Path; 22 import java.nio.file.Paths; 23 import java.util.ArrayList; 24 import java.util.Arrays; 25 import java.util.Calendar; 26 import java.util.Collection; 27 import java.util.Collections; 28 import java.util.Comparator; 29 import java.util.Date; 30 import java.util.EnumSet; 31 import java.util.HashMap; 32 import java.util.HashSet; 33 import java.util.Iterator; 34 import java.util.LinkedHashMap; 35 import java.util.LinkedHashSet; 36 import java.util.List; 37 import java.util.Map; 38 import java.util.Map.Entry; 39 import java.util.Objects; 40 import java.util.Set; 41 import java.util.SortedMap; 42 import java.util.SortedSet; 43 import java.util.TreeMap; 44 import java.util.TreeSet; 45 import java.util.concurrent.ConcurrentHashMap; 46 import java.util.concurrent.TimeUnit; 47 import java.util.regex.Matcher; 48 import java.util.regex.Pattern; 49 50 import org.unicode.cldr.draft.FileUtilities; 51 52 import com.google.common.base.Splitter; 53 import com.google.common.collect.ImmutableMap; 54 import com.google.common.collect.ImmutableMultimap; 55 import com.google.common.collect.Multimap; 56 import com.ibm.icu.impl.Utility; 57 import com.ibm.icu.text.DateFormat; 58 import com.ibm.icu.text.SimpleDateFormat; 59 import 
com.ibm.icu.text.Transform; 60 import com.ibm.icu.text.Transliterator; 61 import com.ibm.icu.text.UTF16; 62 import com.ibm.icu.text.UnicodeSet; 63 import com.ibm.icu.text.UnicodeSetIterator; 64 import com.ibm.icu.util.Freezable; 65 import com.ibm.icu.util.TimeZone; 66 67 public class CldrUtility { 68 /** 69 * These need to be consistent with "CLDR-Code-Git-Commit" in tools/cldr-code/pom.xml 70 * 71 * If and when "CLDR-Apps-Git-Commit" in tools/cldr-apps/pom.xml becomes usable for the 72 * cldr-apps war file, we may add APPS_SLUG = "CLDR-Apps" here, and in some contexts 73 * use APPS_SLUG in addition to, or instead of, CODE_SLUG 74 */ 75 public static final String CODE_SLUG = "CLDR-Code"; 76 public static final String GIT_COMMIT_SUFFIX = "-Git-Commit"; 77 78 public static final String HOME_KEY = "CLDRHOME"; 79 public static final String DIR_KEY = "CLDR_DIR"; 80 public static final String MAIN_KEY = "CLDR_MAIN"; 81 82 public static final boolean DEBUG_MISSING_DIRECTORIES = false; 83 84 public static final boolean BETA = false; 85 86 public static final String LINE_SEPARATOR = "\n"; 87 public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*"); 88 89 private static final boolean HANDLEFILE_SHOW_SKIP = false; 90 // Constant for "∅∅∅". Indicates that a child locale has no value for a 91 // path even though a parent does. 92 public static final String NO_INHERITANCE_MARKER = new String(new char[] { 0x2205, 0x2205, 0x2205 }); 93 94 /** 95 * Define the constant INHERITANCE_MARKER for "↑↑↑", used by Survey Tool to indicate a "passthru" vote to the parent locale. 96 * If CLDRFile ever finds this value in a data field, writing of the field should be suppressed. 97 */ 98 public static final String INHERITANCE_MARKER = new String(new char[] { 0x2191, 0x2191, 0x2191 }); 99 100 public static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 101 102 /** 103 * Very simple class, used to replace variables in a string. 
For example 104 * <p> 105 * 106 * <pre> 107 * static VariableReplacer langTag = new VariableReplacer() 108 * .add("$alpha", "[a-zA-Z]") 109 * .add("$digit", "[0-9]") 110 * .add("$alphanum", "[a-zA-Z0-9]") 111 * .add("$x", "[xX]"); 112 * ... 113 * String langTagPattern = langTag.replace(...); 114 * </pre> 115 */ 116 public static class VariableReplacer { 117 // simple implementation for now 118 private Map<String, String> m = new TreeMap<>(Collections.reverseOrder()); 119 add(String variable, String value)120 public VariableReplacer add(String variable, String value) { 121 m.put(variable, value); 122 return this; 123 } 124 replace(String source)125 public String replace(String source) { 126 String oldSource; 127 do { 128 oldSource = source; 129 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) { 130 String variable = it.next(); 131 String value = m.get(variable); 132 source = replaceAll(source, variable, value); 133 } 134 } while (!source.equals(oldSource)); 135 return source; 136 } 137 replaceAll(String source, String key, String value)138 public String replaceAll(String source, String key, String value) { 139 while (true) { 140 int pos = source.indexOf(key); 141 if (pos < 0) return source; 142 source = source.substring(0, pos) + value + source.substring(pos + key.length()); 143 } 144 } 145 } 146 147 public interface LineHandler { 148 /** 149 * Return false if line was skipped 150 * 151 * @param line 152 * @return 153 */ handle(String line)154 boolean handle(String line) throws Exception; 155 } 156 getPath(String fileOrDir, String filename)157 public static String getPath(String fileOrDir, String filename) { 158 // Required for cases where a system property is read but not default is given. 159 // TODO: Fix callers to not fail silently if properties are missing. 
160 if (fileOrDir == null) { 161 return null; 162 } 163 Path path = Paths.get(fileOrDir); 164 if (filename != null) { 165 path = path.resolve(filename); 166 } 167 if (DEBUG_MISSING_DIRECTORIES && !Files.exists(path)) { 168 System.err.println("Warning: directory doesn't exist: " + path); 169 } 170 return PathUtilities.getNormalizedPathString(path) + File.separatorChar; 171 } 172 getPath(String path)173 public static String getPath(String path) { 174 return getPath(path, null); 175 } 176 177 public static final String ANALYTICS = "<script>\n" 178 + "var gaJsHost = ((\"https:\" == document.location.protocol) ? \"https://ssl.\" : \"http://www.\");\n" 179 + "document.write(unescape(\"%3Cscript src='\" + gaJsHost + \"google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E\"));\n" 180 + "</script>\n" 181 + "<script>\n" 182 + "try {\n" 183 + "var pageTracker = _gat._getTracker(\"UA-7672775-1\");\n" 184 + "pageTracker._trackPageview();\n" 185 + "} catch(err) {}</script>"; 186 187 public static final List<String> MINIMUM_LANGUAGES = Arrays.asList(new String[] { "ar", "en", "de", "fr", "hi", 188 "it", "es", "pt", "ru", "zh", "ja" }); // plus language itself 189 public static final List<String> MINIMUM_TERRITORIES = Arrays.asList(new String[] { "US", "GB", "DE", "FR", "IT", 190 "JP", "CN", "IN", "RU", "BR" }); 191 192 public interface LineComparer { 193 static final int LINES_DIFFERENT = -1, LINES_SAME = 0, SKIP_FIRST = 1, SKIP_SECOND = 2; 194 195 /** 196 * Returns LINES_DIFFERENT, LINES_SAME, or if one of the lines is ignorable, SKIP_FIRST or SKIP_SECOND 197 * 198 * @param line1 199 * @param line2 200 * @return 201 */ compare(String line1, String line2)202 int compare(String line1, String line2); 203 } 204 205 public static class SimpleLineComparator implements LineComparer { 206 public static final int TRIM = 1, SKIP_SPACES = 2, SKIP_EMPTY = 4, SKIP_CVS_TAGS = 8; 207 StringIterator si1 = new StringIterator(); 208 StringIterator si2 = new StringIterator(); 209 int 
flags; 210 SimpleLineComparator(int flags)211 public SimpleLineComparator(int flags) { 212 this.flags = flags; 213 } 214 215 @Override compare(String line1, String line2)216 public int compare(String line1, String line2) { 217 // first, see if we want to skip one or the other lines 218 int skipper = 0; 219 if (line1 == null) { 220 skipper = SKIP_FIRST; 221 } else { 222 if ((flags & TRIM) != 0) line1 = line1.trim(); 223 if ((flags & SKIP_EMPTY) != 0 && line1.length() == 0) skipper = SKIP_FIRST; 224 } 225 if (line2 == null) { 226 skipper = SKIP_SECOND; 227 } else { 228 if ((flags & TRIM) != 0) line2 = line2.trim(); 229 if ((flags & SKIP_EMPTY) != 0 && line2.length() == 0) skipper += SKIP_SECOND; 230 } 231 if (skipper != 0) { 232 if (skipper == SKIP_FIRST + SKIP_SECOND) return LINES_SAME; // ok, don't skip both 233 return skipper; 234 } 235 236 // check for null 237 if (line1 == null) { 238 if (line2 == null) return LINES_SAME; 239 return LINES_DIFFERENT; 240 } 241 if (line2 == null) { 242 return LINES_DIFFERENT; 243 } 244 245 // now check equality 246 if (line1.equals(line2)) return LINES_SAME; 247 248 // if not equal, see if we are skipping spaces 249 if ((flags & SKIP_CVS_TAGS) != 0) { 250 if (line1.indexOf('$') >= 0 && line2.indexOf('$') >= 0) { 251 line1 = stripTags(line1); 252 line2 = stripTags(line2); 253 if (line1.equals(line2)) return LINES_SAME; 254 } else if (line1.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/") 255 && line2.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")) { 256 return LINES_SAME; 257 } 258 } 259 if ((flags & SKIP_SPACES) != 0 && si1.set(line1).matches(si2.set(line2))) return LINES_SAME; 260 return LINES_DIFFERENT; 261 } 262 263 // private Matcher dtdMatcher = PatternCache.get( 264 // "\\Q<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/\\E.*\\Q/ldml.dtd\">\\E").matcher(""); 265 266 private String[] CVS_TAGS = { "Revision", "Date" }; 267 stripTags(String line)268 private String stripTags(String line) { 269 // $ 270 // 
Revision: 8994 $ 271 // $ 272 // Date: 2013-07-03 21:31:17 +0200 (Wed, 03 Jul 2013) $ 273 int pos = line.indexOf('$'); 274 if (pos < 0) return line; 275 pos++; 276 int endpos = line.indexOf('$', pos); 277 if (endpos < 0) return line; 278 for (int i = 0; i < CVS_TAGS.length; ++i) { 279 if (!line.startsWith(CVS_TAGS[i], pos)) continue; 280 line = line.substring(0, pos + CVS_TAGS[i].length()) + line.substring(endpos); 281 } 282 return line; 283 } 284 285 } 286 287 /** 288 * 289 * @param file1 290 * @param file2 291 * @param failureLines 292 * on input, String[2], on output, failing lines 293 * @param lineComparer 294 * @return 295 * @throws IOException 296 */ areFileIdentical(String file1, String file2, String[] failureLines, LineComparer lineComparer)297 public static boolean areFileIdentical(String file1, String file2, String[] failureLines, 298 LineComparer lineComparer) throws IOException { 299 try (BufferedReader br1 = new BufferedReader(new FileReader(file1), 32 * 1024); 300 BufferedReader br2 = new BufferedReader(new FileReader(file2), 32 * 1024);) { 301 String line1 = ""; 302 String line2 = ""; 303 int skip = 0; 304 305 while (true) { 306 if ((skip & LineComparer.SKIP_FIRST) == 0) line1 = br1.readLine(); 307 if ((skip & LineComparer.SKIP_SECOND) == 0) line2 = br2.readLine(); 308 if (line1 == null && line2 == null) return true; 309 if (line1 == null || line2 == null) { 310 // System.out.println("debug"); 311 } 312 skip = lineComparer.compare(line1, line2); 313 if (skip == LineComparer.LINES_DIFFERENT) { 314 break; 315 } 316 } 317 failureLines[0] = line1 != null ? line1 : "<end of file>"; 318 failureLines[1] = line2 != null ? 
line2 : "<end of file>"; 319 return false; 320 } 321 } 322 323 /* 324 * static String getLineWithoutFluff(BufferedReader br1, boolean first, int flags) throws IOException { 325 * while (true) { 326 * String line1 = br1.readLine(); 327 * if (line1 == null) return line1; 328 * if ((flags & TRIM)!= 0) line1 = line1.trim(); 329 * if ((flags & SKIP_EMPTY)!= 0 && line1.length() == 0) continue; 330 * return line1; 331 * } 332 * } 333 */ 334 335 public final static class StringIterator { 336 String string; 337 int position = 0; 338 next()339 char next() { 340 while (true) { 341 if (position >= string.length()) return '\uFFFF'; 342 char ch = string.charAt(position++); 343 if (ch != ' ' && ch != '\t') return ch; 344 } 345 } 346 reset()347 StringIterator reset() { 348 position = 0; 349 return this; 350 } 351 set(String string)352 StringIterator set(String string) { 353 this.string = string; 354 position = 0; 355 return this; 356 } 357 matches(StringIterator other)358 boolean matches(StringIterator other) { 359 while (true) { 360 char c1 = next(); 361 char c2 = other.next(); 362 if (c1 != c2) return false; 363 if (c1 == '\uFFFF') return true; 364 } 365 } 366 367 /** 368 * @return Returns the position. 369 */ getPosition()370 public int getPosition() { 371 return position; 372 } 373 } 374 splitArray(String source, char separator)375 public static String[] splitArray(String source, char separator) { 376 return splitArray(source, separator, false); 377 } 378 splitArray(String source, char separator, boolean trim)379 public static String[] splitArray(String source, char separator, boolean trim) { 380 List<String> piecesList = splitList(source, separator, trim); 381 String[] pieces = new String[piecesList.size()]; 382 piecesList.toArray(pieces); 383 return pieces; 384 } 385 splitCommaSeparated(String line)386 public static String[] splitCommaSeparated(String line) { 387 // items are separated by ',' 388 // each item is of the form abc... 389 // or "..." 
(required if a comma or quote is contained) 390 // " in a field is represented by "" 391 List<String> result = new ArrayList<>(); 392 StringBuilder item = new StringBuilder(); 393 boolean inQuote = false; 394 for (int i = 0; i < line.length(); ++i) { 395 char ch = line.charAt(i); // don't worry about supplementaries 396 switch (ch) { 397 case '"': 398 inQuote = !inQuote; 399 // at start or end, that's enough 400 // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote 401 if (inQuote && item.length() != 0) { 402 item.append('"'); 403 inQuote = true; 404 } 405 break; 406 case ',': 407 if (!inQuote) { 408 result.add(item.toString()); 409 item.setLength(0); 410 } else { 411 item.append(ch); 412 } 413 break; 414 default: 415 item.append(ch); 416 break; 417 } 418 } 419 result.add(item.toString()); 420 return result.toArray(new String[result.size()]); 421 } 422 splitList(String source, char separator)423 public static List<String> splitList(String source, char separator) { 424 return splitList(source, separator, false, null); 425 } 426 splitList(String source, char separator, boolean trim)427 public static List<String> splitList(String source, char separator, boolean trim) { 428 return splitList(source, separator, trim, null); 429 } 430 splitList(String source, char separator, boolean trim, List<String> output)431 public static List<String> splitList(String source, char separator, boolean trim, List<String> output) { 432 return splitList(source, Character.toString(separator), trim, output); 433 } 434 splitList(String source, String separator)435 public static List<String> splitList(String source, String separator) { 436 return splitList(source, separator, false, null); 437 } 438 splitList(String source, String separator, boolean trim)439 public static List<String> splitList(String source, String separator, boolean trim) { 440 return splitList(source, separator, trim, null); 441 } 442 splitList(String source, String separator, 
boolean trim, List<String> output)443 public static List<String> splitList(String source, String separator, boolean trim, List<String> output) { 444 if (output == null) output = new ArrayList<>(); 445 if (source.length() == 0) return output; 446 int pos = 0; 447 do { 448 int npos = source.indexOf(separator, pos); 449 if (npos < 0) npos = source.length(); 450 String piece = source.substring(pos, npos); 451 if (trim) piece = piece.trim(); 452 output.add(piece); 453 pos = npos + 1; 454 } while (pos < source.length()); 455 return output; 456 } 457 458 /** 459 * Protect a collection (as much as Java lets us!) from modification. 460 * Really, really ugly code, since Java doesn't let us do better. 461 */ 462 @SuppressWarnings({ "rawtypes", "unchecked" }) protectCollection(T source)463 public static <T> T protectCollection(T source) { 464 // TODO - exclude UnmodifiableMap, Set, ... 465 if (source instanceof Map) { 466 Map<Object,Object> sourceMap = (Map) source; 467 ImmutableMap.Builder<Object,Object> builder = ImmutableMap.builder(); 468 for (Entry<Object,Object> entry : sourceMap.entrySet()) { 469 final Object key = entry.getKey(); 470 final Object value = entry.getValue(); 471 builder.put(protectCollection(key), protectCollection(value)); 472 } 473 return (T) builder.build(); 474 } else if (source instanceof Multimap) { 475 Multimap<Object,Object> sourceMap = (Multimap) source; 476 ImmutableMultimap.Builder<Object,Object> builder = ImmutableMultimap.builder(); 477 for (Entry<Object,Object> entry : sourceMap.entries()) { 478 builder.put(protectCollection(entry.getKey()), protectCollection(entry.getValue())); 479 } 480 return (T) builder.build(); 481 } else if (source instanceof Collection) { 482 // TODO use ImmutableSet, List, ... 
483 Collection sourceCollection = (Collection) source; 484 Collection<Object> resultCollection = clone(sourceCollection); 485 if (resultCollection == null) return (T) sourceCollection; // failed 486 resultCollection.clear(); 487 488 for (Object item : sourceCollection) { 489 resultCollection.add(protectCollection(item)); 490 } 491 492 return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection) 493 : sourceCollection instanceof SortedSet ? (T) Collections 494 .unmodifiableSortedSet((SortedSet) sourceCollection) 495 : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection) 496 : (T) Collections.unmodifiableCollection(sourceCollection); 497 } else if (source instanceof Freezable) { 498 Freezable freezableSource = (Freezable) source; 499 return (T) freezableSource.freeze(); 500 // if (freezableSource.isFrozen()) return source; 501 // return (T) ((Freezable) (freezableSource.cloneAsThawed())).freeze(); 502 } else { 503 return source; // can't protect 504 } 505 } 506 507 /** 508 * Protect a collections where we don't need to clone. 509 * @param source 510 * @return 511 */ 512 @SuppressWarnings({ "rawtypes", "unchecked" }) protectCollectionX(T source)513 public static <T> T protectCollectionX(T source) { 514 // TODO - exclude UnmodifiableMap, Set, ... 515 if (isImmutable(source)) { 516 return source; 517 } 518 if (source instanceof Map) { 519 Map sourceMap = (Map) source; 520 // recurse 521 LinkedHashMap tempMap = new LinkedHashMap<>(sourceMap); // copy contents 522 sourceMap.clear(); 523 for (Object key : tempMap.keySet()) { 524 sourceMap.put(protectCollection(key), protectCollectionX(tempMap.get(key))); 525 } 526 return sourceMap instanceof SortedMap ? 
(T) Collections.unmodifiableSortedMap((SortedMap) sourceMap) 527 : (T) Collections.unmodifiableMap(sourceMap); 528 } else if (source instanceof Collection) { 529 Collection sourceCollection = (Collection) source; 530 LinkedHashSet tempSet = new LinkedHashSet<>(sourceCollection); // copy contents 531 532 sourceCollection.clear(); 533 for (Object item : tempSet) { 534 sourceCollection.add(protectCollectionX(item)); 535 } 536 537 return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection) 538 : sourceCollection instanceof SortedSet ? (T) Collections 539 .unmodifiableSortedSet((SortedSet) sourceCollection) 540 : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection) 541 : (T) Collections.unmodifiableCollection(sourceCollection); 542 } else if (source instanceof Freezable) { 543 Freezable freezableSource = (Freezable) source; 544 return (T) freezableSource.freeze(); 545 } else { 546 throw new IllegalArgumentException("Can’t protect: " + source.getClass().toString()); 547 } 548 } 549 550 private static final Set<Object> KNOWN_IMMUTABLES = new HashSet<>(Arrays.asList( 551 String.class)); 552 isImmutable(Object source)553 public static boolean isImmutable(Object source) { 554 return source == null 555 || source instanceof Enum 556 || source instanceof Number 557 || KNOWN_IMMUTABLES.contains(source.getClass()); 558 } 559 560 /** 561 * Clones T if we can; otherwise returns null. 562 * 563 * @param <T> 564 * @param source 565 * @return 566 */ 567 @SuppressWarnings("unchecked") clone(T source)568 private static <T> T clone(T source) { 569 final Class<? extends Object> class1 = source.getClass(); 570 try { 571 final Method declaredMethod = class1.getDeclaredMethod("clone", (Class<?>) null); 572 return (T) declaredMethod.invoke(source, (Object) null); 573 } catch (Exception e) { 574 } 575 try { 576 final Constructor<? 
extends Object> declaredMethod = class1.getConstructor((Class<?>) null); 577 return (T) declaredMethod.newInstance((Object) null); 578 } catch (Exception e) { 579 } 580 return null; // uncloneable 581 } 582 583 /** 584 * Appends two strings, inserting separator if either is empty 585 */ joinWithSeparation(String a, String separator, String b)586 public static String joinWithSeparation(String a, String separator, String b) { 587 if (a.length() == 0) return b; 588 if (b.length() == 0) return a; 589 return a + separator + b; 590 } 591 592 /** 593 * Appends two strings, inserting separator if either is empty. Modifies first map 594 */ joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b)595 public static Map<String, String> joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b) { 596 for (Iterator<String> it = b.keySet().iterator(); it.hasNext();) { 597 String key = it.next(); 598 String bvalue = b.get(key); 599 String avalue = a.get(key); 600 if (avalue != null) { 601 if (avalue.trim().equals(bvalue.trim())) continue; 602 bvalue = joinWithSeparation(avalue, separator, bvalue); 603 } 604 a.put(key, bvalue); 605 } 606 return a; 607 } 608 join(Collection<T> c, String separator)609 public static <T> String join(Collection<T> c, String separator) { 610 return join(c, separator, null); 611 } 612 join(Object[] c, String separator)613 public static String join(Object[] c, String separator) { 614 return join(c, separator, null); 615 } 616 join(Collection<T> c, String separator, Transform<T, String> transform)617 public static <T> String join(Collection<T> c, String separator, Transform<T, String> transform) { 618 StringBuffer output = new StringBuffer(); 619 boolean isFirst = true; 620 for (T item : c) { 621 if (isFirst) { 622 isFirst = false; 623 } else { 624 output.append(separator); 625 } 626 output.append(transform != null ? 
transform.transform(item) : item); 627 } 628 return output.toString(); 629 } 630 join(T[] c, String separator, Transform<T, String> transform)631 public static <T> String join(T[] c, String separator, Transform<T, String> transform) { 632 return join(Arrays.asList(c), separator, transform); 633 } 634 635 /** 636 * Utility like Arrays.asList() 637 */ 638 @SuppressWarnings("unchecked") asMap(Object[][] source, Map<K, V> target, boolean reverse)639 public static <K, V> Map<K, V> asMap(Object[][] source, Map<K, V> target, boolean reverse) { 640 int from = 0, to = 1; 641 if (reverse) { 642 from = 1; 643 to = 0; 644 } 645 for (int i = 0; i < source.length; ++i) { 646 if (source[i].length != 2) { 647 throw new IllegalArgumentException("Source must be array of pairs of strings: " 648 + Arrays.asList(source[i])); 649 } 650 target.put((K) source[i][from], (V) source[i][to]); 651 } 652 return target; 653 } 654 asMap(Object[][] source)655 public static <K, V> Map<K, V> asMap(Object[][] source) { 656 return asMap(source, new HashMap<K, V>(), false); 657 } 658 659 /** 660 * Returns the canonical name for a file. 661 */ getCanonicalName(String file)662 public static String getCanonicalName(String file) { 663 try { 664 return PathUtilities.getNormalizedPathString(file); 665 } catch (Exception e) { 666 return file; 667 } 668 } 669 670 /** 671 * Convert a UnicodeSet into a string that can be embedded into a Regex. 
    /**
     * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that
     * are in the UnicodeSet, Supplementary ranges, and escaping.
     * <p>
     * Convenience form of {@code toRegex(source, null, false)}.
     *
     * @param source
     *            The source set
     * @return the regex text produced by the three-argument overload
     */
    public static String toRegex(UnicodeSet source) {
        // A null escaper makes the three-argument overload fall back to DEFAULT_REGEX_ESCAPER;
        // false means supplementary ranges are not split into surrogate pairs.
        return toRegex(source, null, false);
    }
For example, the above turns into 3 pairs of ranges: 725 * 726 * <pre> 727 * [\uDBC0\uDBCC][\uDC00-\uDC0F]|\uDBC4[\uDC0F-\uDFFF]|[\uD800-\uDBBF\uDBC5-\uDBCB][\uDC00-\uDFFF] 728 * </pre> 729 * 730 * @return escaped string. Something like [a-z] or (?:[a-m]|{zh}) if there is 731 * a string zh in the set, or a more complicated case for 732 * supplementaries. <br> 733 * Special cases: [] returns "", single item returns a string 734 * (escaped), like [a] => "a", or [{abc}] => "abc"<br> 735 * Supplementaries are handled specially, as described under onlyBmp. 736 */ toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp)737 public static String toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp) { 738 if (escaper == null) { 739 escaper = DEFAULT_REGEX_ESCAPER; 740 } 741 UnicodeSetIterator it = new UnicodeSetIterator(source); 742 // if there is only one item, return it 743 if (source.size() == 0) { 744 return ""; 745 } 746 if (source.size() == 1) { 747 it.next(); 748 return escaper.transliterate(it.getString()); 749 } 750 // otherwise, we figure out what is in the set, and will return 751 StringBuilder base = new StringBuilder("["); 752 StringBuilder alternates = new StringBuilder(); 753 Map<UnicodeSet, UnicodeSet> lastToFirst = new TreeMap<>(new UnicodeSetComparator()); 754 int alternateCount = 0; 755 while (it.nextRange()) { 756 if (it.codepoint == UnicodeSetIterator.IS_STRING) { 757 ++alternateCount; 758 alternates.append('|').append(escaper.transliterate(it.string)); 759 } else if (!onlyBmp || it.codepointEnd <= 0xFFFF) { // BMP 760 addBmpRange(it.codepoint, it.codepointEnd, escaper, base); 761 } else { // supplementary 762 if (it.codepoint <= 0xFFFF) { 763 addBmpRange(it.codepoint, 0xFFFF, escaper, base); 764 it.codepoint = 0x10000; // reset the range 765 } 766 // this gets a bit ugly; we are trying to minimize the extra ranges for supplementaries 767 // we do this by breaking up X-Y based on the Lead and Trail values for X and Y 768 // Lx 
[Tx - Ty]) (if Lx == Ly) 769 // Lx [Tx - DFFF] | Ly [DC00-Ty] (if Lx == Ly - 1) 770 // Lx [Tx - DFFF] | [Lx+1 - Ly-1][DC00-DFFF] | Ly [DC00-Ty] (otherwise) 771 int leadX = UTF16.getLeadSurrogate(it.codepoint); 772 int trailX = UTF16.getTrailSurrogate(it.codepoint); 773 int leadY = UTF16.getLeadSurrogate(it.codepointEnd); 774 int trailY = UTF16.getTrailSurrogate(it.codepointEnd); 775 if (leadX == leadY) { 776 addSupplementalRange(leadX, leadX, trailX, trailY, escaper, lastToFirst); 777 } else { 778 addSupplementalRange(leadX, leadX, trailX, 0xDFFF, escaper, lastToFirst); 779 if (leadX != leadY - 1) { 780 addSupplementalRange(leadX + 1, leadY - 1, 0xDC00, 0xDFFF, escaper, lastToFirst); 781 } 782 addSupplementalRange(leadY, leadY, 0xDC00, trailY, escaper, lastToFirst); 783 } 784 } 785 } 786 // add in the supplementary ranges 787 if (lastToFirst.size() != 0) { 788 for (UnicodeSet last : lastToFirst.keySet()) { 789 ++alternateCount; 790 alternates.append('|').append(toRegex(lastToFirst.get(last), escaper, onlyBmp)) 791 .append(toRegex(last, escaper, onlyBmp)); 792 } 793 } 794 // Return the output. 
We separate cases in order to get the minimal extra apparatus 795 base.append("]"); 796 if (alternateCount == 0) { 797 return base.toString(); 798 } else if (base.length() > 2) { 799 return "(?:" + base + "|" + alternates.substring(1) + ")"; 800 } else if (alternateCount == 1) { 801 return alternates.substring(1); 802 } else { 803 return "(?:" + alternates.substring(1) + ")"; 804 } 805 } 806 addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper, Map<UnicodeSet, UnicodeSet> lastToFirst)807 private static void addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper, 808 Map<UnicodeSet, UnicodeSet> lastToFirst) { 809 System.out.println("\tadding: " + new UnicodeSet(leadX, leadY) + "\t" + new UnicodeSet(trailX, trailY)); 810 UnicodeSet last = new UnicodeSet(trailX, trailY); 811 UnicodeSet first = lastToFirst.get(last); 812 if (first == null) { 813 lastToFirst.put(last, first = new UnicodeSet()); 814 } 815 first.add(leadX, leadY); 816 } 817 addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base)818 private static void addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base) { 819 base.append(escaper.transliterate(UTF16.valueOf(start))); 820 if (start != limit) { 821 base.append("-").append(escaper.transliterate(UTF16.valueOf(limit))); 822 } 823 } 824 825 public static class UnicodeSetComparator implements Comparator<UnicodeSet> { 826 @Override compare(UnicodeSet o1, UnicodeSet o2)827 public int compare(UnicodeSet o1, UnicodeSet o2) { 828 return o1.compareTo(o2); 829 } 830 } 831 832 public static class CollectionComparator<T extends Comparable<T>> implements Comparator<Collection<T>> { 833 @Override compare(Collection<T> o1, Collection<T> o2)834 public int compare(Collection<T> o1, Collection<T> o2) { 835 return UnicodeSet.compare(o1, o2, UnicodeSet.ComparisonStyle.SHORTER_FIRST); 836 } 837 } 838 839 public static class ComparableComparator<T extends 
Comparable<T>> implements Comparator<T> { 840 @Override compare(T arg0, T arg1)841 public int compare(T arg0, T arg1) { 842 return Utility.checkCompare(arg0, arg1); 843 } 844 } 845 846 @SuppressWarnings({ "rawtypes", "unchecked" }) addTreeMapChain(Map coverageData, Object... objects)847 public static void addTreeMapChain(Map coverageData, Object... objects) { 848 Map<Object, Object> base = coverageData; 849 for (int i = 0; i < objects.length - 2; ++i) { 850 Map<Object, Object> nextOne = (Map<Object, Object>) base.get(objects[i]); 851 if (nextOne == null) base.put(objects[i], nextOne = new TreeMap<>()); 852 base = nextOne; 853 } 854 base.put(objects[objects.length - 2], objects[objects.length - 1]); 855 } 856 857 public static abstract class CollectionTransform<S, T> implements Transform<S, T> { 858 @Override transform(S source)859 public abstract T transform(S source); 860 transform(Collection<S> input, Collection<T> output)861 public Collection<T> transform(Collection<S> input, Collection<T> output) { 862 return CldrUtility.transform(input, this, output); 863 } 864 transform(Collection<S> input)865 public Collection<T> transform(Collection<S> input) { 866 return transform(input, new ArrayList<T>()); 867 } 868 } 869 transform(SC source, Transform<S, T> transform, TC target)870 public static <S, T, SC extends Collection<S>, TC extends Collection<T>> TC transform(SC source, Transform<S, T> transform, TC target) { 871 for (S sourceItem : source) { 872 T targetItem = transform.transform(sourceItem); 873 if (targetItem != null) { 874 target.add(targetItem); 875 } 876 } 877 return target; 878 } 879 transform( SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target)880 public static <SK, SV, TK, TV, SM extends Map<SK, SV>, TM extends Map<TK, TV>> TM transform( 881 SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target) { 882 for (Entry<SK, SV> sourceEntry : source.entrySet()) { 883 TK targetKey = 
transformKey.transform(sourceEntry.getKey()); 884 TV targetValue = transformValue.transform(sourceEntry.getValue()); 885 if (targetKey != null && targetValue != null) { 886 target.put(targetKey, targetValue); 887 } 888 } 889 return target; 890 } 891 892 public static abstract class Apply<T> { apply(T item)893 public abstract void apply(T item); 894 applyTo(U collection)895 public <U extends Collection<T>> void applyTo(U collection) { 896 for (T item : collection) { 897 apply(item); 898 } 899 } 900 } 901 902 public static abstract class Filter<T> { 903 contains(T item)904 public abstract boolean contains(T item); 905 retainAll(U c)906 public <U extends Collection<T>> U retainAll(U c) { 907 for (Iterator<T> it = c.iterator(); it.hasNext();) { 908 if (!contains(it.next())) it.remove(); 909 } 910 return c; 911 } 912 extractMatches(U c, U target)913 public <U extends Collection<T>> U extractMatches(U c, U target) { 914 for (Iterator<T> it = c.iterator(); it.hasNext();) { 915 T item = it.next(); 916 if (contains(item)) { 917 target.add(item); 918 } 919 } 920 return target; 921 } 922 removeAll(U c)923 public <U extends Collection<T>> U removeAll(U c) { 924 for (Iterator<T> it = c.iterator(); it.hasNext();) { 925 if (contains(it.next())) it.remove(); 926 } 927 return c; 928 } 929 extractNonMatches(U c, U target)930 public <U extends Collection<T>> U extractNonMatches(U c, U target) { 931 for (Iterator<T> it = c.iterator(); it.hasNext();) { 932 T item = it.next(); 933 if (!contains(item)) { 934 target.add(item); 935 } 936 } 937 return target; 938 } 939 } 940 941 public static class MatcherFilter<T> extends Filter<T> { 942 private Matcher matcher; 943 MatcherFilter(String pattern)944 public MatcherFilter(String pattern) { 945 this.matcher = PatternCache.get(pattern).matcher(""); 946 } 947 MatcherFilter(Matcher matcher)948 public MatcherFilter(Matcher matcher) { 949 this.matcher = matcher; 950 } 951 set(Matcher matcher)952 public MatcherFilter<T> set(Matcher matcher) { 953 
this.matcher = matcher; 954 return this; 955 } 956 set(String pattern)957 public MatcherFilter<T> set(String pattern) { 958 this.matcher = PatternCache.get(pattern).matcher(""); 959 return this; 960 } 961 962 @Override contains(T o)963 public boolean contains(T o) { 964 return matcher.reset(o.toString()).matches(); 965 } 966 } 967 968 // static final class HandlingTransform implements Transform<String, Handling> { 969 // @Override 970 // public Handling transform(String source) { 971 // return Handling.valueOf(source); 972 // } 973 // } 974 975 public static final class PairComparator<K extends Comparable<K>, V extends Comparable<V>> implements java.util.Comparator<Pair<K, V>> { 976 977 private Comparator<K> comp1; 978 private Comparator<V> comp2; 979 PairComparator(Comparator<K> comp1, Comparator<V> comp2)980 public PairComparator(Comparator<K> comp1, Comparator<V> comp2) { 981 this.comp1 = comp1; 982 this.comp2 = comp2; 983 } 984 985 @Override compare(Pair<K, V> o1, Pair<K, V> o2)986 public int compare(Pair<K, V> o1, Pair<K, V> o2) { 987 { 988 K o1First = o1.getFirst(); 989 K o2First = o2.getFirst(); 990 int diff = o1First == null ? (o2First == null ? 0 : -1) 991 : o2First == null ? 1 992 : comp1 == null ? o1First.compareTo(o2First) 993 : comp1.compare(o1First, o2First); 994 if (diff != 0) { 995 return diff; 996 } 997 } 998 V o1Second = o1.getSecond(); 999 V o2Second = o2.getSecond(); 1000 return o1Second == null ? (o2Second == null ? 0 : -1) 1001 : o2Second == null ? 1 1002 : comp2 == null ? o1Second.compareTo(o2Second) 1003 : comp2.compare(o1Second, o2Second); 1004 } 1005 1006 } 1007 1008 /** 1009 * Fetch data from jar 1010 * 1011 * @param name 1012 * a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break. 
1013 */ getUTF8Data(String name)1014 public static BufferedReader getUTF8Data(String name) { 1015 if (new File(name).isAbsolute()) { 1016 throw new IllegalArgumentException( 1017 "Path must be relative to org/unicode/cldr/util/data such as 'file.txt' or 'casing/file.txt', but got '" 1018 + name + "'."); 1019 } 1020 return FileReaders.openFile(CldrUtility.class, "data/" + name); 1021 } 1022 1023 /** 1024 * Fetch data from jar 1025 * 1026 * @param name 1027 * a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break. 1028 */ getInputStream(String name)1029 public static InputStream getInputStream(String name) { 1030 if (new File(name).isAbsolute()) { 1031 throw new IllegalArgumentException( 1032 "Path must be relative to org/unicode/cldr/util/data such as 'file.txt' or 'casing/file.txt', but got '" 1033 + name + "'."); 1034 } 1035 return getInputStream(CldrUtility.class, "data/" + name); 1036 } 1037 getInputStream(Class<?> callingClass, String relativePath)1038 public static InputStream getInputStream(Class<?> callingClass, String relativePath) { 1039 InputStream is = callingClass.getResourceAsStream(relativePath); 1040 // add buffering 1041 return InputStreamFactory.buffer(is); 1042 } 1043 1044 /** 1045 * Takes a Map that goes from Object to Set, and fills in the transpose 1046 * 1047 * @param source_key_valueSet 1048 * @param output_value_key 1049 */ putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key)1050 public static void putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key) { 1051 for (Iterator<Object> it = source_key_valueSet.keySet().iterator(); it.hasNext();) { 1052 Object key = it.next(); 1053 Set<Object> values = source_key_valueSet.get(key); 1054 for (Iterator<Object> it2 = values.iterator(); it2.hasNext();) { 1055 Object value = it2.next(); 1056 output_value_key.put(value, key); 1057 } 1058 } 1059 } 1060 
countInstances(String source, String substring)1061 public static int countInstances(String source, String substring) { 1062 int count = 0; 1063 int pos = 0; 1064 while (true) { 1065 pos = source.indexOf(substring, pos) + 1; 1066 if (pos <= 0) break; 1067 count++; 1068 } 1069 return count; 1070 } 1071 registerTransliteratorFromFile(String id, String dir, String filename)1072 public static void registerTransliteratorFromFile(String id, String dir, String filename) { 1073 registerTransliteratorFromFile(id, dir, filename, Transliterator.FORWARD, true); 1074 registerTransliteratorFromFile(id, dir, filename, Transliterator.REVERSE, true); 1075 } 1076 registerTransliteratorFromFile(String id, String dir, String filename, int direction, boolean reverseID)1077 public static void registerTransliteratorFromFile(String id, String dir, String filename, int direction, 1078 boolean reverseID) { 1079 if (filename == null) { 1080 filename = id.replace('-', '_'); 1081 filename = filename.replace('/', '_'); 1082 filename += ".txt"; 1083 } 1084 String rules = getText(dir, filename); 1085 Transliterator t; 1086 int pos = id.indexOf('-'); 1087 String rid; 1088 if (pos < 0) { 1089 rid = id + "-Any"; 1090 id = "Any-" + id; 1091 } else { 1092 rid = id.substring(pos + 1) + "-" + id.substring(0, pos); 1093 } 1094 if (!reverseID) rid = id; 1095 1096 if (direction == Transliterator.FORWARD) { 1097 Transliterator.unregister(id); 1098 t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 1099 Transliterator.registerInstance(t); 1100 System.out.println("Registered new Transliterator: " + id); 1101 } 1102 1103 /* 1104 * String test = "\u049A\u0430\u0437\u0430\u049B"; 1105 * System.out.println(t.transliterate(test)); 1106 * t = Transliterator.getInstance(id); 1107 * System.out.println(t.transliterate(test)); 1108 */ 1109 1110 if (direction == Transliterator.REVERSE) { 1111 Transliterator.unregister(rid); 1112 t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE); 
1113 Transliterator.registerInstance(t); 1114 System.out.println("Registered new Transliterator: " + rid); 1115 } 1116 } 1117 getText(String dir, String filename)1118 public static String getText(String dir, String filename) { 1119 try { 1120 BufferedReader br = FileUtilities.openUTF8Reader(dir, filename); 1121 StringBuffer buffer = new StringBuffer(); 1122 while (true) { 1123 String line = br.readLine(); 1124 if (line == null) break; 1125 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1); 1126 if (line.startsWith("//")) continue; 1127 buffer.append(line).append(CldrUtility.LINE_SEPARATOR); 1128 } 1129 br.close(); 1130 String rules = buffer.toString(); 1131 return rules; 1132 } catch (IOException e) { 1133 throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + filename) 1134 .initCause(e); 1135 } 1136 } 1137 callMethod(String methodNames, Class<?> cls)1138 public static void callMethod(String methodNames, Class<?> cls) { 1139 for (String methodName : methodNames.split(",")) { 1140 try { 1141 Method method; 1142 try { 1143 method = cls.getMethod(methodName, (Class[]) null); 1144 try { 1145 method.invoke(null, (Object[]) null); 1146 } catch (Exception e) { 1147 e.printStackTrace(); 1148 } 1149 } catch (Exception e) { 1150 System.out.println("No such method: " + methodName); 1151 showMethods(cls); 1152 } 1153 } catch (ClassNotFoundException e) { 1154 e.printStackTrace(); 1155 } 1156 } 1157 } 1158 showMethods(Class<?> cls)1159 public static void showMethods(Class<?> cls) throws ClassNotFoundException { 1160 System.out.println("Possible methods of " + cls.getCanonicalName() + " are: "); 1161 Method[] methods = cls.getMethods(); 1162 Set<String> names = new TreeSet<>(); 1163 for (int i = 0; i < methods.length; ++i) { 1164 if (methods[i].getGenericParameterTypes().length != 0) continue; 1165 //int mods = methods[i].getModifiers(); 1166 // if (!Modifier.isStatic(mods)) continue; 1167 String name = 
methods[i].getName(); 1168 names.add(name); 1169 } 1170 for (Iterator<String> it = names.iterator(); it.hasNext();) { 1171 System.out.println("\t" + it.next()); 1172 } 1173 } 1174 1175 /** 1176 * Breaks lines if they are too long, or if matcher.group(1) != last. Only breaks just before matcher. 1177 * 1178 * @param input 1179 * @param separator 1180 * @param matcher 1181 * must match each possible item. The first group is significant; if different, will cause break 1182 * @return 1183 */ breakLines(CharSequence input, String separator, Matcher matcher, int width)1184 static public String breakLines(CharSequence input, String separator, Matcher matcher, int width) { 1185 StringBuffer output = new StringBuffer(); 1186 String lastPrefix = ""; 1187 int lastEnd = 0; 1188 int lastBreakPos = 0; 1189 matcher.reset(input); 1190 while (true) { 1191 boolean match = matcher.find(); 1192 if (!match) { 1193 output.append(input.subSequence(lastEnd, input.length())); 1194 break; 1195 } 1196 String prefix = matcher.group(1); 1197 if (!prefix.equalsIgnoreCase(lastPrefix) || matcher.end() - lastBreakPos > width) { // break before? 1198 output.append(separator); 1199 lastBreakPos = lastEnd; 1200 } else if (lastEnd != 0) { 1201 output.append(' '); 1202 } 1203 output.append(input.subSequence(lastEnd, matcher.end()).toString().trim()); 1204 lastEnd = matcher.end(); 1205 lastPrefix = prefix; 1206 } 1207 return output.toString(); 1208 } 1209 showOptions(String[] args)1210 public static void showOptions(String[] args) { 1211 // Properties props = System.getProperties(); 1212 System.out.println("Arguments: " + join(args, " ")); // + (props == null ? 
"" : " " + props)); 1213 } 1214 roundToDecimals(double input, int places)1215 public static double roundToDecimals(double input, int places) { 1216 double log10 = Math.log10(input); // 15000 => 4.xxx 1217 double intLog10 = Math.floor(log10); 1218 double scale = Math.pow(10, intLog10 - places + 1); 1219 double factored = Math.round(input / scale) * scale; 1220 // System.out.println("###\t" +input + "\t" + factored); 1221 return factored; 1222 } 1223 1224 /** 1225 * Get a property value, returning the value if there is one (eg -Dkey=value), 1226 * otherwise the default value (for either empty or null). 1227 * 1228 * @param key 1229 * @param defaultValue 1230 * @return 1231 */ getProperty(String key, String defaultValue)1232 public static String getProperty(String key, String defaultValue) { 1233 return getProperty(key, defaultValue, defaultValue); 1234 } 1235 1236 /** 1237 * Get a property value, returning the value if there is one, otherwise null. 1238 */ getProperty(String key)1239 public static String getProperty(String key) { 1240 return getProperty(key, null, null); 1241 } 1242 1243 /** 1244 * Get a property value, returning the value if there is one (eg -Dkey=value), 1245 * the valueIfEmpty if there is one with no value (eg -Dkey) and the valueIfNull 1246 * if there is no property. 
1247 * 1248 * @param key 1249 * @param valueIfNull 1250 * @param valueIfEmpty 1251 * @return 1252 */ getProperty(String key, String valueIfNull, String valueIfEmpty)1253 public static String getProperty(String key, String valueIfNull, String valueIfEmpty) { 1254 String result = CLDRConfig.getInstance().getProperty(key); 1255 if (result == null) { 1256 result = valueIfNull; 1257 } else if (result.length() == 0) { 1258 result = valueIfEmpty; 1259 } 1260 return result; 1261 } 1262 hex(byte[] bytes, int start, int end, String separator)1263 public static String hex(byte[] bytes, int start, int end, String separator) { 1264 StringBuilder result = new StringBuilder(); 1265 for (int i = 0; i < end; ++i) { 1266 if (result.length() != 0) { 1267 result.append(separator); 1268 } 1269 result.append(Utility.hex(bytes[i] & 0xFF, 2)); 1270 } 1271 return result.toString(); 1272 } 1273 getProperty(String string, boolean b)1274 public static boolean getProperty(String string, boolean b) { 1275 return getProperty(string, b ? 
"true" : "false", "true").matches("(?i)T|TRUE"); 1276 } 1277 checkValidDirectory(String sourceDirectory)1278 public static String checkValidDirectory(String sourceDirectory) { 1279 return checkValidFile(sourceDirectory, true, null); 1280 } 1281 checkValidDirectory(String sourceDirectory, String correction)1282 public static String checkValidDirectory(String sourceDirectory, String correction) { 1283 return checkValidFile(sourceDirectory, true, correction); 1284 } 1285 checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction)1286 public static String checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction) { 1287 File file = null; 1288 String normalizedPath = null; 1289 try { 1290 file = new File(sourceDirectory); 1291 normalizedPath = PathUtilities.getNormalizedPathString(file) + File.separatorChar; 1292 } catch (Exception e) { 1293 } 1294 if (file == null || normalizedPath == null || checkForDirectory && !file.isDirectory()) { 1295 throw new RuntimeException("Directory not found: " + sourceDirectory 1296 + (normalizedPath == null ? "" : " => " + normalizedPath) 1297 + (correction == null ? "" : CldrUtility.LINE_SEPARATOR + correction)); 1298 } 1299 return normalizedPath; 1300 } 1301 1302 /** 1303 * Copy up to matching line (not included). If output is null, then just skip until. 1304 * 1305 * @param oldFile 1306 * file to copy 1307 * @param readUntilPattern 1308 * pattern to search for. If null, goes to end of file. 1309 * @param output 1310 * into to copy into. If null, just skips in the input. 1311 * @param includeMatchingLine 1312 * inclde the matching line when copying. 
1313 * @throws IOException 1314 */ copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern, final PrintWriter output, boolean includeMatchingLine)1315 public static void copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern, 1316 final PrintWriter output, boolean includeMatchingLine) throws IOException { 1317 Matcher readUntil = readUntilPattern == null ? null : readUntilPattern.matcher(""); 1318 while (true) { 1319 String line = oldFile.readLine(); 1320 if (line == null) { 1321 break; 1322 } 1323 if (line.startsWith("\uFEFF")) { 1324 line = line.substring(1); 1325 } 1326 if (readUntil != null && readUntil.reset(line).matches()) { 1327 if (includeMatchingLine && output != null) { 1328 output.println(line); 1329 } 1330 break; 1331 } 1332 if (output != null) { 1333 output.println(line); 1334 } 1335 } 1336 } 1337 1338 private static DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'GMT'"); 1339 private static DateFormat DATE_ONLY = new SimpleDateFormat("yyyy-MM-dd"); 1340 static { 1341 df.setTimeZone(TimeZone.getTimeZone("GMT")); 1342 DATE_ONLY.setTimeZone(TimeZone.getTimeZone("GMT")); 1343 } 1344 isoFormat(Date date)1345 public static String isoFormat(Date date) { 1346 synchronized (df) { 1347 return df.format(date); 1348 } 1349 } 1350 isoFormatDateOnly(Date date)1351 public static String isoFormatDateOnly(Date date) { 1352 synchronized (DATE_ONLY) { 1353 return DATE_ONLY.format(date); 1354 } 1355 } 1356 newConcurrentHashMap()1357 public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap() { 1358 // http://ria101.wordpress.com/2011/12/12/concurrenthashmap-avoid-a-common-misuse/ 1359 return new ConcurrentHashMap<>(4, 0.9f, 1); 1360 } 1361 newConcurrentHashMap(Map<K, V> source)1362 public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap(Map<K, V> source) { 1363 ConcurrentHashMap<K, V> result = newConcurrentHashMap(); 1364 result.putAll(source); 1365 return result; 1366 } 1367 equals(Object a, Object b)1368 public static 
boolean equals(Object a, Object b) { 1369 return a == b ? true 1370 : a == null || b == null ? false 1371 : a.equals(b); 1372 } 1373 getDoubleLink(String code)1374 public static String getDoubleLink(String code) { 1375 final String anchorSafe = TransliteratorUtilities.toHTML.transliterate(code).replace(" ", "_"); 1376 return "<a name='" + anchorSafe + "' href='#" + anchorSafe + "'>"; 1377 } 1378 getDoubleLinkedText(String anchor, String anchorText)1379 public static String getDoubleLinkedText(String anchor, String anchorText) { 1380 return getDoubleLink(anchor) + TransliteratorUtilities.toHTML.transliterate(anchorText).replace("_", " ") 1381 + "</a>"; 1382 } 1383 getDoubleLinkedText(String anchor)1384 public static String getDoubleLinkedText(String anchor) { 1385 return getDoubleLinkedText(anchor, anchor); 1386 } 1387 getDoubleLinkMsg()1388 public static String getDoubleLinkMsg() { 1389 return "<a name=''{0}'' href=''#{0}''>{0}</a>"; 1390 } 1391 getDoubleLinkMsg2()1392 public static String getDoubleLinkMsg2() { 1393 return "<a name=''{0}{1}'' href=''#{0}{1}''>{0}</a>"; 1394 } 1395 getCopyrightString()1396 public static String getCopyrightString() { 1397 return getCopyrightString(""); 1398 } 1399 1400 private static final class CopyrightHelper { 1401 public static final CopyrightHelper INSTANCE = new CopyrightHelper(); 1402 public final String COPYRIGHT_SHORT = 1403 "Copyright \u00A9 1991-" + Calendar.getInstance().get(Calendar.YEAR) + " Unicode, Inc."; 1404 } 1405 getCopyrightString(String linePrefix)1406 public static String getCopyrightString(String linePrefix) { 1407 // now do the rest 1408 return linePrefix + getCopyrightShort() + CldrUtility.LINE_SEPARATOR 1409 + linePrefix + "For terms of use, see http://www.unicode.org/copyright.html" + CldrUtility.LINE_SEPARATOR 1410 + linePrefix + CLDRURLS.UNICODE_SPDX_HEADER + CldrUtility.LINE_SEPARATOR 1411 + linePrefix + "CLDR data files are interpreted according to the LDML specification " + 
"(http://unicode.org/reports/tr35/)"; 1412 } 1413 1414 /** 1415 * Returns the '## License' section in markdown. 1416 */ getCopyrightMarkdown()1417 public static String getCopyrightMarkdown() { 1418 return "## License\n" + 1419 "\n" + 1420 getCopyrightShort() + "\n" + 1421 "[Terms of Use](http://www.unicode.org/copyright.html)\n\n" + 1422 CLDRURLS.UNICODE_SPDX_HEADER + "\n"; 1423 } 1424 1425 /** 1426 * Get the short copyright string, "Copyright © YYYY-YYYY Unicode, Inc." 1427 */ getCopyrightShort()1428 public static String getCopyrightShort() { 1429 return CopyrightHelper.INSTANCE.COPYRIGHT_SHORT; 1430 } 1431 1432 // TODO Move to collection utilities 1433 /** 1434 * Type-safe get 1435 * @param map 1436 * @param key 1437 * @return value 1438 */ get(M map, K key)1439 public static <K, V, M extends Map<K, V>> V get(M map, K key) { 1440 return map.get(key); 1441 } 1442 1443 /** 1444 * Type-safe contains 1445 * @param collection 1446 * @param key 1447 * @return value 1448 */ contains(C collection, K key)1449 public static <K, C extends Collection<K>> boolean contains(C collection, K key) { 1450 return collection.contains(key); 1451 } 1452 toEnumSet(Class<E> classValue, Collection<String> stringValues)1453 public static <E extends Enum<E>> EnumSet<E> toEnumSet(Class<E> classValue, Collection<String> stringValues) { 1454 EnumSet<E> result = EnumSet.noneOf(classValue); 1455 for (String s : stringValues) { 1456 result.add(Enum.valueOf(classValue, s)); 1457 } 1458 return result; 1459 } 1460 putNew(M map, K key, V value)1461 public static <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) { 1462 if (!map.containsKey(key)) { 1463 map.put(key, value); 1464 } 1465 return map; 1466 } 1467 cleanSemiFields(String line)1468 public static String[] cleanSemiFields(String line) { 1469 line = cleanLine(line); 1470 return line.isEmpty() ? 
null : SEMI_SPLIT.split(line); 1471 } 1472 cleanLine(String line)1473 private static String cleanLine(String line) { 1474 int comment = line.indexOf("#"); 1475 if (comment >= 0) { 1476 line = line.substring(0, comment); 1477 } 1478 if (line.startsWith("\uFEFF")) { 1479 line = line.substring(1); 1480 } 1481 return line.trim(); 1482 } 1483 handleFile(String filename, LineHandler handler)1484 public static void handleFile(String filename, LineHandler handler) throws IOException { 1485 try (BufferedReader in = getUTF8Data(filename);) { 1486 String line = null; 1487 while ((line = in.readLine()) != null) { 1488 // String line = in.readLine(); 1489 // if (line == null) { 1490 // break; 1491 // } 1492 try { 1493 if (!handler.handle(line)) { 1494 if (HANDLEFILE_SHOW_SKIP) { 1495 System.out.println("Skipping line: " + line); 1496 } 1497 } 1498 } catch (Exception e) { 1499 throw (RuntimeException) new IllegalArgumentException("Problem with line: " + line) 1500 .initCause(e); 1501 } 1502 } 1503 } 1504 // in.close(); 1505 } 1506 ifNull(T x, T y)1507 public static <T> T ifNull(T x, T y) { 1508 return x == null 1509 ? y 1510 : x; 1511 } 1512 ifSame(T source, T replaceIfSame, T replacement)1513 public static <T> T ifSame(T source, T replaceIfSame, T replacement) { 1514 return source == replaceIfSame ? replacement : source; 1515 } 1516 ifEqual(T source, T replaceIfSame, T replacement)1517 public static <T> T ifEqual(T source, T replaceIfSame, T replacement) { 1518 return Objects.equals(source, replaceIfSame) ? replacement : source; 1519 } 1520 intersect(Set<T> a, Collection<T> b)1521 public static <T> Set<T> intersect(Set<T> a, Collection<T> b) { 1522 Set<T> result = new LinkedHashSet<>(a); 1523 result.retainAll(b); 1524 return result; 1525 } 1526 subtract(Set<T> a, Collection<T> b)1527 public static <T> Set<T> subtract(Set<T> a, Collection<T> b) { 1528 Set<T> result = new LinkedHashSet<>(a); 1529 result.removeAll(b); 1530 return result; 1531 } 1532 deepEquals(Object... 
pairs)1533 public static boolean deepEquals(Object... pairs) { 1534 for (int item = 0; item < pairs.length;) { 1535 if (!Objects.deepEquals(pairs[item++], pairs[item++])) { 1536 return false; 1537 } 1538 } 1539 return true; 1540 } 1541 array(Splitter splitter, String source)1542 public static String[] array(Splitter splitter, String source) { 1543 List<String> list = splitter.splitToList(source); 1544 return list.toArray(new String[list.size()]); 1545 } 1546 toHex(String in, boolean javaStyle)1547 public static String toHex(String in, boolean javaStyle) { 1548 StringBuilder result = new StringBuilder(); 1549 for (int i = 0; i < in.length(); ++i) { 1550 result.append(toHex(in.charAt(i), javaStyle)); 1551 } 1552 return result.toString(); 1553 } 1554 toHex(int j, boolean javaStyle)1555 public static String toHex(int j, boolean javaStyle) { 1556 if (j == '\"') { 1557 return "\\\""; 1558 } else if (j == '\\') { 1559 return "\\\\"; 1560 } else if (0x20 < j && j < 0x7F) { 1561 return String.valueOf((char) j); 1562 } 1563 final String hexString = Integer.toHexString(j).toUpperCase(); 1564 int gap = 4 - hexString.length(); 1565 if (gap < 0) { 1566 gap = 0; 1567 } 1568 String prefix = javaStyle ? 
"\\u" : "U+"; 1569 return prefix + "000".substring(0, gap) + hexString; 1570 } 1571 1572 /** 1573 * get string format for debugging, since Java has a useless display for many items 1574 * @param item 1575 * @return 1576 */ toString(Object item)1577 public static String toString(Object item) { 1578 if (item instanceof Object[]) { 1579 return toString(Arrays.asList((Object[]) item)); 1580 } else if (item instanceof Entry) { 1581 return toString(((Entry) item).getKey()) + "≔" + toString(((Entry) item).getValue()); 1582 } else if (item instanceof Map) { 1583 return "{" + toString(((Map) item).entrySet()) + "}"; 1584 } else if (item instanceof Collection) { 1585 List<String> result = new ArrayList<>(); 1586 for (Object subitem : (Collection) item) { 1587 result.add(toString(subitem)); 1588 } 1589 return result.toString(); 1590 } 1591 return item.toString(); 1592 } 1593 1594 /** 1595 * Return the git hash for the CLDR base directory. 1596 * 1597 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1598 */ getCldrBaseDirHash()1599 public static String getCldrBaseDirHash() { 1600 final File baseDir = CLDRConfig.getInstance().getCldrBaseDirectory(); 1601 return getGitHashForDir(baseDir.toString()); 1602 } 1603 1604 /** 1605 * Return the git hash for a directory. 
1606 * 1607 * @param dir the directory name 1608 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1609 */ getGitHashForDir(String dir)1610 public final static String getGitHashForDir(String dir) { 1611 // Try #1 1612 String hash = getGitHashDirectlyForDir(dir); 1613 if (hash == null) { 1614 // Try #2 1615 hash = getGitHashByRevParseForDir(dir); 1616 } 1617 if (hash == null) { 1618 // return 'unknown' 1619 hash = CLDRURLS.UNKNOWN_REVISION; 1620 } 1621 return hash; 1622 } 1623 1624 /** 1625 * Attempt to retrieve git hash by digging through .git/HEAD and related files 1626 * @param dir 1627 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1628 */ getGitHashDirectlyForDir(String dir)1629 private static String getGitHashDirectlyForDir(String dir) { 1630 // First, try just reading .git/HEAD 1631 final File gitDir = new File(dir, ".git"); 1632 final File headfile = new File(gitDir, "HEAD"); 1633 if (headfile.canRead()) { 1634 // Try this first, fallback to git commands 1635 try { 1636 String s = Files.readString(headfile.toPath()); 1637 if (s != null && !s.isBlank()) { 1638 s = s.trim(); 1639 if (s.startsWith("ref: ")) { 1640 s = s.substring(5); // refs/heads/main 1641 final Path refPath = gitDir.toPath().resolve(s); 1642 if (refPath.startsWith(gitDir.toPath())) { 1643 s = Files.readString(refPath); 1644 if (s != null && !s.isBlank()) { 1645 return s.trim(); 1646 } 1647 } else { // ignore something like refs: ../../../yourfiles 1648 System.err.println("Ignoring strange git refPath " + refPath); 1649 } 1650 } // else, maybe detached head 1651 return s.trim(); 1652 } 1653 } catch (IOException e) { 1654 System.err.println(e + ": readString failed for " + headfile); 1655 e.printStackTrace(); 1656 } 1657 1658 } 1659 return null; // not found; 1660 } 1661 1662 /** 1663 * Attempt to retrieve git hash by calling 'git rev-parse HEAD' 1664 * @param dir 1665 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1666 */ 
getGitHashByRevParseForDir(String dir)1667 private static String getGitHashByRevParseForDir(String dir) { 1668 final String GIT_HASH_COMMANDS[] = { "git", "rev-parse", "HEAD" }; 1669 try { 1670 if (dir == null) { 1671 return null; // no dir 1672 } 1673 File f = new File(dir); 1674 if (!f.isDirectory()) { 1675 return null; // does not exist 1676 } 1677 Process p = Runtime.getRuntime().exec(GIT_HASH_COMMANDS, null, f); 1678 if (!p.waitFor(15, TimeUnit.SECONDS)) { 1679 System.err.println("Git query " + String.join(" ", GIT_HASH_COMMANDS) + " timed out"); 1680 p.destroyForcibly(); 1681 return null; 1682 } 1683 if (p.exitValue() != 0) { 1684 System.err.println("Error return : " + p.exitValue() + " from " + String.join(" ", GIT_HASH_COMMANDS)); 1685 try (BufferedReader is = new BufferedReader(new InputStreamReader(p.getErrorStream()))) { 1686 String str = is.readLine(); 1687 if (str.length() == 0) { 1688 throw new Exception("git returned empty"); 1689 } 1690 System.err.println("git: " + str); 1691 } 1692 return null; 1693 } 1694 try (BufferedReader is = new BufferedReader(new InputStreamReader(p.getInputStream()))) { 1695 String str = is.readLine(); 1696 if (str == null || str.length() == 0) { 1697 throw new Exception("git returned empty"); 1698 } 1699 return str; 1700 } 1701 } catch (Throwable t) { 1702 // We do not expect this to be called frequently. 1703 System.err.println("While trying to get 'git' hash for " + dir + " : " + t.getMessage()); 1704 t.printStackTrace(); 1705 return null; 1706 } 1707 } 1708 } 1709