1 package org.unicode.cldr.draft; 2 3 import static org.unicode.cldr.util.PathUtilities.getNormalizedPathString; 4 5 import java.io.BufferedReader; 6 import java.io.BufferedWriter; 7 import java.io.File; 8 import java.io.FileInputStream; 9 import java.io.FileNotFoundException; 10 import java.io.FileOutputStream; 11 import java.io.IOException; 12 import java.io.InputStream; 13 import java.io.InputStreamReader; 14 import java.io.OutputStreamWriter; 15 import java.io.PrintWriter; 16 import java.io.UnsupportedEncodingException; 17 import java.net.URL; 18 import java.nio.charset.Charset; 19 import java.nio.charset.StandardCharsets; 20 import java.util.ArrayList; 21 import java.util.List; 22 import java.util.Locale; 23 import java.util.regex.Pattern; 24 25 import org.unicode.cldr.util.CldrUtility; 26 import org.unicode.cldr.util.PatternCache; 27 import org.unicode.cldr.util.With; 28 import org.unicode.cldr.util.With.SimpleIterator; 29 30 import com.ibm.icu.util.ICUUncheckedIOException; 31 32 public final class FileUtilities { 33 public static final boolean SHOW_FILES; 34 static { 35 boolean showFiles = false; 36 try { 37 showFiles = System.getProperty("SHOW_FILES") != null; 38 } catch (SecurityException ignored) { 39 } 40 SHOW_FILES = showFiles; 41 } 42 43 public static final PrintWriter CONSOLE = new PrintWriter(System.out, true); 44 45 private static PrintWriter log = CONSOLE; 46 openUTF8Reader(String dir, String filename)47 public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException { 48 return openReader(dir, filename, "UTF-8"); 49 } 50 openUTF8Reader(File file)51 public static BufferedReader openUTF8Reader(File file) throws IOException { 52 return openReader(file, "UTF-8"); 53 } 54 openReader(String dir, String filename, String encoding)55 public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException { 56 File file = dir.length() == 0 ? new File(filename) : new File(dir, filename); 57 return openReader(file, encoding); 58 } 59 openReader(File file, String encoding)60 private static BufferedReader openReader(File file, String encoding) throws UnsupportedEncodingException, FileNotFoundException { 61 if (SHOW_FILES && log != null) { 62 log.println("Opening File: " 63 + getNormalizedPathString(file)); 64 } 65 return new BufferedReader( 66 new InputStreamReader( 67 new FileInputStream(file), 68 encoding), 69 4 * 1024); 70 } 71 openUTF8Writer(String dir, String filename)72 public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException { 73 return openWriter(dir, filename, StandardCharsets.UTF_8); 74 } 75 openUTF8Writer(File dir, String filename)76 public static PrintWriter openUTF8Writer(File dir, String filename) throws IOException { 77 return openWriter(dir, filename, StandardCharsets.UTF_8); 78 } 79 openUTF8Writer(File file)80 public static PrintWriter openUTF8Writer(File file) throws IOException { 81 return openWriter(file, StandardCharsets.UTF_8); 82 } 83 openWriter(File dir, String filename, Charset encoding)84 public static PrintWriter openWriter(File dir, String filename, Charset encoding) throws IOException { 85 File file; 86 if (dir == null) { 87 file = new File(filename); 88 } else { 89 file = new File(dir, filename); 90 } 91 return openWriter(file, encoding); 92 } 93 openWriter(File file, Charset encoding)94 private static PrintWriter openWriter(File file, Charset encoding) throws IOException { 95 if (SHOW_FILES && log != null) { 96 log.println("Creating File: " + getNormalizedPathString(file)); 97 } 98 String parentName = file.getParent(); 99 if (parentName != null) { 100 File parent = new File(parentName); 101 parent.mkdirs(); 102 } 103 return new PrintWriter( 104 new BufferedWriter( 105 new OutputStreamWriter( 106 new FileOutputStream(file), 107 encoding), 108 4 * 1024)); 109 } 110 openWriter(String dir, String filename, String encoding)111 public static PrintWriter openWriter(String dir, String filename, String encoding) throws IOException { 112 return openWriter(new File(dir), filename, Charset.forName(encoding)); 113 } 114 openWriter(String dir, String filename, Charset encoding)115 public static PrintWriter openWriter(String dir, String filename, Charset encoding) throws IOException { 116 return openWriter(new File(dir), filename, encoding); 117 } 118 119 public static abstract class SemiFileReader extends FileProcessor { 120 public final static Pattern SPLIT = PatternCache.get("\\s*;\\s*"); 121 handleLine(int lineCount, int start, int end, String[] items)122 protected abstract boolean handleLine(int lineCount, int start, int end, String[] items); 123 124 @Override handleEnd()125 protected void handleEnd() { 126 } 127 isCodePoint()128 protected boolean isCodePoint() { 129 return true; 130 } 131 splitLine(String line)132 protected String[] splitLine(String line) { 133 return SPLIT.split(line); 134 } 135 136 @Override handleLine(int lineCount, String line)137 protected boolean handleLine(int lineCount, String line) { 138 String[] parts = splitLine(line); 139 int start, end; 140 if (isCodePoint()) { 141 String source = parts[0]; 142 int range = source.indexOf(".."); 143 if (range >= 0) { 144 start = Integer.parseInt(source.substring(0, range), 16); 145 end = Integer.parseInt(source.substring(range + 2), 16); 146 } else { 147 start = end = Integer.parseInt(source, 16); 148 } 149 } else { 150 start = end = -1; 151 } 152 return handleLine(lineCount, start, end, parts); 153 } 154 } 155 156 public static class FileProcessor { 157 private int lineCount; 158 handleStart()159 protected void handleStart() { 160 } 161 162 /** 163 * Return false to abort 164 * 165 * @param lineCount 166 * @param line 167 * @return 168 */ handleLine(int lineCount, String line)169 protected boolean handleLine(int lineCount, String line) { 170 return true; 171 } 172 handleEnd()173 protected void handleEnd() { 174 } 175 getLineCount()176 public int getLineCount() { 177 return lineCount; 178 } 179 handleComment(String line, int commentCharPosition)180 public void handleComment(String line, int commentCharPosition) { 181 } 182 process(Class<?> classLocation, String fileName)183 public FileProcessor process(Class<?> classLocation, String fileName) { 184 try { 185 BufferedReader in = openFile(classLocation, fileName); 186 return process(in, fileName); 187 } catch (Exception e) { 188 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e); 189 } 190 191 } 192 process(String fileName)193 public FileProcessor process(String fileName) { 194 try { 195 FileInputStream fileStream = new FileInputStream(fileName); 196 InputStreamReader reader = new InputStreamReader(fileStream, StandardCharsets.UTF_8); 197 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64); 198 return process(bufferedReader, fileName); 199 } catch (Exception e) { 200 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e); 201 } 202 } 203 process(String directory, String fileName)204 public FileProcessor process(String directory, String fileName) { 205 try { 206 FileInputStream fileStream = new FileInputStream(directory + File.separator + fileName); 207 InputStreamReader reader = new InputStreamReader(fileStream, StandardCharsets.UTF_8); 208 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64); 209 return process(bufferedReader, fileName); 210 } catch (Exception e) { 211 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e); 212 } 213 } 214 process(BufferedReader in, String fileName)215 public FileProcessor process(BufferedReader in, String fileName) { 216 handleStart(); 217 String line = null; 218 lineCount = 1; 219 try { 220 for (;; ++lineCount) { 221 line = in.readLine(); 222 if (line == null) { 223 break; 224 } 225 int comment = line.indexOf("#"); 226 if (comment >= 0) { 227 handleComment(line, comment); 228 line = line.substring(0, comment); 229 } 230 if (line.startsWith("\uFEFF")) { 231 line = line.substring(1); 232 } 233 line = line.trim(); 234 if (line.length() == 0) { 235 continue; 236 } 237 if (!handleLine(lineCount, line)) { 238 break; 239 } 240 } 241 in.close(); 242 handleEnd(); 243 } catch (Exception e) { 244 throw new ICUUncheckedIOException(lineCount + ":\t" + line, e); 245 } 246 return this; 247 } 248 } 249 250 // 251 // public static SemiFileReader fillMapFromSemi(Class classLocation, String fileName, SemiFileReader handler) { 252 // return handler.process(classLocation, fileName); 253 // } openFile(Class<?> class1, String file)254 public static BufferedReader openFile(Class<?> class1, String file) { 255 return openFile(class1, file, StandardCharsets.UTF_8); 256 } 257 openFile(Class<?> class1, String file, Charset charset)258 public static BufferedReader openFile(Class<?> class1, String file, Charset charset) { 259 // URL path = null; 260 // String externalForm = null; 261 try { 262 // //System.out.println("Reading:\t" + file1.getCanonicalPath()); 263 // path = class1.getResource(file); 264 // externalForm = path.toExternalForm(); 265 // if (externalForm.startsWith("file:")) { 266 // externalForm = externalForm.substring(5); 267 // } 268 // File file1 = new File(externalForm); 269 // boolean x = file1.canRead(); 270 // final InputStream resourceAsStream = new FileInputStream(file1); 271 final InputStream resourceAsStream = class1.getResourceAsStream(file); 272 // String foo = class1.getResource(".").toString(); 273 if (charset == null) { 274 charset = StandardCharsets.UTF_8; 275 } 276 InputStreamReader reader = new InputStreamReader(resourceAsStream, charset); 277 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64); 278 return bufferedReader; 279 } catch (Exception e) { 280 String className = class1 == null ? null : class1.getCanonicalName(); 281 String normalizedPath = null; 282 try { 283 String relativeFileName = getRelativeFileName(class1, "../util/"); 284 normalizedPath = getNormalizedPathString(relativeFileName); 285 } catch (Exception e1) { 286 throw new ICUUncheckedIOException("Couldn't open file: " + file + "; relative to class: " 287 + className, e); 288 } 289 throw new ICUUncheckedIOException("Couldn't open file " + file + "; in path " + normalizedPath + "; relative to class: " 290 + className, e); 291 } 292 } 293 openFile(String directory, String file, Charset charset)294 public static BufferedReader openFile(String directory, String file, Charset charset) { 295 try { 296 if (directory.equals("")) { 297 return new BufferedReader(new InputStreamReader(new FileInputStream(new File(file)), charset)); 298 } else { 299 return new BufferedReader(new InputStreamReader(new FileInputStream(new File(directory, file)), charset)); 300 } 301 } catch (FileNotFoundException e) { 302 throw new ICUUncheckedIOException(e); // handle dang'd checked exception 303 } 304 } 305 openFile(File file, Charset charset)306 public static BufferedReader openFile(File file, Charset charset) { 307 try { 308 return new BufferedReader(new InputStreamReader(new FileInputStream(file), charset)); 309 } catch (FileNotFoundException e) { 310 throw new ICUUncheckedIOException(e); // handle dang'd checked exception 311 } 312 } 313 openFile(File file)314 public static BufferedReader openFile(File file) { 315 return openFile(file, StandardCharsets.UTF_8); 316 } 317 openFile(String directory, String file)318 public static BufferedReader openFile(String directory, String file) { 319 return openFile(directory, file, StandardCharsets.UTF_8); 320 } 321 splitCommaSeparated(String line)322 public static String[] splitCommaSeparated(String line) { 323 // items are separated by ',' 324 // each item is of the form abc... 325 // or "..." (required if a comma or quote is contained) 326 // " in a field is represented by "" 327 List<String> result = new ArrayList<>(); 328 StringBuilder item = new StringBuilder(); 329 boolean inQuote = false; 330 for (int i = 0; i < line.length(); ++i) { 331 char ch = line.charAt(i); // don't worry about supplementaries 332 switch (ch) { 333 case '"': 334 inQuote = !inQuote; 335 // at start or end, that's enough 336 // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote 337 if (inQuote && item.length() != 0) { 338 item.append('"'); 339 inQuote = true; 340 } 341 break; 342 case ',': 343 if (!inQuote) { 344 result.add(item.toString()); 345 item.setLength(0); 346 } else { 347 item.append(ch); 348 } 349 break; 350 default: 351 item.append(ch); 352 break; 353 } 354 } 355 result.add(item.toString()); 356 return result.toArray(new String[result.size()]); 357 } 358 appendFile(Class<?> class1, String filename, PrintWriter out)359 public static void appendFile(Class<?> class1, String filename, PrintWriter out) { 360 appendFile(class1, filename, StandardCharsets.UTF_8, null, out); 361 } 362 appendFile(Class<?> class1, String filename, String[] replacementList, PrintWriter out)363 public static void appendFile(Class<?> class1, String filename, String[] replacementList, PrintWriter out) { 364 appendFile(class1, filename, StandardCharsets.UTF_8, replacementList, out); 365 } 366 appendFile(Class<?> class1, String filename, Charset charset, String[] replacementList, PrintWriter out)367 public static void appendFile(Class<?> class1, String filename, Charset charset, String[] replacementList, 368 PrintWriter out) { 369 BufferedReader br = openFile(class1, filename, charset); 370 try { 371 try { 372 appendBufferedReader(br, out, replacementList); 373 } finally { 374 br.close(); 375 } 376 } catch (IOException e) { 377 throw new ICUUncheckedIOException(e); // wrap darn'd checked exception 378 } 379 } 380 appendFile(String filename, String encoding, PrintWriter output)381 public static void appendFile(String filename, String encoding, PrintWriter output) throws IOException { 382 appendFile(filename, encoding, output, null); 383 } 384 appendFile(String filename, String encoding, PrintWriter output, String[] replacementList)385 public static void appendFile(String filename, String encoding, PrintWriter output, String[] replacementList) throws IOException { 386 BufferedReader br = openReader("", filename, encoding); 387 try { 388 appendBufferedReader(br, output, replacementList); 389 } finally { 390 br.close(); 391 } 392 } 393 appendBufferedReader(BufferedReader br, PrintWriter output, String[] replacementList)394 public static void appendBufferedReader(BufferedReader br, 395 PrintWriter output, String[] replacementList) throws IOException { 396 while (true) { 397 String line = br.readLine(); 398 if (line == null) break; 399 if (replacementList != null) { 400 for (int i = 0; i < replacementList.length; i += 2) { 401 line = replace(line, replacementList[i], replacementList[i + 1]); 402 } 403 } 404 output.println(line); 405 } 406 br.close(); 407 } 408 409 /** 410 * Replaces all occurrences of piece with replacement, and returns new String 411 */ replace(String source, String piece, String replacement)412 public static String replace(String source, String piece, String replacement) { 413 if (source == null || source.length() < piece.length()) return source; 414 int pos = 0; 415 while (true) { 416 pos = source.indexOf(piece, pos); 417 if (pos < 0) return source; 418 source = source.substring(0, pos) + replacement + source.substring(pos + piece.length()); 419 pos += replacement.length(); 420 } 421 } 422 replace(String source, String[][] replacements)423 public static String replace(String source, String[][] replacements) { 424 return replace(source, replacements, replacements.length); 425 } 426 replace(String source, String[][] replacements, int count)427 public static String replace(String source, String[][] replacements, int count) { 428 for (int i = 0; i < count; ++i) { 429 source = replace(source, replacements[i][0], replacements[i][1]); 430 } 431 return source; 432 } 433 replace(String source, String[][] replacements, boolean reverse)434 public static String replace(String source, String[][] replacements, boolean reverse) { 435 if (!reverse) return replace(source, replacements); 436 for (int i = 0; i < replacements.length; ++i) { 437 source = replace(source, replacements[i][1], replacements[i][0]); 438 } 439 return source; 440 } 441 anchorize(String source)442 public static String anchorize(String source) { 443 String result = source.toLowerCase(Locale.ENGLISH).replaceAll("[^\\p{L}\\p{N}]+", "_"); 444 if (result.endsWith("_")) result = result.substring(0, result.length() - 1); 445 if (result.startsWith("_")) result = result.substring(1); 446 return result; 447 } 448 copyFile(Class<?> class1, String sourceFile, String targetDirectory)449 public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory) { 450 copyFile(class1, sourceFile, targetDirectory, sourceFile, null); 451 } 452 copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName)453 public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName) { 454 copyFile(class1, sourceFile, targetDirectory, newName, null); 455 } 456 copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName, String[] replacementList)457 public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName, String[] replacementList) { 458 try { 459 PrintWriter out = openUTF8Writer(targetDirectory, newName); 460 appendFile(class1, sourceFile, StandardCharsets.UTF_8, replacementList, out); 461 out.close(); 462 } catch (IOException e) { 463 throw new ICUUncheckedIOException(e); // dang'd checked exceptions 464 } 465 } 466 getRelativeFileName(Class<?> class1, String filename)467 public static String getRelativeFileName(Class<?> class1, String filename) { 468 URL resource = class1.getResource(filename); 469 String resourceString = resource.toString(); 470 if (resourceString.startsWith("file:")) { 471 return resourceString.substring(5); 472 } else if (resourceString.startsWith("jar:file:")) { 473 return resourceString.substring(9); 474 } else { 475 throw new ICUUncheckedIOException("File not found: " + resourceString); 476 } 477 } 478 479 /** 480 * Simple API to iterate over file lines. Example: 481 * for (String s : FileUtilities.in(directory,name)) { 482 * ... 483 * } 484 * 485 * @author markdavis 486 * 487 */ in(Class<?> class1, String file)488 public static Iterable<String> in(Class<?> class1, String file) { 489 return With.in(new FileLines(openFile(class1, file, StandardCharsets.UTF_8))); 490 } 491 492 /** 493 * Simple API to iterate over file lines. Example: 494 * for (String s : FileUtilities.in(directory,name)) { 495 * ... 496 * } 497 * 498 * @author markdavis 499 * 500 */ in(Class<?> class1, String file, Charset charset)501 public static Iterable<String> in(Class<?> class1, String file, Charset charset) { 502 return With.in(new FileLines(openFile(class1, file, charset))); 503 } 504 505 /** 506 * Simple API to iterate over file lines. Example: 507 * for (String s : FileUtilities.in(directory,name)) { 508 * ... 509 * } 510 * 511 * @author markdavis 512 * 513 */ in(String directory, String file)514 public static Iterable<String> in(String directory, String file) { 515 return With.in(new FileLines(openFile(directory, file, StandardCharsets.UTF_8))); 516 } 517 518 /** 519 * Simple API to iterate over file lines. Example: 520 * for (String s : FileUtilities.in(directory,name)) { 521 * ... 522 * } 523 * 524 * @author markdavis 525 * 526 */ in(BufferedReader reader)527 public static Iterable<String> in(BufferedReader reader) { 528 return With.in(new FileLines(reader)); 529 } 530 531 /** 532 * Simple API to iterate over file lines. Example: 533 * for (String s : FileUtilities.in(directory,name)) { 534 * ... 535 * } 536 * 537 * @author markdavis 538 * 539 */ in(String directory, String file, Charset charset)540 public static Iterable<String> in(String directory, String file, Charset charset) { 541 return With.in(new FileLines(openFile(directory, file, charset))); 542 } 543 544 private static class FileLines implements SimpleIterator<String> { 545 private BufferedReader input; 546 FileLines(BufferedReader input)547 public FileLines(BufferedReader input) { 548 this.input = input; 549 } 550 551 @Override next()552 public String next() { 553 try { 554 String result = input.readLine(); 555 if (result == null) { 556 input.close(); 557 } 558 return result; 559 } catch (IOException e) { 560 throw new ICUUncheckedIOException(e); // handle dang'd checked exception 561 } 562 } 563 564 } 565 cleanLine(String line)566 public static String cleanLine(String line) { 567 int comment = line.indexOf("#"); 568 if (comment >= 0) { 569 line = line.substring(0, comment); 570 } 571 if (line.startsWith("\uFEFF")) { 572 line = line.substring(1); 573 } 574 return line.trim(); 575 } 576 577 public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*"); 578 private static final boolean SHOW_SKIP = false; 579 cleanSemiFields(String line)580 public static String[] cleanSemiFields(String line) { 581 line = cleanLine(line); 582 return line.isEmpty() ? null : SEMI_SPLIT.split(line); 583 } 584 585 public interface LineHandler { 586 /** 587 * Return false if line was skipped 588 * 589 * @param line 590 * @return 591 */ handle(String line)592 boolean handle(String line) throws Exception; 593 } 594 handleFile(String filename, LineHandler handler)595 public static void handleFile(String filename, LineHandler handler) throws IOException { 596 BufferedReader in = CldrUtility.getUTF8Data(filename); 597 while (true) { 598 String line = in.readLine(); 599 if (line == null) { 600 break; 601 } 602 try { 603 if (!handler.handle(line)) { 604 if (SHOW_SKIP) System.out.println("Skipping line: " + line); 605 } 606 } catch (Exception e) { 607 throw new ICUUncheckedIOException("Problem with line: " + line, e); 608 } 609 } 610 in.close(); 611 } 612 in(File file)613 public static Iterable<String> in(File file) { 614 return With.in(new FileLines(openFile(file, StandardCharsets.UTF_8))); 615 } 616 } 617