1 package org.unicode.cldr.draft; 2 3 import static org.unicode.cldr.util.PathUtilities.getNormalizedPathString; 4 5 import com.ibm.icu.util.ICUUncheckedIOException; 6 import java.io.BufferedReader; 7 import java.io.BufferedWriter; 8 import java.io.File; 9 import java.io.FileInputStream; 10 import java.io.FileNotFoundException; 11 import java.io.FileOutputStream; 12 import java.io.IOException; 13 import java.io.InputStream; 14 import java.io.InputStreamReader; 15 import java.io.OutputStreamWriter; 16 import java.io.PrintWriter; 17 import java.io.UnsupportedEncodingException; 18 import java.net.URL; 19 import java.nio.charset.Charset; 20 import java.nio.charset.StandardCharsets; 21 import java.util.ArrayList; 22 import java.util.List; 23 import java.util.Locale; 24 import java.util.regex.Pattern; 25 import org.unicode.cldr.util.CldrUtility; 26 import org.unicode.cldr.util.PatternCache; 27 import org.unicode.cldr.util.With; 28 import org.unicode.cldr.util.With.SimpleIterator; 29 30 public final class FileUtilities { 31 public static final boolean SHOW_FILES; 32 33 static { 34 boolean showFiles = false; 35 try { 36 showFiles = System.getProperty("SHOW_FILES") != null; 37 } catch (SecurityException ignored) { 38 } 39 SHOW_FILES = showFiles; 40 } 41 42 public static final PrintWriter CONSOLE = new PrintWriter(System.out, true); 43 44 private static PrintWriter log = CONSOLE; 45 openUTF8Reader(String dir, String filename)46 public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException { 47 return openReader(dir, filename, "UTF-8"); 48 } 49 openUTF8Reader(File file)50 public static BufferedReader openUTF8Reader(File file) throws IOException { 51 return openReader(file, "UTF-8"); 52 } 53 openReader(String dir, String filename, String encoding)54 public static BufferedReader openReader(String dir, String filename, String encoding) 55 throws IOException { 56 File file = dir.length() == 0 ? new File(filename) : new File(dir, filename); 57 return openReader(file, encoding); 58 } 59 openReader(File file, String encoding)60 private static BufferedReader openReader(File file, String encoding) 61 throws UnsupportedEncodingException, FileNotFoundException { 62 if (SHOW_FILES && log != null) { 63 log.println("Opening File: " + getNormalizedPathString(file)); 64 } 65 return new BufferedReader( 66 new InputStreamReader(new FileInputStream(file), encoding), 4 * 1024); 67 } 68 openUTF8Writer(String dir, String filename)69 public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException { 70 return openWriter(dir, filename, StandardCharsets.UTF_8); 71 } 72 openUTF8Writer(File dir, String filename)73 public static PrintWriter openUTF8Writer(File dir, String filename) throws IOException { 74 return openWriter(dir, filename, StandardCharsets.UTF_8); 75 } 76 openUTF8Writer(File file)77 public static PrintWriter openUTF8Writer(File file) throws IOException { 78 return openWriter(file, StandardCharsets.UTF_8); 79 } 80 openWriter(File dir, String filename, Charset encoding)81 public static PrintWriter openWriter(File dir, String filename, Charset encoding) 82 throws IOException { 83 File file; 84 if (dir == null || dir.getPath().isEmpty()) { 85 file = new File(filename); 86 } else { 87 file = new File(dir, filename); 88 } 89 return openWriter(file, encoding); 90 } 91 openWriter(File file, Charset encoding)92 private static PrintWriter openWriter(File file, Charset encoding) throws IOException { 93 if (SHOW_FILES && log != null) { 94 log.println("Creating File: " + getNormalizedPathString(file)); 95 } 96 String parentName = file.getParent(); 97 if (parentName != null) { 98 File parent = new File(parentName); 99 parent.mkdirs(); 100 } 101 return new PrintWriter( 102 new BufferedWriter( 103 new OutputStreamWriter(new FileOutputStream(file), encoding), 4 * 1024)); 104 } 105 openWriter(String dir, String filename, String encoding)106 public static PrintWriter openWriter(String dir, String filename, String encoding) 107 throws IOException { 108 return openWriter(new File(dir), filename, Charset.forName(encoding)); 109 } 110 openWriter(String dir, String filename, Charset encoding)111 public static PrintWriter openWriter(String dir, String filename, Charset encoding) 112 throws IOException { 113 return openWriter(new File(dir), filename, encoding); 114 } 115 116 public abstract static class SemiFileReader extends FileProcessor { 117 public static final Pattern SPLIT = PatternCache.get("\\s*;\\s*"); 118 handleLine(int lineCount, int start, int end, String[] items)119 protected abstract boolean handleLine(int lineCount, int start, int end, String[] items); 120 121 @Override handleEnd()122 protected void handleEnd() {} 123 isCodePoint()124 protected boolean isCodePoint() { 125 return true; 126 } 127 splitLine(String line)128 protected String[] splitLine(String line) { 129 return SPLIT.split(line); 130 } 131 132 @Override handleLine(int lineCount, String line)133 protected boolean handleLine(int lineCount, String line) { 134 String[] parts = splitLine(line); 135 int start, end; 136 if (isCodePoint()) { 137 String source = parts[0]; 138 int range = source.indexOf(".."); 139 if (range >= 0) { 140 start = Integer.parseInt(source.substring(0, range), 16); 141 end = Integer.parseInt(source.substring(range + 2), 16); 142 } else { 143 start = end = Integer.parseInt(source, 16); 144 } 145 } else { 146 start = end = -1; 147 } 148 return handleLine(lineCount, start, end, parts); 149 } 150 } 151 152 public static class FileProcessor { 153 private int lineCount; 154 handleStart()155 protected void handleStart() {} 156 157 /** 158 * Return false to abort 159 * 160 * @param lineCount 161 * @param line 162 * @return 163 */ handleLine(int lineCount, String line)164 protected boolean handleLine(int lineCount, String line) { 165 return true; 166 } 167 handleEnd()168 protected void handleEnd() {} 169 getLineCount()170 public int getLineCount() { 171 return lineCount; 172 } 173 handleComment(String line, int commentCharPosition)174 public void handleComment(String line, int commentCharPosition) {} 175 process(Class<?> classLocation, String fileName)176 public FileProcessor process(Class<?> classLocation, String fileName) { 177 try { 178 BufferedReader in = openFile(classLocation, fileName); 179 return process(in, fileName); 180 } catch (Exception e) { 181 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e); 182 } 183 } 184 process(String fileName)185 public FileProcessor process(String fileName) { 186 try { 187 FileInputStream fileStream = new FileInputStream(fileName); 188 InputStreamReader reader = 189 new InputStreamReader(fileStream, StandardCharsets.UTF_8); 190 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64); 191 return process(bufferedReader, fileName); 192 } catch (Exception e) { 193 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e); 194 } 195 } 196 process(String directory, String fileName)197 public FileProcessor process(String directory, String fileName) { 198 try { 199 FileInputStream fileStream = 200 new FileInputStream(directory + File.separator + fileName); 201 InputStreamReader reader = 202 new InputStreamReader(fileStream, StandardCharsets.UTF_8); 203 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64); 204 return process(bufferedReader, fileName); 205 } catch (Exception e) { 206 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e); 207 } 208 } 209 process(BufferedReader in, String fileName)210 public FileProcessor process(BufferedReader in, String fileName) { 211 handleStart(); 212 String line = null; 213 lineCount = 1; 214 try { 215 for (; ; ++lineCount) { 216 line = in.readLine(); 217 if (line == null) { 218 break; 219 } 220 int comment = line.indexOf("#"); 221 if (comment >= 0) { 222 handleComment(line, comment); 223 line = line.substring(0, comment); 224 } 225 if (line.startsWith("\uFEFF")) { 226 line = line.substring(1); 227 } 228 line = line.trim(); 229 if (line.length() == 0) { 230 continue; 231 } 232 if (!handleLine(lineCount, line)) { 233 break; 234 } 235 } 236 in.close(); 237 handleEnd(); 238 } catch (Exception e) { 239 throw new ICUUncheckedIOException(lineCount + ":\t" + line, e); 240 } 241 return this; 242 } 243 } 244 245 // 246 // public static SemiFileReader fillMapFromSemi(Class classLocation, String fileName, 247 // SemiFileReader handler) { 248 // return handler.process(classLocation, fileName); 249 // } openFile(Class<?> class1, String file)250 public static BufferedReader openFile(Class<?> class1, String file) { 251 return openFile(class1, file, StandardCharsets.UTF_8); 252 } 253 openFile(Class<?> class1, String file, Charset charset)254 public static BufferedReader openFile(Class<?> class1, String file, Charset charset) { 255 // URL path = null; 256 // String externalForm = null; 257 try { 258 // //System.out.println("Reading:\t" + file1.getCanonicalPath()); 259 // path = class1.getResource(file); 260 // externalForm = path.toExternalForm(); 261 // if (externalForm.startsWith("file:")) { 262 // externalForm = externalForm.substring(5); 263 // } 264 // File file1 = new File(externalForm); 265 // boolean x = file1.canRead(); 266 // final InputStream resourceAsStream = new FileInputStream(file1); 267 final InputStream resourceAsStream = class1.getResourceAsStream(file); 268 // String foo = class1.getResource(".").toString(); 269 if (charset == null) { 270 charset = StandardCharsets.UTF_8; 271 } 272 InputStreamReader reader = new InputStreamReader(resourceAsStream, charset); 273 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64); 274 return bufferedReader; 275 } catch (Exception e) { 276 String className = class1 == null ? null : class1.getCanonicalName(); 277 String normalizedPath = null; 278 try { 279 String relativeFileName = getRelativeFileName(class1, "../util/"); 280 normalizedPath = getNormalizedPathString(relativeFileName); 281 } catch (Exception e1) { 282 throw new ICUUncheckedIOException( 283 "Couldn't open file: " + file + "; relative to class: " + className, e); 284 } 285 throw new ICUUncheckedIOException( 286 "Couldn't open file " 287 + file 288 + "; in path " 289 + normalizedPath 290 + "; relative to class: " 291 + className, 292 e); 293 } 294 } 295 openFile(String directory, String file, Charset charset)296 public static BufferedReader openFile(String directory, String file, Charset charset) { 297 try { 298 if (directory.equals("")) { 299 return new BufferedReader( 300 new InputStreamReader(new FileInputStream(new File(file)), charset)); 301 } else { 302 return new BufferedReader( 303 new InputStreamReader( 304 new FileInputStream(new File(directory, file)), charset)); 305 } 306 } catch (FileNotFoundException e) { 307 throw new ICUUncheckedIOException(e); // handle dang'd checked exception 308 } 309 } 310 openFile(File file, Charset charset)311 public static BufferedReader openFile(File file, Charset charset) { 312 try { 313 return new BufferedReader(new InputStreamReader(new FileInputStream(file), charset)); 314 } catch (FileNotFoundException e) { 315 throw new ICUUncheckedIOException(e); // handle dang'd checked exception 316 } 317 } 318 openFile(File file)319 public static BufferedReader openFile(File file) { 320 return openFile(file, StandardCharsets.UTF_8); 321 } 322 openFile(String directory, String file)323 public static BufferedReader openFile(String directory, String file) { 324 return openFile(directory, file, StandardCharsets.UTF_8); 325 } 326 splitCommaSeparated(String line)327 public static String[] splitCommaSeparated(String line) { 328 // items are separated by ',' 329 // each item is of the form abc... 330 // or "..." (required if a comma or quote is contained) 331 // " in a field is represented by "" 332 List<String> result = new ArrayList<>(); 333 StringBuilder item = new StringBuilder(); 334 boolean inQuote = false; 335 for (int i = 0; i < line.length(); ++i) { 336 char ch = line.charAt(i); // don't worry about supplementaries 337 switch (ch) { 338 case '"': 339 inQuote = !inQuote; 340 // at start or end, that's enough 341 // if get a quote when we are not in a quote, and not at start, then add it and 342 // return to inQuote 343 if (inQuote && item.length() != 0) { 344 item.append('"'); 345 inQuote = true; 346 } 347 break; 348 case ',': 349 if (!inQuote) { 350 result.add(item.toString()); 351 item.setLength(0); 352 } else { 353 item.append(ch); 354 } 355 break; 356 default: 357 item.append(ch); 358 break; 359 } 360 } 361 result.add(item.toString()); 362 return result.toArray(new String[result.size()]); 363 } 364 appendFile(Class<?> class1, String filename, PrintWriter out)365 public static void appendFile(Class<?> class1, String filename, PrintWriter out) { 366 appendFile(class1, filename, StandardCharsets.UTF_8, null, out); 367 } 368 appendFile( Class<?> class1, String filename, String[] replacementList, PrintWriter out)369 public static void appendFile( 370 Class<?> class1, String filename, String[] replacementList, PrintWriter out) { 371 appendFile(class1, filename, StandardCharsets.UTF_8, replacementList, out); 372 } 373 appendFile( Class<?> class1, String filename, Charset charset, String[] replacementList, PrintWriter out)374 public static void appendFile( 375 Class<?> class1, 376 String filename, 377 Charset charset, 378 String[] replacementList, 379 PrintWriter out) { 380 BufferedReader br = openFile(class1, filename, charset); 381 try { 382 try { 383 appendBufferedReader(br, out, replacementList); 384 } finally { 385 br.close(); 386 } 387 } catch (IOException e) { 388 throw new ICUUncheckedIOException(e); // wrap darn'd checked exception 389 } 390 } 391 appendFile(String filename, String encoding, PrintWriter output)392 public static void appendFile(String filename, String encoding, PrintWriter output) 393 throws IOException { 394 appendFile(filename, encoding, output, null); 395 } 396 appendFile( String filename, String encoding, PrintWriter output, String[] replacementList)397 public static void appendFile( 398 String filename, String encoding, PrintWriter output, String[] replacementList) 399 throws IOException { 400 BufferedReader br = openReader("", filename, encoding); 401 try { 402 appendBufferedReader(br, output, replacementList); 403 } finally { 404 br.close(); 405 } 406 } 407 appendBufferedReader( BufferedReader br, PrintWriter output, String[] replacementList)408 public static void appendBufferedReader( 409 BufferedReader br, PrintWriter output, String[] replacementList) throws IOException { 410 while (true) { 411 String line = br.readLine(); 412 if (line == null) break; 413 if (replacementList != null) { 414 for (int i = 0; i < replacementList.length; i += 2) { 415 line = replace(line, replacementList[i], replacementList[i + 1]); 416 } 417 } 418 output.println(line); 419 } 420 br.close(); 421 } 422 423 /** Replaces all occurrences of piece with replacement, and returns new String */ replace(String source, String piece, String replacement)424 public static String replace(String source, String piece, String replacement) { 425 if (source == null || source.length() < piece.length()) return source; 426 int pos = 0; 427 while (true) { 428 pos = source.indexOf(piece, pos); 429 if (pos < 0) return source; 430 source = 431 source.substring(0, pos) + replacement + source.substring(pos + piece.length()); 432 pos += replacement.length(); 433 } 434 } 435 replace(String source, String[][] replacements)436 public static String replace(String source, String[][] replacements) { 437 return replace(source, replacements, replacements.length); 438 } 439 replace(String source, String[][] replacements, int count)440 public static String replace(String source, String[][] replacements, int count) { 441 for (int i = 0; i < count; ++i) { 442 source = replace(source, replacements[i][0], replacements[i][1]); 443 } 444 return source; 445 } 446 replace(String source, String[][] replacements, boolean reverse)447 public static String replace(String source, String[][] replacements, boolean reverse) { 448 if (!reverse) return replace(source, replacements); 449 for (int i = 0; i < replacements.length; ++i) { 450 source = replace(source, replacements[i][1], replacements[i][0]); 451 } 452 return source; 453 } 454 anchorize(String source)455 public static String anchorize(String source) { 456 String result = source.toLowerCase(Locale.ENGLISH).replaceAll("[^\\p{L}\\p{N}]+", "_"); 457 if (result.endsWith("_")) result = result.substring(0, result.length() - 1); 458 if (result.startsWith("_")) result = result.substring(1); 459 return result; 460 } 461 copyFile(Class<?> class1, String sourceFile, String targetDirectory)462 public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory) { 463 copyFile(class1, sourceFile, targetDirectory, sourceFile, null); 464 } 465 copyFile( Class<?> class1, String sourceFile, String targetDirectory, String newName)466 public static void copyFile( 467 Class<?> class1, String sourceFile, String targetDirectory, String newName) { 468 copyFile(class1, sourceFile, targetDirectory, newName, null); 469 } 470 copyFile( Class<?> class1, String sourceFile, String targetDirectory, String newName, String[] replacementList)471 public static void copyFile( 472 Class<?> class1, 473 String sourceFile, 474 String targetDirectory, 475 String newName, 476 String[] replacementList) { 477 try { 478 PrintWriter out = openUTF8Writer(targetDirectory, newName); 479 appendFile(class1, sourceFile, StandardCharsets.UTF_8, replacementList, out); 480 out.close(); 481 } catch (IOException e) { 482 throw new ICUUncheckedIOException(e); // dang'd checked exceptions 483 } 484 } 485 getRelativeFileName(Class<?> class1, String filename)486 public static String getRelativeFileName(Class<?> class1, String filename) { 487 URL resource = class1.getResource(filename); 488 String resourceString = resource.toString(); 489 if (resourceString.startsWith("file:")) { 490 return resourceString.substring(5); 491 } else if (resourceString.startsWith("jar:file:")) { 492 return resourceString.substring(9); 493 } else { 494 throw new ICUUncheckedIOException("File not found: " + resourceString); 495 } 496 } 497 498 /** 499 * Simple API to iterate over file lines. Example: for (String s : 500 * FileUtilities.in(directory,name)) { ... } 501 * 502 * @author markdavis 503 */ in(Class<?> class1, String file)504 public static Iterable<String> in(Class<?> class1, String file) { 505 return With.in(new FileLines(openFile(class1, file, StandardCharsets.UTF_8))); 506 } 507 508 /** 509 * Simple API to iterate over file lines. Example: for (String s : 510 * FileUtilities.in(directory,name)) { ... } 511 * 512 * @author markdavis 513 */ in(Class<?> class1, String file, Charset charset)514 public static Iterable<String> in(Class<?> class1, String file, Charset charset) { 515 return With.in(new FileLines(openFile(class1, file, charset))); 516 } 517 518 /** 519 * Simple API to iterate over file lines. Example: for (String s : 520 * FileUtilities.in(directory,name)) { ... } 521 * 522 * @author markdavis 523 */ in(String directory, String file)524 public static Iterable<String> in(String directory, String file) { 525 return With.in(new FileLines(openFile(directory, file, StandardCharsets.UTF_8))); 526 } 527 528 /** 529 * Simple API to iterate over file lines. Example: for (String s : 530 * FileUtilities.in(directory,name)) { ... } 531 * 532 * @author markdavis 533 */ in(BufferedReader reader)534 public static Iterable<String> in(BufferedReader reader) { 535 return With.in(new FileLines(reader)); 536 } 537 538 /** 539 * Simple API to iterate over file lines. Example: for (String s : 540 * FileUtilities.in(directory,name)) { ... } 541 * 542 * @author markdavis 543 */ in(String directory, String file, Charset charset)544 public static Iterable<String> in(String directory, String file, Charset charset) { 545 return With.in(new FileLines(openFile(directory, file, charset))); 546 } 547 548 private static class FileLines implements SimpleIterator<String> { 549 private BufferedReader input; 550 FileLines(BufferedReader input)551 public FileLines(BufferedReader input) { 552 this.input = input; 553 } 554 555 @Override next()556 public String next() { 557 try { 558 String result = input.readLine(); 559 if (result == null) { 560 input.close(); 561 } 562 return result; 563 } catch (IOException e) { 564 throw new ICUUncheckedIOException(e); // handle dang'd checked exception 565 } 566 } 567 } 568 cleanLine(String line)569 public static String cleanLine(String line) { 570 int comment = line.indexOf("#"); 571 if (comment >= 0) { 572 line = line.substring(0, comment); 573 } 574 if (line.startsWith("\uFEFF")) { 575 line = line.substring(1); 576 } 577 return line.trim(); 578 } 579 580 public static final Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*"); 581 private static final boolean SHOW_SKIP = false; 582 cleanSemiFields(String line)583 public static String[] cleanSemiFields(String line) { 584 line = cleanLine(line); 585 return line.isEmpty() ? null : SEMI_SPLIT.split(line); 586 } 587 588 public interface LineHandler { 589 /** 590 * Return false if line was skipped 591 * 592 * @param line 593 * @return 594 */ handle(String line)595 boolean handle(String line) throws Exception; 596 } 597 handleFile(String filename, LineHandler handler)598 public static void handleFile(String filename, LineHandler handler) throws IOException { 599 BufferedReader in = CldrUtility.getUTF8Data(filename); 600 while (true) { 601 String line = in.readLine(); 602 if (line == null) { 603 break; 604 } 605 try { 606 if (!handler.handle(line)) { 607 if (SHOW_SKIP) System.out.println("Skipping line: " + line); 608 } 609 } catch (Exception e) { 610 throw new ICUUncheckedIOException("Problem with line: " + line, e); 611 } 612 } 613 in.close(); 614 } 615 in(File file)616 public static Iterable<String> in(File file) { 617 return With.in(new FileLines(openFile(file, StandardCharsets.UTF_8))); 618 } 619 } 620