package org.unicode.cldr.draft;

import static org.unicode.cldr.util.PathUtilities.getNormalizedPathString;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.With;
import org.unicode.cldr.util.With.SimpleIterator;

import com.ibm.icu.util.ICUUncheckedIOException;

/**
 * Static helpers for opening, reading, writing and copying text files and class-path resources,
 * plus a few small string utilities used while processing them.
 */
public final class FileUtilities {
    /** True when the system property {@code SHOW_FILES} is set (to any value). */
    public static final boolean SHOW_FILES;
    static {
        boolean showFiles = false;
        try {
            showFiles = System.getProperty("SHOW_FILES") != null;
        } catch (SecurityException ignored) {
            // Not allowed to read system properties: keep the default (false).
        }
        SHOW_FILES = showFiles;
    }

    /** Auto-flushing writer wrapped around {@code System.out}. */
    public static final PrintWriter CONSOLE = new PrintWriter(System.out, true);

    /** Destination for the "Opening File"/"Creating File" messages printed when {@link #SHOW_FILES} is true. */
    private static PrintWriter log = CONSOLE;

    /**
     * Opens a file for reading as UTF-8.
     *
     * @param dir directory of the file; an empty string means {@code filename} is used on its own
     * @param filename name of the file
     * @return a buffered reader over the file
     * @throws IOException if the file cannot be opened
     */
    public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException {
        return openReader(dir, filename, "UTF-8");
    }

    /**
     * Opens a file for reading with the named encoding.
     *
     * @param dir directory of the file; an empty string means {@code filename} is used on its own
     * @param filename name of the file
     * @param encoding charset name understood by {@link InputStreamReader}
     * @return a buffered reader (4 KiB buffer) over the file
     * @throws IOException if the file cannot be opened or the encoding is not supported
     */
    public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException {
        File file = dir.length() == 0 ? new File(filename) : new File(dir, filename);
        if (SHOW_FILES && log != null) {
            log.println("Opening File: "
                + getNormalizedPathString(file));
        }
        FileInputStream stream = new FileInputStream(file);
        try {
            return new BufferedReader(new InputStreamReader(stream, encoding), 4 * 1024);
        } catch (IOException | RuntimeException e) {
            // The stream is already open; do not leak it when the encoding is rejected.
            try {
                stream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Opens a file for writing as UTF-8, creating parent directories as needed.
     *
     * @param dir directory of the file
     * @param filename name of the file
     * @return a writer over the (truncated or newly created) file
     * @throws IOException if the file cannot be opened for writing
     */
    public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException {
        return openWriter(dir, filename, StandardCharsets.UTF_8);
    }

    /**
     * Opens a file for writing as UTF-8, creating parent directories as needed.
     *
     * @param dir directory of the file
     * @param filename name of the file
     * @return a writer over the (truncated or newly created) file
     * @throws IOException if the file cannot be opened for writing
     */
    public static PrintWriter openUTF8Writer(File dir, String filename) throws IOException {
        return openWriter(dir, filename, StandardCharsets.UTF_8);
    }

    /**
     * Opens a file for writing with the given charset. Missing parent directories are created first.
     *
     * @param dir directory of the file
     * @param filename name of the file
     * @param encoding charset used to encode the output
     * @return a buffered (4 KiB) writer over the file
     * @throws IOException if the file cannot be opened for writing
     */
    public static PrintWriter openWriter(File dir, String filename, Charset encoding) throws IOException {
        File file = new File(dir, filename);
        if (SHOW_FILES && log != null) {
            log.println("Creating File: " + getNormalizedPathString(file));
        }
        String parentName = file.getParent();
        if (parentName != null) {
            // Result deliberately ignored: if the directory is unusable the FileOutputStream below fails.
            new File(parentName).mkdirs();
        }
        return new PrintWriter(
            new BufferedWriter(
                new OutputStreamWriter(
                    new FileOutputStream(file),
                    encoding),
                4 * 1024));
    }

    /**
     * Same as {@link #openWriter(File, String, Charset)}, with the directory given as a path and the
     * charset given by name.
     *
     * @throws IOException if the file cannot be opened for writing
     */
    public static PrintWriter openWriter(String dir, String filename, String encoding) throws IOException {
        return openWriter(new File(dir), filename, Charset.forName(encoding));
    }

    /**
     * Same as {@link #openWriter(File, String, Charset)}, with the directory given as a path.
     *
     * @throws IOException if the file cannot be opened for writing
     */
    public static PrintWriter openWriter(String dir, String filename, Charset encoding) throws IOException {
        return openWriter(new File(dir), filename, encoding);
    }

    /**
     * A {@link FileProcessor} for semicolon-separated lines. Each cleaned line is split into fields
     * and, when {@link #isCodePoint()} is true, the first field is parsed as a hexadecimal code point
     * or a {@code start..end} range of code points.
     */
    public static abstract class SemiFileReader extends FileProcessor {
        /** Field separator: a semicolon with any surrounding whitespace. */
        public final static Pattern SPLIT = PatternCache.get("\\s*;\\s*");

        /**
         * Handles one parsed line.
         *
         * @param lineCount 1-based number of the line in the input
         * @param start first code point of the range, or -1 when {@link #isCodePoint()} is false
         * @param end last code point of the range, or -1 when {@link #isCodePoint()} is false
         * @param items all fields of the line, including the first one
         * @return false to stop processing the input
         */
        protected abstract boolean handleLine(int lineCount, int start, int end, String[] items);

        @Override
        protected void handleEnd() {
        }

        /** Returns true (the default) when the first field of each line is a hex code point or range. */
        protected boolean isCodePoint() {
            return true;
        }

        /** Splits a cleaned line into fields; the default uses {@link #SPLIT}. */
        protected String[] splitLine(String line) {
            return SPLIT.split(line);
        }

        @Override
        protected boolean handleLine(int lineCount, String line) {
            String[] parts = splitLine(line);
            int start, end;
            if (isCodePoint()) {
                String source = parts[0];
                int range = source.indexOf("..");
                if (range >= 0) {
                    start = Integer.parseInt(source.substring(0, range), 16);
                    end = Integer.parseInt(source.substring(range + 2), 16);
                } else {
                    start = end = Integer.parseInt(source, 16);
                }
            } else {
                start = end = -1;
            }
            return handleLine(lineCount, start, end, parts);
        }
    }

    /**
     * Line-oriented processor: reads an input line by line, strips {@code #} comments, a leading
     * byte-order mark and surrounding whitespace, skips lines that are then empty, and hands the rest
     * to {@link #handleLine(int, String)}. Subclasses override the {@code handle*} hooks.
     */
    public static class FileProcessor {
        private int lineCount;

        /** Called once before the first line is read. */
        protected void handleStart() {
        }

        /**
         * Called for every non-empty cleaned line.
         *
         * @param lineCount 1-based number of the line in the input
         * @param line the line with comment, byte-order mark and surrounding whitespace removed
         * @return false to abort processing; true to continue
         */
        protected boolean handleLine(int lineCount, String line) {
            return true;
        }

        /** Called once after the input has been closed, whether it was exhausted or aborted. */
        protected void handleEnd() {
        }

        /**
         * Returns the current line counter: the 1-based number of the line most recently read, or one
         * past the last line once the end of input has been reached.
         */
        public int getLineCount() {
            return lineCount;
        }

        /**
         * Called for every line that contains a {@code #}, before the comment is stripped.
         *
         * @param line the raw line as read from the input
         * @param commentCharPosition index of the first {@code #} in {@code line}
         */
        public void handleComment(String line, int commentCharPosition) {
        }

        /**
         * Processes a class-path resource, read as UTF-8.
         *
         * @param classLocation class the resource name is resolved against
         * @param fileName resource name
         * @return this processor
         * @throws ICUUncheckedIOException if the resource cannot be opened or processing fails
         */
        public FileProcessor process(Class<?> classLocation, String fileName) {
            try {
                BufferedReader in = openFile(classLocation, fileName);
                return process(in, fileName);
            } catch (Exception e) {
                throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e);
            }

        }

        /**
         * Processes a file, read as UTF-8.
         *
         * @param fileName path of the file
         * @return this processor
         * @throws ICUUncheckedIOException if the file cannot be opened or processing fails
         */
        public FileProcessor process(String fileName) {
            try {
                FileInputStream fileStream = new FileInputStream(fileName);
                InputStreamReader reader = new InputStreamReader(fileStream, StandardCharsets.UTF_8);
                BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64);
                return process(bufferedReader, fileName);
            } catch (Exception e) {
                throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e);
            }
        }

        /**
         * Processes a file in the given directory, read as UTF-8.
         *
         * @param directory directory containing the file
         * @param fileName name of the file
         * @return this processor
         * @throws ICUUncheckedIOException if the file cannot be opened or processing fails
         */
        public FileProcessor process(String directory, String fileName) {
            try {
                FileInputStream fileStream = new FileInputStream(directory + File.separator + fileName);
                InputStreamReader reader = new InputStreamReader(fileStream, StandardCharsets.UTF_8);
                BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64);
                return process(bufferedReader, fileName);
            } catch (Exception e) {
                throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e);
            }
        }

        /**
         * Processes all lines of {@code in} and closes it, whether processing succeeds, is aborted
         * by {@link #handleLine(int, String)} or fails.
         *
         * @param in the input; closed by this method
         * @param fileName name of the input (not used by this implementation)
         * @return this processor
         * @throws ICUUncheckedIOException wrapping any exception thrown while reading or handling
         *             lines; the message holds the line number and the text of the current line
         */
        public FileProcessor process(BufferedReader in, String fileName) {
            handleStart();
            String line = null;
            lineCount = 1;
            try {
                // try-with-resources so the reader is released even when a handler throws.
                try (BufferedReader reader = in) {
                    for (;; ++lineCount) {
                        line = reader.readLine();
                        if (line == null) {
                            break;
                        }
                        int comment = line.indexOf("#");
                        if (comment >= 0) {
                            handleComment(line, comment);
                        }
                        // Strips the comment, a leading byte-order mark and surrounding whitespace.
                        line = cleanLine(line);
                        if (line.length() == 0) {
                            continue;
                        }
                        if (!handleLine(lineCount, line)) {
                            break;
                        }
                    }
                }
                handleEnd();
            } catch (Exception e) {
                throw new ICUUncheckedIOException(lineCount + ":\t" + line, e);
            }
            return this;
        }
    }

    /**
     * Opens a class-path resource for reading as UTF-8.
     *
     * @param class1 class the resource name is resolved against
     * @param file resource name
     * @return a buffered reader over the resource
     * @throws ICUUncheckedIOException if the resource cannot be opened
     */
    public static BufferedReader openFile(Class<?> class1, String file) {
        return openFile(class1, file, StandardCharsets.UTF_8);
    }

    /**
     * Opens a class-path resource for reading.
     *
     * @param class1 class the resource name is resolved against
     * @param file resource name
     * @param charset charset of the resource; null means UTF-8
     * @return a buffered reader (64 KiB buffer) over the resource
     * @throws ICUUncheckedIOException if the resource cannot be opened (including when it does not exist)
     */
    public static BufferedReader openFile(Class<?> class1, String file, Charset charset) {
        try {
            // getResourceAsStream yields null for a missing resource; the InputStreamReader
            // constructor then throws, which lands in the catch block below.
            final InputStream resourceAsStream = class1.getResourceAsStream(file);
            if (charset == null) {
                charset = StandardCharsets.UTF_8;
            }
            InputStreamReader reader = new InputStreamReader(resourceAsStream, charset);
            return new BufferedReader(reader, 1024 * 64);
        } catch (Exception e) {
            String className = class1 == null ? null : class1.getCanonicalName();
            String normalizedPath = null;
            try {
                // Best effort: add a file-system path to the message to help locate the problem.
                String relativeFileName = getRelativeFileName(class1, "../util/");
                normalizedPath = getNormalizedPathString(relativeFileName);
            } catch (Exception e1) {
                throw new ICUUncheckedIOException("Couldn't open file: " + file + "; relative to class: "
                    + className, e);
            }
            throw new ICUUncheckedIOException("Couldn't open file " + file + "; in path " + normalizedPath + "; relative to class: "
                + className, e);
        }
    }

    /**
     * Opens a file for reading with the given charset.
     *
     * @throws ICUUncheckedIOException if the file does not exist or cannot be opened
     */
    public static BufferedReader openFile(String directory, String file, Charset charset) {
        try {
            return new BufferedReader(new InputStreamReader(new FileInputStream(new File(directory, file)), charset));
        } catch (FileNotFoundException e) {
            throw new ICUUncheckedIOException(e); // convert the checked exception
        }
    }

    /**
     * Opens a file for reading with the given charset.
     *
     * @throws ICUUncheckedIOException if the file does not exist or cannot be opened
     */
    public static BufferedReader openFile(File file, Charset charset) {
        try {
            return new BufferedReader(new InputStreamReader(new FileInputStream(file), charset));
        } catch (FileNotFoundException e) {
            throw new ICUUncheckedIOException(e); // convert the checked exception
        }
    }

    /**
     * Opens a file for reading as UTF-8.
     *
     * @throws ICUUncheckedIOException if the file does not exist or cannot be opened
     */
    public static BufferedReader openFile(File file) {
        return openFile(file, StandardCharsets.UTF_8);
    }

    /**
     * Opens a file for reading as UTF-8.
     *
     * @throws ICUUncheckedIOException if the file does not exist or cannot be opened
     */
    public static BufferedReader openFile(String directory, String file) {
        return openFile(directory, file, StandardCharsets.UTF_8);
    }

    /**
     * Splits a line of comma-separated items. An item is either plain text or enclosed in double
     * quotes (required if it contains a comma or a quote); inside a quoted item a quote is written
     * as two quotes.
     *
     * @param line the line to split
     * @return the items in order; always at least one element
     */
    public static String[] splitCommaSeparated(String line) {
        List<String> result = new ArrayList<>();
        StringBuilder item = new StringBuilder();
        boolean inQuote = false;
        for (int i = 0; i < line.length(); ++i) {
            char ch = line.charAt(i); // don't worry about supplementaries
            switch (ch) {
            case '"':
                inQuote = !inQuote;
                // A quote at the start or end of an item only toggles the state. A quote that
                // re-opens quoting after text was already collected is the second half of a
                // doubled quote, so it is added to the item.
                if (inQuote && item.length() != 0) {
                    item.append('"');
                }
                break;
            case ',':
                if (!inQuote) {
                    result.add(item.toString());
                    item.setLength(0);
                } else {
                    item.append(ch);
                }
                break;
            default:
                item.append(ch);
                break;
            }
        }
        result.add(item.toString());
        return result.toArray(new String[result.size()]);
    }

    /**
     * Appends the lines of a UTF-8 class-path resource to {@code out}.
     *
     * @throws ICUUncheckedIOException if the resource cannot be opened or read
     */
    public static void appendFile(Class<?> class1, String filename, PrintWriter out) {
        appendFile(class1, filename, StandardCharsets.UTF_8, null, out);
    }

    /**
     * Appends the lines of a UTF-8 class-path resource to {@code out}, applying replacements.
     *
     * @param replacementList alternating search/replacement strings, or null for none
     * @throws ICUUncheckedIOException if the resource cannot be opened or read
     */
    public static void appendFile(Class<?> class1, String filename, String[] replacementList, PrintWriter out) {
        appendFile(class1, filename, StandardCharsets.UTF_8, replacementList, out);
    }

    /**
     * Appends the lines of a class-path resource to {@code out}, applying replacements.
     *
     * @param class1 class the resource name is resolved against
     * @param filename resource name
     * @param charset charset of the resource; null means UTF-8
     * @param replacementList alternating search/replacement strings, or null for none
     * @param out destination; not closed by this method
     * @throws ICUUncheckedIOException if the resource cannot be opened or read
     */
    public static void appendFile(Class<?> class1, String filename, Charset charset, String[] replacementList,
        PrintWriter out) {
        try (BufferedReader br = openFile(class1, filename, charset)) {
            appendBufferedReader(br, out, replacementList);
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e); // convert the checked exception
        }
    }

    /**
     * Appends the lines of a file to {@code output}.
     *
     * @param filename path of the file
     * @param encoding charset name of the file
     * @param output destination; not closed by this method
     * @throws IOException if the file cannot be opened or read
     */
    public static void appendFile(String filename, String encoding, PrintWriter output) throws IOException {
        appendFile(filename, encoding, output, null);
    }

    /**
     * Appends the lines of a file to {@code output}, applying replacements.
     *
     * @param filename path of the file
     * @param encoding charset name of the file
     * @param output destination; not closed by this method
     * @param replacementList alternating search/replacement strings, or null for none
     * @throws IOException if the file cannot be opened or read
     */
    public static void appendFile(String filename, String encoding, PrintWriter output, String[] replacementList) throws IOException {
        try (BufferedReader br = openReader("", filename, encoding)) {
            appendBufferedReader(br, output, replacementList);
        }
    }

    /**
     * Copies all lines of {@code br} to {@code output} and closes {@code br} (also when reading fails).
     *
     * @param br source; closed by this method
     * @param output destination; not closed by this method
     * @param replacementList alternating search/replacement strings applied to each line in order
     *            (elements 0,1 form the first pair, 2,3 the second, ...), or null for none
     * @throws IOException if reading fails
     */
    public static void appendBufferedReader(BufferedReader br,
        PrintWriter output, String[] replacementList) throws IOException {
        try (BufferedReader reader = br) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (replacementList != null) {
                    for (int i = 0; i < replacementList.length; i += 2) {
                        line = replace(line, replacementList[i], replacementList[i + 1]);
                    }
                }
                output.println(line);
            }
        }
    }

    /**
     * Replaces all occurrences of {@code piece} with {@code replacement}, scanning left to right;
     * text inserted by a replacement is not searched again.
     *
     * @param source the text to search; null is returned unchanged
     * @param piece the literal text to look for; an empty piece matches nothing
     * @param replacement the text to insert for each occurrence
     * @return the resulting string, or {@code source} itself when nothing was replaced
     * @throws NullPointerException if {@code piece} is null (and {@code source} is not), or if
     *             {@code replacement} is null and an occurrence was found
     */
    public static String replace(String source, String piece, String replacement) {
        // An empty piece would match at every position and never make progress.
        if (source == null || piece.isEmpty() || source.length() < piece.length()) return source;
        int found = source.indexOf(piece);
        if (found < 0) return source;
        if (replacement == null) {
            throw new NullPointerException("replacement");
        }
        // Build the result in one pass instead of re-copying the whole string per match.
        StringBuilder result = new StringBuilder(source.length());
        int copiedUpTo = 0;
        do {
            result.append(source, copiedUpTo, found).append(replacement);
            copiedUpTo = found + piece.length();
            found = source.indexOf(piece, copiedUpTo);
        } while (found >= 0);
        return result.append(source, copiedUpTo, source.length()).toString();
    }

    /**
     * Applies every {@code {search, replacement}} pair in {@code replacements}, in order.
     */
    public static String replace(String source, String[][] replacements) {
        return replace(source, replacements, replacements.length);
    }

    /**
     * Applies the first {@code count} {@code {search, replacement}} pairs of {@code replacements}, in order.
     */
    public static String replace(String source, String[][] replacements, int count) {
        for (int i = 0; i < count; ++i) {
            source = replace(source, replacements[i][0], replacements[i][1]);
        }
        return source;
    }

    /**
     * Applies every pair in {@code replacements}, in order. When {@code reverse} is true the roles
     * are swapped: element 1 of each pair is searched for and replaced by element 0.
     */
    public static String replace(String source, String[][] replacements, boolean reverse) {
        if (!reverse) return replace(source, replacements);
        for (int i = 0; i < replacements.length; ++i) {
            source = replace(source, replacements[i][1], replacements[i][0]);
        }
        return source;
    }

    /** One or more characters that are neither letters nor digits; compiled once for {@link #anchorize}. */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^\\p{L}\\p{N}]+");

    /**
     * Turns text into an anchor name: lower-cased (English rules), each run of characters that are
     * not letters or digits replaced by a single underscore, and one leading and one trailing
     * underscore removed.
     */
    public static String anchorize(String source) {
        String result = NON_ALPHANUMERIC.matcher(source.toLowerCase(Locale.ENGLISH)).replaceAll("_");
        if (result.endsWith("_")) result = result.substring(0, result.length() - 1);
        if (result.startsWith("_")) result = result.substring(1);
        return result;
    }

    /**
     * Copies a UTF-8 class-path resource into {@code targetDirectory} under the same name.
     *
     * @throws ICUUncheckedIOException if reading or writing fails
     */
    public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory) {
        copyFile(class1, sourceFile, targetDirectory, sourceFile, null);
    }

    /**
     * Copies a UTF-8 class-path resource into {@code targetDirectory} as {@code newName}.
     *
     * @throws ICUUncheckedIOException if reading or writing fails
     */
    public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName) {
        copyFile(class1, sourceFile, targetDirectory, newName, null);
    }

    /**
     * Copies a UTF-8 class-path resource into {@code targetDirectory} as {@code newName}, applying
     * replacements to every line. The target is written as UTF-8 and is always closed.
     *
     * @param class1 class the resource name is resolved against
     * @param sourceFile resource name
     * @param targetDirectory directory to write into (created if missing)
     * @param newName name of the file to write
     * @param replacementList alternating search/replacement strings, or null for none
     * @throws ICUUncheckedIOException if reading or writing fails
     */
    public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName, String[] replacementList) {
        try (PrintWriter out = openUTF8Writer(targetDirectory, newName)) {
            appendFile(class1, sourceFile, StandardCharsets.UTF_8, replacementList, out);
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e); // convert the checked exception
        }
    }

    /**
     * Returns the location of a class-path resource with the leading {@code file:} or
     * {@code jar:file:} scheme removed.
     *
     * @param class1 class the resource name is resolved against
     * @param filename resource name
     * @return the remainder of the resource URL after the scheme prefix
     * @throws ICUUncheckedIOException if the resource URL uses any other scheme
     * @throws NullPointerException if the resource is not found ({@code getResource} returned null)
     */
    public static String getRelativeFileName(Class<?> class1, String filename) {
        URL resource = class1.getResource(filename);
        String resourceString = resource.toString();
        if (resourceString.startsWith("file:")) {
            return resourceString.substring(5);
        } else if (resourceString.startsWith("jar:file:")) {
            return resourceString.substring(9);
        } else {
            throw new ICUUncheckedIOException("File not found: " + resourceString);
        }
    }

    /**
     * Iterates over the lines of a UTF-8 class-path resource. Example:
     * <pre>
     * for (String s : FileUtilities.in(SomeClass.class, name)) {
     *     ...
     * }
     * </pre>
     *
     * @throws ICUUncheckedIOException if the resource cannot be opened
     */
    public static Iterable<String> in(Class<?> class1, String file) {
        return With.in(new FileLines(openFile(class1, file, StandardCharsets.UTF_8)));
    }

    /**
     * Iterates over the lines of a class-path resource read with the given charset (null means UTF-8).
     *
     * @throws ICUUncheckedIOException if the resource cannot be opened
     */
    public static Iterable<String> in(Class<?> class1, String file, Charset charset) {
        return With.in(new FileLines(openFile(class1, file, charset)));
    }

    /**
     * Iterates over the lines of a UTF-8 file. Example:
     * <pre>
     * for (String s : FileUtilities.in(directory, name)) {
     *     ...
     * }
     * </pre>
     *
     * @throws ICUUncheckedIOException if the file cannot be opened
     */
    public static Iterable<String> in(String directory, String file) {
        return With.in(new FileLines(openFile(directory, file, StandardCharsets.UTF_8)));
    }

    /**
     * Iterates over the lines of a reader. The reader is closed once its end has been reached.
     */
    public static Iterable<String> in(BufferedReader reader) {
        return With.in(new FileLines(reader));
    }

    /**
     * Iterates over the lines of a file read with the given charset.
     *
     * @throws ICUUncheckedIOException if the file cannot be opened
     */
    public static Iterable<String> in(String directory, String file, Charset charset) {
        return With.in(new FileLines(openFile(directory, file, charset)));
    }

    /** Adapts a reader to {@link SimpleIterator}: one line per call, null at the end of input. */
    private static class FileLines implements SimpleIterator<String> {
        private final BufferedReader input;

        public FileLines(BufferedReader input) {
            this.input = input;
        }

        @Override
        public String next() {
            try {
                String result = input.readLine();
                if (result == null) {
                    // End of input: release the reader.
                    input.close();
                }
                return result;
            } catch (IOException e) {
                throw new ICUUncheckedIOException(e); // convert the checked exception
            }
        }

    }

    /**
     * Removes everything from the first {@code #} on, then a leading byte-order mark (U+FEFF), then
     * surrounding whitespace.
     */
    public static String cleanLine(String line) {
        int comment = line.indexOf("#");
        if (comment >= 0) {
            line = line.substring(0, comment);
        }
        if (line.startsWith("\uFEFF")) {
            line = line.substring(1);
        }
        return line.trim();
    }

    /** Field separator: a semicolon with any surrounding whitespace. */
    public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*");
    /** Debug switch: print lines for which the handler of {@link #handleFile} returned false. */
    private static final boolean SHOW_SKIP = false;

    /**
     * Cleans a line with {@link #cleanLine(String)} and splits it at semicolons.
     *
     * @return the fields, or null when the cleaned line is empty
     */
    public static String[] cleanSemiFields(String line) {
        line = cleanLine(line);
        return line.isEmpty() ? null : SEMI_SPLIT.split(line);
    }

    /** Callback receiving each line of a file; see {@link FileUtilities#handleFile}. */
    public interface LineHandler {
        /**
         * Handles one raw line.
         *
         * @param line the line as read, without its line terminator
         * @return false if the line was skipped
         * @throws Exception on any failure; reported by the caller together with the line
         */
        boolean handle(String line) throws Exception;
    }

    /**
     * Passes every line of the UTF-8 data file {@code filename} (opened through
     * {@link CldrUtility#getUTF8Data}) to {@code handler}. The file is always closed.
     *
     * @throws IOException if reading fails
     * @throws ICUUncheckedIOException wrapping any exception thrown by the handler
     */
    public static void handleFile(String filename, LineHandler handler) throws IOException {
        try (BufferedReader in = CldrUtility.getUTF8Data(filename)) {
            String line;
            while ((line = in.readLine()) != null) {
                try {
                    if (!handler.handle(line)) {
                        if (SHOW_SKIP) System.out.println("Skipping line: " + line);
                    }
                } catch (Exception e) {
                    throw new ICUUncheckedIOException("Problem with line: " + line, e);
                }
            }
        }
    }

    /**
     * Iterates over the lines of a UTF-8 file.
     *
     * @throws ICUUncheckedIOException if the file cannot be opened
     */
    public static Iterable<String> in(File file) {
        return With.in(new FileLines(openFile(file, StandardCharsets.UTF_8)));
    }
}