1 package org.unicode.cldr.draft; 2 3 import static org.unicode.cldr.util.PathUtilities.getNormalizedPathString; 4 5 import java.io.BufferedReader; 6 import java.io.BufferedWriter; 7 import java.io.File; 8 import java.io.FileInputStream; 9 import java.io.FileNotFoundException; 10 import java.io.FileOutputStream; 11 import java.io.IOException; 12 import java.io.InputStream; 13 import java.io.InputStreamReader; 14 import java.io.OutputStreamWriter; 15 import java.io.PrintWriter; 16 import java.net.URL; 17 import java.nio.charset.Charset; 18 import java.util.ArrayList; 19 import java.util.List; 20 import java.util.Locale; 21 import java.util.regex.Pattern; 22 23 import org.unicode.cldr.util.CldrUtility; 24 import org.unicode.cldr.util.PathUtilities; 25 import org.unicode.cldr.util.PatternCache; 26 import org.unicode.cldr.util.With; 27 import org.unicode.cldr.util.With.SimpleIterator; 28 29 import com.ibm.icu.util.ICUUncheckedIOException; 30 31 public final class FileUtilities { 32 public static final boolean SHOW_FILES; 33 static { 34 boolean showFiles = false; 35 try { 36 showFiles = System.getProperty("SHOW_FILES") != null; 37 } catch (SecurityException ignored) { 38 } 39 SHOW_FILES = showFiles; 40 } 41 42 public static final PrintWriter CONSOLE = new PrintWriter(System.out, true); 43 44 private static PrintWriter log = CONSOLE; 45 openUTF8Reader(String dir, String filename)46 public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException { 47 return openReader(dir, filename, "UTF-8"); 48 } 49 openReader(String dir, String filename, String encoding)50 public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException { 51 File file = dir.length() == 0 ? new File(filename) : new File(dir, filename); 52 if (SHOW_FILES && log != null) { 53 log.println("Opening File: " 54 + getNormalizedPathString(file)); 55 } 56 return new BufferedReader( 57 new InputStreamReader( 58 new FileInputStream(file), 59 encoding), 60 4 * 1024); 61 } 62 openUTF8Writer(String dir, String filename)63 public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException { 64 return openWriter(dir, filename, "UTF-8"); 65 } 66 openWriter(String dir, String filename, String encoding)67 public static PrintWriter openWriter(String dir, String filename, String encoding) throws IOException { 68 File file = new File(dir, filename); 69 if (SHOW_FILES && log != null) { 70 log.println("Creating File: " + getNormalizedPathString(file)); 71 } 72 String parentName = file.getParent(); 73 if (parentName != null) { 74 File parent = new File(parentName); 75 parent.mkdirs(); 76 } 77 return new PrintWriter( 78 new BufferedWriter( 79 new OutputStreamWriter( 80 new FileOutputStream(file), 81 encoding), 82 4 * 1024)); 83 } 84 85 public static abstract class SemiFileReader extends FileProcessor { 86 public final static Pattern SPLIT = PatternCache.get("\\s*;\\s*"); 87 handleLine(int lineCount, int start, int end, String[] items)88 protected abstract boolean handleLine(int lineCount, int start, int end, String[] items); 89 90 @Override handleEnd()91 protected void handleEnd() { 92 } 93 isCodePoint()94 protected boolean isCodePoint() { 95 return true; 96 } 97 splitLine(String line)98 protected String[] splitLine(String line) { 99 return SPLIT.split(line); 100 } 101 102 @Override handleLine(int lineCount, String line)103 protected boolean handleLine(int lineCount, String line) { 104 String[] parts = splitLine(line); 105 int start, end; 106 if (isCodePoint()) { 107 String source = parts[0]; 108 int range = source.indexOf(".."); 109 if (range >= 0) { 110 start = Integer.parseInt(source.substring(0, range), 16); 111 end = Integer.parseInt(source.substring(range + 2), 16); 112 } else { 113 start = end = Integer.parseInt(source, 16); 114 } 115 } else { 116 start = end = -1; 117 } 118 return handleLine(lineCount, start, end, parts); 119 } 120 } 121 122 public static class FileProcessor { 123 private int lineCount; 124 handleStart()125 protected void handleStart() { 126 } 127 128 /** 129 * Return false to abort 130 * 131 * @param lineCount 132 * @param line 133 * @return 134 */ handleLine(int lineCount, String line)135 protected boolean handleLine(int lineCount, String line) { 136 return true; 137 } 138 handleEnd()139 protected void handleEnd() { 140 } 141 getLineCount()142 public int getLineCount() { 143 return lineCount; 144 } 145 handleComment(String line, int commentCharPosition)146 public void handleComment(String line, int commentCharPosition) { 147 } 148 process(Class<?> classLocation, String fileName)149 public FileProcessor process(Class<?> classLocation, String fileName) { 150 try { 151 BufferedReader in = openFile(classLocation, fileName); 152 return process(in, fileName); 153 } catch (Exception e) { 154 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e); 155 } 156 157 } 158 process(String fileName)159 public FileProcessor process(String fileName) { 160 try { 161 FileInputStream fileStream = new FileInputStream(fileName); 162 InputStreamReader reader = new InputStreamReader(fileStream, UTF8); 163 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64); 164 return process(bufferedReader, fileName); 165 } catch (Exception e) { 166 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e); 167 } 168 } 169 process(String directory, String fileName)170 public FileProcessor process(String directory, String fileName) { 171 try { 172 FileInputStream fileStream = new FileInputStream(directory + File.separator + fileName); 173 InputStreamReader reader = new InputStreamReader(fileStream, UTF8); 174 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64); 175 return process(bufferedReader, fileName); 176 } catch (Exception e) { 177 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e); 178 } 179 } 180 process(BufferedReader in, String fileName)181 public FileProcessor process(BufferedReader in, String fileName) { 182 handleStart(); 183 String line = null; 184 lineCount = 1; 185 try { 186 for (;; ++lineCount) { 187 line = in.readLine(); 188 if (line == null) { 189 break; 190 } 191 int comment = line.indexOf("#"); 192 if (comment >= 0) { 193 handleComment(line, comment); 194 line = line.substring(0, comment); 195 } 196 if (line.startsWith("\uFEFF")) { 197 line = line.substring(1); 198 } 199 line = line.trim(); 200 if (line.length() == 0) { 201 continue; 202 } 203 if (!handleLine(lineCount, line)) { 204 break; 205 } 206 } 207 in.close(); 208 handleEnd(); 209 } catch (Exception e) { 210 throw new ICUUncheckedIOException(lineCount + ":\t" + line, e); 211 } 212 return this; 213 } 214 } 215 216 // 217 // public static SemiFileReader fillMapFromSemi(Class classLocation, String fileName, SemiFileReader handler) { 218 // return handler.process(classLocation, fileName); 219 // } openFile(Class<?> class1, String file)220 public static BufferedReader openFile(Class<?> class1, String file) { 221 return openFile(class1, file, UTF8); 222 } 223 openFile(Class<?> class1, String file, Charset charset)224 public static BufferedReader openFile(Class<?> class1, String file, Charset charset) { 225 // URL path = null; 226 // String externalForm = null; 227 try { 228 // //System.out.println("Reading:\t" + file1.getCanonicalPath()); 229 // path = class1.getResource(file); 230 // externalForm = path.toExternalForm(); 231 // if (externalForm.startsWith("file:")) { 232 // externalForm = externalForm.substring(5); 233 // } 234 // File file1 = new File(externalForm); 235 // boolean x = file1.canRead(); 236 // final InputStream resourceAsStream = new FileInputStream(file1); 237 final InputStream resourceAsStream = class1.getResourceAsStream(file); 238 // String foo = class1.getResource(".").toString(); 239 if (charset == null) { 240 charset = UTF8; 241 } 242 InputStreamReader reader = new InputStreamReader(resourceAsStream, charset); 243 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64); 244 return bufferedReader; 245 } catch (Exception e) { 246 String className = class1 == null ? null : class1.getCanonicalName(); 247 String normalizedPath = null; 248 try { 249 String relativeFileName = getRelativeFileName(class1, "../util/"); 250 normalizedPath = getNormalizedPathString(relativeFileName); 251 } catch (Exception e1) { 252 throw new ICUUncheckedIOException("Couldn't open file: " + file + "; relative to class: " 253 + className, e); 254 } 255 throw new ICUUncheckedIOException("Couldn't open file " + file + "; in path " + normalizedPath + "; relative to class: " 256 + className, e); 257 } 258 } 259 openFile(String directory, String file, Charset charset)260 public static BufferedReader openFile(String directory, String file, Charset charset) { 261 try { 262 return new BufferedReader(new InputStreamReader(new FileInputStream(new File(directory, file)), charset)); 263 } catch (FileNotFoundException e) { 264 throw new ICUUncheckedIOException(e); // handle dang'd checked exception 265 } 266 } 267 openFile(File file, Charset charset)268 public static BufferedReader openFile(File file, Charset charset) { 269 try { 270 return new BufferedReader(new InputStreamReader(new FileInputStream(file), charset)); 271 } catch (FileNotFoundException e) { 272 throw new ICUUncheckedIOException(e); // handle dang'd checked exception 273 } 274 } 275 openFile(File file)276 public static BufferedReader openFile(File file) { 277 return openFile(file, UTF8); 278 } 279 openFile(String directory, String file)280 public static BufferedReader openFile(String directory, String file) { 281 return openFile(directory, file, UTF8); 282 } 283 284 public static final Charset UTF8 = Charset.forName("utf-8"); 285 splitCommaSeparated(String line)286 public static String[] splitCommaSeparated(String line) { 287 // items are separated by ',' 288 // each item is of the form abc... 289 // or "..." (required if a comma or quote is contained) 290 // " in a field is represented by "" 291 List<String> result = new ArrayList<>(); 292 StringBuilder item = new StringBuilder(); 293 boolean inQuote = false; 294 for (int i = 0; i < line.length(); ++i) { 295 char ch = line.charAt(i); // don't worry about supplementaries 296 switch (ch) { 297 case '"': 298 inQuote = !inQuote; 299 // at start or end, that's enough 300 // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote 301 if (inQuote && item.length() != 0) { 302 item.append('"'); 303 inQuote = true; 304 } 305 break; 306 case ',': 307 if (!inQuote) { 308 result.add(item.toString()); 309 item.setLength(0); 310 } else { 311 item.append(ch); 312 } 313 break; 314 default: 315 item.append(ch); 316 break; 317 } 318 } 319 result.add(item.toString()); 320 return result.toArray(new String[result.size()]); 321 } 322 appendFile(Class<?> class1, String filename, PrintWriter out)323 public static void appendFile(Class<?> class1, String filename, PrintWriter out) { 324 appendFile(class1, filename, UTF8, null, out); 325 } 326 appendFile(Class<?> class1, String filename, String[] replacementList, PrintWriter out)327 public static void appendFile(Class<?> class1, String filename, String[] replacementList, PrintWriter out) { 328 appendFile(class1, filename, UTF8, replacementList, out); 329 } 330 appendFile(Class<?> class1, String filename, Charset charset, String[] replacementList, PrintWriter out)331 public static void appendFile(Class<?> class1, String filename, Charset charset, String[] replacementList, 332 PrintWriter out) { 333 BufferedReader br = openFile(class1, filename, charset); 334 try { 335 try { 336 appendBufferedReader(br, out, replacementList); 337 } finally { 338 br.close(); 339 } 340 } catch (IOException e) { 341 throw new ICUUncheckedIOException(e); // wrap darn'd checked exception 342 } 343 } 344 appendFile(String filename, String encoding, PrintWriter output)345 public static void appendFile(String filename, String encoding, PrintWriter output) throws IOException { 346 appendFile(filename, encoding, output, null); 347 } 348 appendFile(String filename, String encoding, PrintWriter output, String[] replacementList)349 public static void appendFile(String filename, String encoding, PrintWriter output, String[] replacementList) throws IOException { 350 BufferedReader br = openReader("", filename, encoding); 351 try { 352 appendBufferedReader(br, output, replacementList); 353 } finally { 354 br.close(); 355 } 356 } 357 appendBufferedReader(BufferedReader br, PrintWriter output, String[] replacementList)358 public static void appendBufferedReader(BufferedReader br, 359 PrintWriter output, String[] replacementList) throws IOException { 360 while (true) { 361 String line = br.readLine(); 362 if (line == null) break; 363 if (replacementList != null) { 364 for (int i = 0; i < replacementList.length; i += 2) { 365 line = replace(line, replacementList[i], replacementList[i + 1]); 366 } 367 } 368 output.println(line); 369 } 370 br.close(); 371 } 372 373 /** 374 * Replaces all occurrences of piece with replacement, and returns new String 375 */ replace(String source, String piece, String replacement)376 public static String replace(String source, String piece, String replacement) { 377 if (source == null || source.length() < piece.length()) return source; 378 int pos = 0; 379 while (true) { 380 pos = source.indexOf(piece, pos); 381 if (pos < 0) return source; 382 source = source.substring(0, pos) + replacement + source.substring(pos + piece.length()); 383 pos += replacement.length(); 384 } 385 } 386 replace(String source, String[][] replacements)387 public static String replace(String source, String[][] replacements) { 388 return replace(source, replacements, replacements.length); 389 } 390 replace(String source, String[][] replacements, int count)391 public static String replace(String source, String[][] replacements, int count) { 392 for (int i = 0; i < count; ++i) { 393 source = replace(source, replacements[i][0], replacements[i][1]); 394 } 395 return source; 396 } 397 replace(String source, String[][] replacements, boolean reverse)398 public static String replace(String source, String[][] replacements, boolean reverse) { 399 if (!reverse) return replace(source, replacements); 400 for (int i = 0; i < replacements.length; ++i) { 401 source = replace(source, replacements[i][1], replacements[i][0]); 402 } 403 return source; 404 } 405 anchorize(String source)406 public static String anchorize(String source) { 407 String result = source.toLowerCase(Locale.ENGLISH).replaceAll("[^\\p{L}\\p{N}]+", "_"); 408 if (result.endsWith("_")) result = result.substring(0, result.length() - 1); 409 if (result.startsWith("_")) result = result.substring(1); 410 return result; 411 } 412 copyFile(Class<?> class1, String sourceFile, String targetDirectory)413 public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory) { 414 copyFile(class1, sourceFile, targetDirectory, sourceFile, null); 415 } 416 copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName)417 public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName) { 418 copyFile(class1, sourceFile, targetDirectory, newName, null); 419 } 420 copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName, String[] replacementList)421 public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName, String[] replacementList) { 422 try { 423 PrintWriter out = openUTF8Writer(targetDirectory, newName); 424 appendFile(class1, sourceFile, UTF8, replacementList, out); 425 out.close(); 426 } catch (IOException e) { 427 throw new ICUUncheckedIOException(e); // dang'd checked exceptions 428 } 429 } 430 getRelativeFileName(Class<?> class1, String filename)431 public static String getRelativeFileName(Class<?> class1, String filename) { 432 URL resource = class1.getResource(filename); 433 String resourceString = resource.toString(); 434 if (resourceString.startsWith("file:")) { 435 return resourceString.substring(5); 436 } else if (resourceString.startsWith("jar:file:")) { 437 return resourceString.substring(9); 438 } else { 439 throw new ICUUncheckedIOException("File not found: " + resourceString); 440 } 441 } 442 443 /** 444 * Simple API to iterate over file lines. Example: 445 * for (String s : FileUtilities.in(directory,name)) { 446 * ... 447 * } 448 * 449 * @author markdavis 450 * 451 */ in(Class<?> class1, String file)452 public static Iterable<String> in(Class<?> class1, String file) { 453 return With.in(new FileLines(openFile(class1, file, UTF8))); 454 } 455 456 /** 457 * Simple API to iterate over file lines. Example: 458 * for (String s : FileUtilities.in(directory,name)) { 459 * ... 460 * } 461 * 462 * @author markdavis 463 * 464 */ in(Class<?> class1, String file, Charset charset)465 public static Iterable<String> in(Class<?> class1, String file, Charset charset) { 466 return With.in(new FileLines(openFile(class1, file, charset))); 467 } 468 469 /** 470 * Simple API to iterate over file lines. Example: 471 * for (String s : FileUtilities.in(directory,name)) { 472 * ... 473 * } 474 * 475 * @author markdavis 476 * 477 */ in(String directory, String file)478 public static Iterable<String> in(String directory, String file) { 479 return With.in(new FileLines(openFile(directory, file, UTF8))); 480 } 481 482 /** 483 * Simple API to iterate over file lines. Example: 484 * for (String s : FileUtilities.in(directory,name)) { 485 * ... 486 * } 487 * 488 * @author markdavis 489 * 490 */ in(BufferedReader reader)491 public static Iterable<String> in(BufferedReader reader) { 492 return With.in(new FileLines(reader)); 493 } 494 495 /** 496 * Simple API to iterate over file lines. Example: 497 * for (String s : FileUtilities.in(directory,name)) { 498 * ... 499 * } 500 * 501 * @author markdavis 502 * 503 */ in(String directory, String file, Charset charset)504 public static Iterable<String> in(String directory, String file, Charset charset) { 505 return With.in(new FileLines(openFile(directory, file, charset))); 506 } 507 508 private static class FileLines implements SimpleIterator<String> { 509 private BufferedReader input; 510 FileLines(BufferedReader input)511 public FileLines(BufferedReader input) { 512 this.input = input; 513 } 514 515 @Override next()516 public String next() { 517 try { 518 String result = input.readLine(); 519 if (result == null) { 520 input.close(); 521 } 522 return result; 523 } catch (IOException e) { 524 throw new ICUUncheckedIOException(e); // handle dang'd checked exception 525 } 526 } 527 528 } 529 cleanLine(String line)530 public static String cleanLine(String line) { 531 int comment = line.indexOf("#"); 532 if (comment >= 0) { 533 line = line.substring(0, comment); 534 } 535 if (line.startsWith("\uFEFF")) { 536 line = line.substring(1); 537 } 538 return line.trim(); 539 } 540 541 public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*"); 542 private static final boolean SHOW_SKIP = false; 543 cleanSemiFields(String line)544 public static String[] cleanSemiFields(String line) { 545 line = cleanLine(line); 546 return line.isEmpty() ? null : SEMI_SPLIT.split(line); 547 } 548 549 public interface LineHandler { 550 /** 551 * Return false if line was skipped 552 * 553 * @param line 554 * @return 555 */ handle(String line)556 boolean handle(String line) throws Exception; 557 } 558 handleFile(String filename, LineHandler handler)559 public static void handleFile(String filename, LineHandler handler) throws IOException { 560 BufferedReader in = CldrUtility.getUTF8Data(filename); 561 while (true) { 562 String line = in.readLine(); 563 if (line == null) { 564 break; 565 } 566 try { 567 if (!handler.handle(line)) { 568 if (SHOW_SKIP) System.out.println("Skipping line: " + line); 569 } 570 } catch (Exception e) { 571 throw new ICUUncheckedIOException("Problem with line: " + line, e); 572 } 573 } 574 in.close(); 575 } 576 in(File file)577 public static Iterable<String> in(File file) { 578 return With.in(new FileLines(openFile(file, UTF8))); 579 } 580 } 581