package org.unicode.cldr.draft;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.With;
import org.unicode.cldr.util.With.SimpleIterator;

import com.ibm.icu.util.ICUUncheckedIOException;

/**
 * Static helpers for opening, reading, writing, copying and line-processing text files, plus a
 * few small string utilities used while doing so.
 */
public final class FileUtilities {
    /** True when the {@code SHOW_FILES} system property is set; turns on logging of opened/created files. */
    public static final boolean SHOW_FILES;
    static {
        boolean showFiles = false;
        try {
            showFiles = System.getProperty("SHOW_FILES") != null;
        } catch (SecurityException ignored) {
            // Not permitted to read system properties: leave file logging switched off.
        }
        SHOW_FILES = showFiles;
    }

    /** Auto-flushing writer over {@code System.out}. */
    public static final PrintWriter CONSOLE = new PrintWriter(System.out, true);

    /** Charset used by every method here that does not take an explicit charset/encoding. */
    public static final Charset UTF8 = Charset.forName("utf-8");

    /** Splits a line on semicolons, swallowing the whitespace around each semicolon. */
    public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*");

    /** Runs of characters that are neither letters nor digits; used by {@link #anchorize(String)}. */
    private static final Pattern NON_ALPHANUMERIC_RUN = PatternCache.get("[^\\p{L}\\p{N}]+");

    /** When true, {@link #handleFile(String, LineHandler)} reports lines the handler skipped. */
    private static final boolean SHOW_SKIP = false;

    private static PrintWriter log = CONSOLE;

    /**
     * Builds the file for a directory/name pair; a null or empty directory means "no directory",
     * so the name is used on its own.
     */
    private static File toFile(String dir, String filename) {
        return dir == null || dir.length() == 0 ? new File(filename) : new File(dir, filename);
    }

    /**
     * Opens a file for reading as UTF-8.
     *
     * @param dir directory containing the file; null or empty to use {@code filename} alone
     * @param filename name of the file
     * @return a buffered reader over the file
     * @throws IOException if the file cannot be opened
     */
    public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException {
        return openReader(dir, filename, "UTF-8");
    }

    /**
     * Opens a file for reading in the given encoding.
     *
     * @param dir directory containing the file; null or empty to use {@code filename} alone
     * @param filename name of the file
     * @param encoding charset name passed to {@link InputStreamReader}
     * @return a buffered reader over the file
     * @throws IOException if the file cannot be opened or the encoding is not supported
     */
    public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException {
        File file = toFile(dir, filename);
        if (SHOW_FILES && log != null) {
            log.println("Opening File: " + file.getCanonicalPath());
        }
        FileInputStream stream = new FileInputStream(file);
        boolean opened = false;
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(stream, encoding), 4 * 1024);
            opened = true;
            return reader;
        } finally {
            if (!opened) {
                // The encoding was rejected: do not leak the already-open file handle.
                stream.close();
            }
        }
    }

    /**
     * Opens a file for writing as UTF-8, creating parent directories as needed.
     *
     * @param dir target directory; null or empty to use {@code filename} alone
     * @param filename name of the file
     * @return a print writer over the file
     * @throws IOException if the file cannot be created
     */
    public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException {
        return openWriter(dir, filename, "UTF-8");
    }

    /**
     * Opens a file for writing in the given encoding, creating parent directories as needed.
     * The directory argument is interpreted exactly as in
     * {@link #openReader(String, String, String)}.
     *
     * @param dir target directory; null or empty to use {@code filename} alone
     * @param filename name of the file
     * @param encoding charset name passed to {@link OutputStreamWriter}
     * @return a print writer over the file
     * @throws IOException if the file cannot be created or the encoding is not supported
     */
    public static PrintWriter openWriter(String dir, String filename, String encoding) throws IOException {
        File file = toFile(dir, filename);
        if (SHOW_FILES && log != null) {
            log.println("Creating File: " + file.getCanonicalPath());
        }
        String parentName = file.getParent();
        if (parentName != null) {
            // Result deliberately unchecked: if the directory is unusable, opening the stream fails below.
            new File(parentName).mkdirs();
        }
        FileOutputStream stream = new FileOutputStream(file);
        boolean opened = false;
        try {
            PrintWriter writer = new PrintWriter(
                new BufferedWriter(new OutputStreamWriter(stream, encoding), 4 * 1024));
            opened = true;
            return writer;
        } finally {
            if (!opened) {
                // The encoding was rejected: do not leak the already-open file handle.
                stream.close();
            }
        }
    }

    /**
     * A {@link FileProcessor} for semicolon-separated files. Each cleaned line is split into
     * fields; unless {@link #isCodePoint()} is overridden to return false, the first field is
     * parsed as a hexadecimal value or a {@code start..end} hexadecimal range.
     */
    public static abstract class SemiFileReader extends FileProcessor {
        public final static Pattern SPLIT = PatternCache.get("\\s*;\\s*");

        /**
         * Handles one split line.
         *
         * @param lineCount line number as counted by {@link FileProcessor}
         * @param start first value of the range in field 0, or -1 when {@link #isCodePoint()} is false
         * @param end last value of the range in field 0 (equal to {@code start} for a single value),
         *            or -1 when {@link #isCodePoint()} is false
         * @param items all fields of the line, including field 0
         * @return false to stop processing the file
         */
        protected abstract boolean handleLine(int lineCount, int start, int end, String[] items);

        @Override
        protected void handleEnd() {
        }

        /** Returns whether field 0 should be parsed as a hex value or range; true by default. */
        protected boolean isCodePoint() {
            return true;
        }

        /** Splits a cleaned line into fields; by default on {@link #SPLIT}. */
        protected String[] splitLine(String line) {
            return SPLIT.split(line);
        }

        @Override
        protected boolean handleLine(int lineCount, String line) {
            String[] parts = splitLine(line);
            int start;
            int end;
            if (isCodePoint()) {
                String source = parts[0];
                int range = source.indexOf("..");
                if (range >= 0) {
                    start = Integer.parseInt(source.substring(0, range), 16);
                    end = Integer.parseInt(source.substring(range + 2), 16);
                } else {
                    start = end = Integer.parseInt(source, 16);
                }
            } else {
                start = end = -1;
            }
            return handleLine(lineCount, start, end, parts);
        }
    }

    /**
     * Line-oriented file processor. Subclasses override the {@code handle...} hooks; the
     * {@code process} methods read the input, strip {@code #} comments, a leading U+FEFF and
     * surrounding whitespace, skip lines that are then empty, and pass the rest to
     * {@link #handleLine(int, String)}.
     */
    public static class FileProcessor {
        private int lineCount;

        /** Called once before the first line is read. */
        protected void handleStart() {
        }

        /**
         * Called for each non-empty cleaned line.
         *
         * @param lineCount 1-based number of the line in the input
         * @param line the line with any comment, leading U+FEFF and surrounding whitespace removed
         * @return false to abort processing
         */
        protected boolean handleLine(int lineCount, String line) {
            return true;
        }

        /** Called once after the input has been read (or aborted) and closed. */
        protected void handleEnd() {
        }

        /** Returns the line counter of the most recent {@code process} call. */
        public int getLineCount() {
            return lineCount;
        }

        /**
         * Called for each line that contains a {@code #}, before the comment is stripped.
         *
         * @param line the raw line as read
         * @param commentCharPosition index of the first {@code #} in {@code line}
         */
        public void handleComment(String line, int commentCharPosition) {
        }

        /**
         * Processes a resource located relative to a class.
         *
         * @param classLocation class whose resource path is used to find the file
         * @param fileName resource name
         * @return this processor
         * @throws ICUUncheckedIOException if the resource cannot be opened or processing fails
         */
        public FileProcessor process(Class<?> classLocation, String fileName) {
            try {
                BufferedReader in = openFile(classLocation, fileName);
                return process(in, fileName);
            } catch (Exception e) {
                throw new ICUUncheckedIOException(lineCount + ":\t" + fileName, e);
            }
        }

        /**
         * Processes the UTF-8 file with the given path.
         *
         * @param fileName path of the file
         * @return this processor
         * @throws ICUUncheckedIOException if the file cannot be opened or processing fails
         */
        public FileProcessor process(String fileName) {
            return processPath(fileName, fileName);
        }

        /**
         * Processes the UTF-8 file {@code fileName} inside {@code directory}.
         *
         * @param directory directory containing the file
         * @param fileName name of the file
         * @return this processor
         * @throws ICUUncheckedIOException if the file cannot be opened or processing fails
         */
        public FileProcessor process(String directory, String fileName) {
            return processPath(directory + File.separator + fileName, fileName);
        }

        /** Opens {@code path} as UTF-8 with a 64K buffer and processes it, wrapping any failure. */
        private FileProcessor processPath(String path, String fileName) {
            try {
                FileInputStream fileStream = new FileInputStream(path);
                InputStreamReader reader = new InputStreamReader(fileStream, UTF8);
                BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64);
                return process(bufferedReader, fileName);
            } catch (Exception e) {
                throw new ICUUncheckedIOException(lineCount + ":\t" + fileName, e);
            }
        }

        /**
         * Processes all lines from a reader and closes it, whether processing completes, is
         * aborted by {@link #handleLine(int, String)} returning false, or fails.
         *
         * @param in reader to consume; closed by this method
         * @param fileName name of the input; not used by this implementation
         * @return this processor
         * @throws ICUUncheckedIOException wrapping any exception raised while reading or handling;
         *         the message carries the line number and the line being processed
         */
        public FileProcessor process(BufferedReader in, String fileName) {
            handleStart();
            String line = null;
            lineCount = 1;
            try {
                try {
                    for (;; ++lineCount) {
                        line = in.readLine();
                        if (line == null) {
                            break;
                        }
                        int comment = line.indexOf("#");
                        if (comment >= 0) {
                            handleComment(line, comment);
                            line = line.substring(0, comment);
                        }
                        if (line.startsWith("\uFEFF")) {
                            line = line.substring(1);
                        }
                        line = line.trim();
                        if (line.length() == 0) {
                            continue;
                        }
                        if (!handleLine(lineCount, line)) {
                            break;
                        }
                    }
                } finally {
                    // Close even when a handler throws, so the reader is never leaked.
                    in.close();
                }
                handleEnd();
            } catch (Exception e) {
                throw new ICUUncheckedIOException(lineCount + ":\t" + line, e);
            }
            return this;
        }
    }

    /**
     * Opens a class-relative resource as UTF-8.
     *
     * @param class1 class whose resource path is used to find the file
     * @param file resource name
     * @return a buffered reader over the resource
     * @throws ICUUncheckedIOException if the resource cannot be opened
     */
    public static BufferedReader openFile(Class<?> class1, String file) {
        return openFile(class1, file, UTF8);
    }

    /**
     * Opens a class-relative resource in the given charset.
     *
     * @param class1 class whose resource path is used to find the file
     * @param file resource name
     * @param charset charset to decode with; null means UTF-8
     * @return a buffered reader over the resource
     * @throws ICUUncheckedIOException if the resource cannot be found or opened; the message
     *         names the file, the class and, when it can be determined, a nearby resource path
     */
    public static BufferedReader openFile(Class<?> class1, String file, Charset charset) {
        try {
            final InputStream resourceAsStream = class1.getResourceAsStream(file);
            if (resourceAsStream == null) {
                // getResourceAsStream signals "not found" with null rather than an exception.
                throw new FileNotFoundException("Resource not found: " + file);
            }
            if (charset == null) {
                charset = UTF8;
            }
            return new BufferedReader(new InputStreamReader(resourceAsStream, charset), 1024 * 64);
        } catch (Exception e) {
            String className = class1 == null ? null : class1.getCanonicalName();
            String canonicalName;
            try {
                // Best effort: add a resolved resource path to the message to help diagnose the failure.
                String relativeFileName = getRelativeFileName(class1, "../util/");
                canonicalName = new File(relativeFileName).getCanonicalPath();
            } catch (Exception e1) {
                throw new ICUUncheckedIOException("Couldn't open file: " + file + "; relative to class: "
                    + className, e);
            }
            throw new ICUUncheckedIOException("Couldn't open file " + file + "; in path " + canonicalName
                + "; relative to class: " + className, e);
        }
    }

    /**
     * Opens {@code file} inside {@code directory} in the given charset.
     *
     * @param directory directory containing the file
     * @param file name of the file
     * @param charset charset to decode with; null means UTF-8
     * @return a buffered reader over the file
     * @throws ICUUncheckedIOException if the file does not exist or cannot be opened
     */
    public static BufferedReader openFile(String directory, String file, Charset charset) {
        return openFile(new File(directory, file), charset);
    }

    /**
     * Opens a file in the given charset.
     *
     * @param file file to open
     * @param charset charset to decode with; null means UTF-8
     * @return a buffered reader over the file
     * @throws ICUUncheckedIOException if the file does not exist or cannot be opened
     */
    public static BufferedReader openFile(File file, Charset charset) {
        // Resolve the charset before opening so a null charset cannot strand an open stream.
        Charset effectiveCharset = charset == null ? UTF8 : charset;
        try {
            return new BufferedReader(new InputStreamReader(new FileInputStream(file), effectiveCharset));
        } catch (FileNotFoundException e) {
            throw new ICUUncheckedIOException(e); // convert the checked exception
        }
    }

    /** Opens a file as UTF-8; see {@link #openFile(File, Charset)}. */
    public static BufferedReader openFile(File file) {
        return openFile(file, UTF8);
    }

    /** Opens {@code file} inside {@code directory} as UTF-8; see {@link #openFile(String, String, Charset)}. */
    public static BufferedReader openFile(String directory, String file) {
        return openFile(directory, file, UTF8);
    }

    /**
     * Splits a comma-separated line into fields.
     * <ul>
     * <li>Items are separated by {@code ','}.</li>
     * <li>An item is either plain text or {@code "..."}; quoting is required when the item
     * contains a comma or a quote.</li>
     * <li>A quote inside a quoted item is written as {@code ""}.</li>
     * </ul>
     * Known limitation: a doubled quote at the very start of a quoted item (for example
     * {@code """a"}) yields no quote character, because an opening quote is only emitted once
     * the item already has content.
     *
     * @param line the line to split
     * @return the fields, in order; always at least one element
     */
    public static String[] splitCommaSeparated(String line) {
        List<String> result = new ArrayList<String>();
        StringBuilder item = new StringBuilder();
        boolean inQuote = false;
        for (int i = 0; i < line.length(); ++i) {
            char ch = line.charAt(i); // supplementary characters need no special handling here
            switch (ch) {
            case '"':
                inQuote = !inQuote;
                // A quote that opens (or re-opens) quoting after content has been collected is
                // a literal quote: this is how the second half of "" ends up in the item.
                if (inQuote && item.length() != 0) {
                    item.append('"');
                }
                break;
            case ',':
                if (!inQuote) {
                    result.add(item.toString());
                    item.setLength(0);
                } else {
                    item.append(ch);
                }
                break;
            default:
                item.append(ch);
                break;
            }
        }
        result.add(item.toString());
        return result.toArray(new String[result.size()]);
    }

    /**
     * Appends a UTF-8 class-relative resource to {@code out}, unchanged.
     *
     * @param class1 class whose resource path is used to find the file
     * @param filename resource name
     * @param out destination
     */
    public static void appendFile(Class<?> class1, String filename, PrintWriter out) {
        appendFile(class1, filename, UTF8, null, out);
    }

    /**
     * Appends a class-relative resource to {@code out}, applying replacements to each line.
     *
     * @param class1 class whose resource path is used to find the file
     * @param filename resource name
     * @param charset charset to decode with; null means UTF-8
     * @param replacementList alternating piece/replacement strings, or null for none
     * @param out destination
     * @throws ICUUncheckedIOException if the resource cannot be opened or read
     */
    public static void appendFile(Class<?> class1, String filename, Charset charset, String[] replacementList,
        PrintWriter out) {
        BufferedReader br = openFile(class1, filename, charset);
        try {
            try {
                appendBufferedReader(br, out, replacementList);
            } finally {
                br.close();
            }
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e); // convert the checked exception
        }
    }

    /**
     * Appends the file at {@code filename} to {@code output}, unchanged.
     *
     * @param filename path of the file
     * @param encoding charset name of the file
     * @param output destination
     * @throws IOException if the file cannot be opened or read
     */
    public static void appendFile(String filename, String encoding, PrintWriter output) throws IOException {
        appendFile(filename, encoding, output, null);
    }

    /**
     * Appends the file at {@code filename} to {@code output}, applying replacements to each line.
     *
     * @param filename path of the file
     * @param encoding charset name of the file
     * @param output destination
     * @param replacementList alternating piece/replacement strings, or null for none
     * @throws IOException if the file cannot be opened or read
     */
    public static void appendFile(String filename, String encoding, PrintWriter output, String[] replacementList)
        throws IOException {
        BufferedReader br = openReader("", filename, encoding);
        try {
            appendBufferedReader(br, output, replacementList);
        } finally {
            br.close();
        }
    }

    /**
     * Copies every line of {@code br} to {@code output}, applying replacements to each line,
     * then closes {@code br}.
     *
     * @param br source; closed by this method
     * @param output destination; left open
     * @param replacementList alternating piece/replacement strings (so its length must be even),
     *            or null for none
     * @throws IOException if reading fails
     * @throws IllegalArgumentException if {@code replacementList} has an odd number of entries
     */
    public static void appendBufferedReader(BufferedReader br,
        PrintWriter output, String[] replacementList) throws IOException {
        try {
            if (replacementList != null && replacementList.length % 2 != 0) {
                // Fail before any output is written instead of part-way through the copy.
                throw new IllegalArgumentException(
                    "replacementList must hold piece/replacement pairs, but has " + replacementList.length
                        + " entries");
            }
            while (true) {
                String line = br.readLine();
                if (line == null) {
                    break;
                }
                if (replacementList != null) {
                    for (int i = 0; i < replacementList.length; i += 2) {
                        line = replace(line, replacementList[i], replacementList[i + 1]);
                    }
                }
                output.println(line);
            }
        } finally {
            br.close();
        }
    }

    /**
     * Replaces all non-overlapping occurrences of {@code piece} with {@code replacement},
     * scanning left to right. Text inserted by a replacement is not rescanned.
     *
     * @param source text to search; may be null
     * @param piece text to find; an empty piece matches nothing
     * @param replacement text to insert for each occurrence
     * @return the resulting string, or {@code source} itself when nothing was replaced
     */
    public static String replace(String source, String piece, String replacement) {
        // An empty piece would match at every position and never terminate, so treat it as "no match".
        if (source == null || piece.length() == 0 || source.length() < piece.length()) {
            return source;
        }
        int pos = source.indexOf(piece);
        if (pos < 0) {
            return source;
        }
        StringBuilder result = new StringBuilder(source.length());
        int copiedUpTo = 0;
        while (pos >= 0) {
            result.append(source, copiedUpTo, pos).append(replacement);
            copiedUpTo = pos + piece.length();
            pos = source.indexOf(piece, copiedUpTo);
        }
        result.append(source, copiedUpTo, source.length());
        return result.toString();
    }

    /**
     * Applies every {@code {piece, replacement}} pair in order.
     *
     * @param source text to transform
     * @param replacements rows of {@code {piece, replacement}}
     * @return the transformed text
     */
    public static String replace(String source, String[][] replacements) {
        return replace(source, replacements, replacements.length);
    }

    /**
     * Applies the first {@code count} {@code {piece, replacement}} pairs in order.
     *
     * @param source text to transform
     * @param replacements rows of {@code {piece, replacement}}
     * @param count number of leading rows to apply
     * @return the transformed text
     */
    public static String replace(String source, String[][] replacements, int count) {
        for (int i = 0; i < count; ++i) {
            source = replace(source, replacements[i][0], replacements[i][1]);
        }
        return source;
    }

    /**
     * Applies every pair in order, optionally in the reverse direction.
     *
     * @param source text to transform
     * @param replacements rows of {@code {piece, replacement}}
     * @param reverse when true, each row's second element is replaced by its first
     * @return the transformed text
     */
    public static String replace(String source, String[][] replacements, boolean reverse) {
        if (!reverse) {
            return replace(source, replacements);
        }
        for (int i = 0; i < replacements.length; ++i) {
            source = replace(source, replacements[i][1], replacements[i][0]);
        }
        return source;
    }

    /**
     * Turns text into an anchor-style identifier: lowercased (English rules), with each run of
     * characters that are not letters or digits collapsed to a single {@code _}, and without a
     * leading or trailing {@code _}.
     *
     * @param source text to convert
     * @return the identifier
     */
    public static String anchorize(String source) {
        String result = NON_ALPHANUMERIC_RUN.matcher(source.toLowerCase(Locale.ENGLISH)).replaceAll("_");
        if (result.endsWith("_")) {
            result = result.substring(0, result.length() - 1);
        }
        if (result.startsWith("_")) {
            result = result.substring(1);
        }
        return result;
    }

    /** Copies a class-relative resource into {@code targetDirectory} under its own name. */
    public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory) {
        copyFile(class1, sourceFile, targetDirectory, sourceFile, null);
    }

    /** Copies a class-relative resource into {@code targetDirectory} as {@code newName}. */
    public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName) {
        copyFile(class1, sourceFile, targetDirectory, newName, null);
    }

    /**
     * Copies a UTF-8 class-relative resource into {@code targetDirectory} as {@code newName},
     * applying replacements to each line. The target is written as UTF-8.
     *
     * @param class1 class whose resource path is used to find the source
     * @param sourceFile resource name
     * @param targetDirectory directory to write into
     * @param newName name of the file to create
     * @param replacementList alternating piece/replacement strings, or null for none
     * @throws ICUUncheckedIOException if the source cannot be read or the target cannot be created
     */
    public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName,
        String[] replacementList) {
        try {
            PrintWriter out = openUTF8Writer(targetDirectory, newName);
            try {
                appendFile(class1, sourceFile, UTF8, replacementList, out);
            } finally {
                // Close on failure as well, so a failed copy does not leak the target file handle.
                out.close();
            }
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e); // convert the checked exception
        }
    }

    /**
     * Resolves a class-relative resource and returns its URL text without the {@code file:} or
     * {@code jar:file:} prefix.
     *
     * @param class1 class whose resource path is used to find the file
     * @param filename resource name
     * @return the remainder of the resource URL after the recognized prefix
     * @throws ICUUncheckedIOException if the resource does not exist or its URL has another scheme
     */
    public static String getRelativeFileName(Class<?> class1, String filename) {
        URL resource = class1.getResource(filename);
        if (resource == null) {
            // getResource signals "not found" with null.
            throw new ICUUncheckedIOException("File not found: " + filename);
        }
        String resourceString = resource.toString();
        if (resourceString.startsWith("file:")) {
            return resourceString.substring(5);
        } else if (resourceString.startsWith("jar:file:")) {
            return resourceString.substring(9);
        } else {
            throw new ICUUncheckedIOException("File not found: " + resourceString);
        }
    }

    /**
     * Iterates over the lines of a UTF-8 class-relative resource. Example:
     *
     * <pre>
     * for (String s : FileUtilities.in(SomeClass.class, name)) {
     *     ...
     * }
     * </pre>
     *
     * The underlying reader is closed once the last line has been read.
     */
    public static Iterable<String> in(Class<?> class1, String file) {
        return With.in(new FileLines(openFile(class1, file, UTF8)));
    }

    /**
     * Iterates over the lines of a class-relative resource in the given charset. The underlying
     * reader is closed once the last line has been read.
     */
    public static Iterable<String> in(Class<?> class1, String file, Charset charset) {
        return With.in(new FileLines(openFile(class1, file, charset)));
    }

    /**
     * Iterates over the lines of a UTF-8 file. Example:
     *
     * <pre>
     * for (String s : FileUtilities.in(directory, name)) {
     *     ...
     * }
     * </pre>
     *
     * The underlying reader is closed once the last line has been read.
     */
    public static Iterable<String> in(String directory, String file) {
        return With.in(new FileLines(openFile(directory, file, UTF8)));
    }

    /**
     * Iterates over the lines of an already-open reader. The reader is closed once the last line
     * has been read.
     */
    public static Iterable<String> in(BufferedReader reader) {
        return With.in(new FileLines(reader));
    }

    /**
     * Iterates over the lines of a file in the given charset. The underlying reader is closed
     * once the last line has been read.
     */
    public static Iterable<String> in(String directory, String file, Charset charset) {
        return With.in(new FileLines(openFile(directory, file, charset)));
    }

    /**
     * Iterates over the lines of a UTF-8 file. The underlying reader is closed once the last line
     * has been read.
     */
    public static Iterable<String> in(File file) {
        return With.in(new FileLines(openFile(file, UTF8)));
    }

    /**
     * Line source backing the {@code in(...)} methods: returns one line per call and null at end
     * of input, closing the reader at that point.
     */
    private static class FileLines implements SimpleIterator<String> {
        /** The reader being consumed; set to null once it has been exhausted and closed. */
        private BufferedReader input;

        public FileLines(BufferedReader input) {
            this.input = input;
        }

        @Override
        public String next() {
            if (input == null) {
                // Already exhausted: keep reporting end-of-input rather than reading a closed stream.
                return null;
            }
            try {
                String result = input.readLine();
                if (result == null) {
                    input.close();
                    input = null;
                }
                return result;
            } catch (IOException e) {
                throw new ICUUncheckedIOException(e); // convert the checked exception
            }
        }
    }

    /**
     * Removes a trailing {@code #} comment, a leading U+FEFF, and surrounding whitespace.
     *
     * @param line the raw line
     * @return the cleaned line, possibly empty
     */
    public static String cleanLine(String line) {
        int comment = line.indexOf("#");
        if (comment >= 0) {
            line = line.substring(0, comment);
        }
        if (line.startsWith("\uFEFF")) {
            line = line.substring(1);
        }
        return line.trim();
    }

    /**
     * Cleans a line with {@link #cleanLine(String)} and splits it on {@link #SEMI_SPLIT}.
     *
     * @param line the raw line
     * @return the fields, or null when the cleaned line is empty
     */
    public static String[] cleanSemiFields(String line) {
        line = cleanLine(line);
        return line.isEmpty() ? null : SEMI_SPLIT.split(line);
    }

    /** Callback for {@link #handleFile(String, LineHandler)}. */
    public interface LineHandler {
        /**
         * Handles one raw line.
         *
         * @param line the line as read
         * @return false if the line was skipped
         */
        boolean handle(String line) throws Exception;
    }

    /**
     * Feeds every line of the data file {@code filename}, opened through
     * {@code CldrUtility.getUTF8Data}, to {@code handler}, and closes the file afterwards even
     * if the handler fails.
     *
     * @param filename name passed to {@code CldrUtility.getUTF8Data}
     * @param handler receives each line
     * @throws IOException if reading fails
     * @throws ICUUncheckedIOException wrapping any exception thrown by the handler, with the
     *         offending line in the message
     */
    public static void handleFile(String filename, LineHandler handler) throws IOException {
        BufferedReader in = CldrUtility.getUTF8Data(filename);
        try {
            while (true) {
                String line = in.readLine();
                if (line == null) {
                    break;
                }
                try {
                    if (!handler.handle(line)) {
                        if (SHOW_SKIP) {
                            System.out.println("Skipping line: " + line);
                        }
                    }
                } catch (Exception e) {
                    throw new ICUUncheckedIOException("Problem with line: " + line, e);
                }
            }
        } finally {
            in.close();
        }
    }
}