1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2002-2008, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 */ 9 package com.ibm.icu.dev.test.perf; 10 11 import java.io.BufferedReader; 12 import java.io.FileInputStream; 13 import java.io.IOException; 14 import java.io.InputStream; 15 import java.io.InputStreamReader; 16 import java.io.PushbackInputStream; 17 import java.io.Reader; 18 import java.lang.reflect.Method; 19 import java.util.ArrayList; 20 import java.util.Arrays; 21 import java.util.HashMap; 22 import java.util.HashSet; 23 import java.util.Iterator; 24 import java.util.Locale; 25 import java.util.Map; 26 import java.util.Set; 27 28 import com.ibm.icu.dev.tool.UOption; 29 import com.ibm.icu.impl.LocaleUtility; 30 31 /** 32 * Base class for performance testing framework. To use, the subclass can simply 33 * define one or more instance methods with names beginning with "test" (case 34 * ignored). The prototype of the method is 35 * 36 * PerfTest.Function testTheName() 37 * 38 * The actual performance test will execute on the returned Commond object 39 * (refer to Command Pattern). To call a test from command line, the 'test' 40 * prefix of the test method name can be ignored/removed. 41 * 42 * In addition, the subclass should define a main() method that calls 43 * PerfTest.run() as defined here. 44 * 45 * If the subclasses uses any command line arguments (beyond those handled 46 * automatically by this calss) then it should override PerfTest.setup() to 47 * handle its arguments. If the subclasse needs more sophisticated management 48 * for controlling finding/calling test method, it can replace the default 49 * implementation for PerfTest.testProvider before calling PerfTest.run(). 50 * 51 * Example invocation: java -cp classes -verbose:gc 52 * com.ibm.icu.dev.test.perf.UnicodeSetPerf --gc --passes 4 --iterations 100 53 * UnicodeSetAdd [[:l:][:c:]] 54 * 55 * Example output: [GC 511K->192K(1984K), 0.0086170 secs] [GC 704K->353K(1984K), 56 * 0.0059619 secs] [Full GC 618K->371K(1984K), 0.0242779 secs] [Full GC 57 * 371K->371K(1984K), 0.0228649 secs] = testUnicodeSetAdd begin 100 = 58 * testUnicodeSetAdd end 11977 1109044 = testUnicodeSetAdd begin 100 = 59 * testUnicodeSetAdd end 12047 1109044 = testUnicodeSetAdd begin 100 = 60 * testUnicodeSetAdd end 11987 1109044 = testUnicodeSetAdd begin 100 = 61 * testUnicodeSetAdd end 11978 1109044 62 * 63 * The [] lines are emitted by the JVM as a result of the -verbose:gc switch. 64 * 65 * Lines beginning with '=' are emitted by PerfTest: = testUnicodeSetAdd begin 66 * 100 A 'begin' statement contains the name of the setup method, which 67 * determines what test function is measures, and the number of iterations that 68 * will be times. = testUnicodeSetAdd end 12047 1109044 An 'end' statement gives 69 * the name of the setup method again, and then two integers. The first is the 70 * total elapsed time in milliseconds, and the second is the number of events 71 * per iteration. In this example, the time per event is 12047 / (100 * 1109044) 72 * or 108.6 ns/event. 73 * 74 * Raw times are given as integer ms, because this is what the system measures. 75 * 76 * @author Alan Liu 77 * @since ICU 2.4 78 */ 79 public abstract class PerfTest { 80 // Command-line options set these: 81 protected boolean verbose; 82 protected String sourceDir; 83 protected String fileName; 84 85 // protected String resolvedFileName; 86 protected String encoding; 87 protected String testName; 88 protected boolean uselen; 89 protected int iterations; 90 protected int passes; 91 protected int time; 92 protected boolean line_mode; 93 protected boolean bulk_mode; 94 protected Locale locale; 95 protected boolean doPriorGC; 96 protected int threads; 97 98 protected TestCmdProvider testProvider = new TestPrefixProvider(this); 99 100 static interface TestCmdProvider { 101 /** 102 * @return The names for all available test. 103 */ getAllTestCmdNames()104 public Set getAllTestCmdNames(); 105 106 /** 107 * @param name 108 * @return Whether the given name is a test name. The implementation may 109 * have more sophisticated naming control here. 110 * TestCmdProvider.isTestCmd() != Set.contains() 111 */ isTestCmd(String name)112 public boolean isTestCmd(String name); 113 114 /** 115 * @param name 116 * @return the test Command or null 117 */ getTestCmd(String name)118 public PerfTest.Function getTestCmd(String name); 119 } 120 121 /** 122 * Treat all method beginning with 'test' prefix (ignoring case) for given 123 * object as the test methods. 124 */ 125 static class TestPrefixProvider implements TestCmdProvider { 126 private Map theTests = null; // Map<string(no case), string(with case)> 127 private Set orgNames = null; // shadow reference, ==theTests, for better output 128 private Object refer; 129 TestPrefixProvider(Object theProvider)130 TestPrefixProvider(Object theProvider) { 131 refer = theProvider; 132 } 133 getAllTestCmdNames()134 public Set getAllTestCmdNames() { 135 if (theTests == null) { 136 theTests = new HashMap(); 137 orgNames = new HashSet(); 138 Method[] methods = refer.getClass().getDeclaredMethods(); 139 for (int i = 0; i < methods.length; i++) { 140 String org = methods[i].getName(); 141 String name = org.toLowerCase(); // ignoring case 142 // beginning with 'test' 143 // Note: methods named 'test()' are ignored 144 if (name.length() > 4 && name.startsWith("test")) { 145 if (theTests.containsKey(name)) { 146 throw new Error( 147 "Duplicate method name ignoring case: " 148 + name); 149 } 150 theTests.put(name, org); 151 orgNames.add(org); 152 } 153 } 154 } 155 return orgNames; // beginning with 'test', keeping case 156 } 157 158 /** 159 * The given name will map to a method of the same name, or a method 160 * named "test" + name. Case is ignored. 161 */ isTestCmd_impl(String name)162 private String isTestCmd_impl(String name) { 163 getAllTestCmdNames(); 164 String tn1 = name.toLowerCase(); 165 String tn2 = "test" + tn1; 166 if (theTests.containsKey(tn1)) { 167 return tn1; 168 } else if (theTests.containsKey(tn2)) { 169 return tn2; 170 } 171 return null; 172 } 173 isTestCmd(String name)174 public boolean isTestCmd(String name) { 175 return isTestCmd_impl(name) != null; 176 } 177 getTestCmd(String aname)178 public Function getTestCmd(String aname) { 179 String name = (String) theTests.get(isTestCmd_impl(aname)); 180 if (name == null) { 181 return null; 182 } 183 184 try { 185 Method m = refer.getClass().getDeclaredMethod(name, 186 (Class[]) null); 187 return (Function) m.invoke(refer, new Object[] {}); 188 } catch (Exception e) { 189 throw new Error( 190 "TestPrefixProvider implementation error. Finding: " 191 + name, e); 192 } 193 } 194 } 195 196 /** 197 * Subclasses of PerfTest will need to create subclasses of Function that 198 * define a call() method which contains the code to be timed. They then 199 * call setTestFunction() in their "Test..." method to establish this as the 200 * current test functor. 201 */ 202 public abstract static class Function { 203 204 /** 205 * Subclasses should implement this method to do the action to be 206 * measured if the action is thread-safe 207 */ call()208 public void call() { call(0); } 209 210 /** 211 * Subclasses should implement this method if the action is not thread-safe 212 */ call(int i)213 public void call(int i) { call(); } 214 215 /** 216 * Subclasses may implement this method to return positive integer 217 * indicating the number of operations in a single call to this object's 218 * call() method. If subclasses do not override this method, the default 219 * implementation returns 1. 220 */ getOperationsPerIteration()221 public long getOperationsPerIteration() { 222 return 1; 223 } 224 225 /** 226 * Subclasses may implement this method to return either positive or 227 * negative integer indicating the number of events in a single call to 228 * this object's call() method. If subclasses do not override this 229 * method, the default implementation returns -1, indicating that events 230 * are not applicable to this test. e.g: Number of breaks / iterations 231 * for break iterator 232 */ getEventsPerIteration()233 public long getEventsPerIteration() { 234 return -1; 235 } 236 237 /** 238 * Call call() n times in a tight loop and return the elapsed 239 * milliseconds. If n is small and call() is fast the return result may 240 * be zero. Small return values have limited meaningfulness, depending 241 * on the underlying VM and OS. 242 */ time(long n)243 public final long time(long n) { 244 long start, stop; 245 start = System.currentTimeMillis(); 246 while (n-- > 0) { 247 call(); 248 } 249 stop = System.currentTimeMillis(); 250 return stop - start; // ms 251 } 252 253 254 /** 255 * init is called each time before looping through call 256 */ init()257 public void init() {} 258 259 getID()260 public final int getID() { 261 return id; 262 } 263 setID(int id)264 public final void setID(int id) { 265 this.id = id; 266 } 267 268 private int id; 269 } 270 271 private class FunctionRunner implements Runnable { FunctionRunner(Function f, long loops, int id)272 public FunctionRunner(Function f, long loops, int id) { 273 this.f = f; 274 this.loops = loops; 275 this.id = id; 276 } 277 run()278 public void run() { 279 long n = loops; 280 while (n-- > 0) 281 f.call(id); 282 } 283 284 private Function f; 285 286 private long loops; 287 private int id; 288 } 289 290 291 /** 292 * Exception indicating a usage error. 293 */ 294 public static class UsageException extends Exception { 295 /** 296 * For serialization 297 */ 298 private static final long serialVersionUID = -1201256240606806242L; 299 UsageException(String message)300 public UsageException(String message) { 301 super(message); 302 } 303 UsageException()304 public UsageException() { 305 super(); 306 } 307 } 308 309 /** 310 * Constructor. 311 */ PerfTest()312 protected PerfTest() { 313 } 314 315 /** 316 * Framework method. Default implementation does not parse any extra 317 * arguments. Subclasses may override this to parse extra arguments. 318 * Subclass implementations should NOT call the base class implementation. 319 */ setup(String[] args)320 protected void setup(String[] args) { 321 if (args.length > 0) { 322 throw new RuntimeException("Extra arguments received"); 323 } 324 } 325 326 /** 327 * These must be kept in sync with getOptions(). 328 */ 329 static final int HELP1 = 0; 330 static final int HELP2 = 1; 331 static final int VERBOSE = 2; 332 static final int SOURCEDIR = 3; 333 static final int ENCODING = 4; 334 static final int USELEN = 5; 335 static final int FILE_NAME = 6; 336 static final int PASSES = 7; 337 static final int ITERATIONS = 8; 338 static final int TIME = 9; 339 static final int LINE_MODE = 10; 340 static final int BULK_MODE = 11; 341 static final int LOCALE = 12; 342 static final int TEST_NAME = 13; 343 static final int THREADS = 14; 344 345 // Options above here are identical to those in C; keep in sync with C 346 // Options below here are unique to Java; shift down as necessary 347 static final int GARBAGE_COLLECT = 14; 348 static final int LIST = 15; 349 getOptions()350 UOption[] getOptions() { 351 return new UOption[] { 352 UOption.HELP_H(), 353 UOption.HELP_QUESTION_MARK(), 354 UOption.VERBOSE(), 355 UOption.SOURCEDIR(), 356 UOption.ENCODING(), 357 UOption.DEF("uselen", 'u', UOption.NO_ARG), 358 UOption.DEF("filename", 'f', UOption.REQUIRES_ARG), 359 UOption.DEF("passes", 'p', UOption.REQUIRES_ARG), 360 UOption.DEF("iterations", 'i', UOption.REQUIRES_ARG), 361 UOption.DEF("time", 't', UOption.REQUIRES_ARG), 362 UOption.DEF("line-mode", 'l', UOption.NO_ARG), 363 UOption.DEF("bulk-mode", 'b', UOption.NO_ARG), 364 UOption.DEF("locale", 'L', UOption.REQUIRES_ARG), 365 UOption.DEF("testname", 'T', UOption.REQUIRES_ARG), 366 UOption.DEF("threads", 'r', UOption.REQUIRES_ARG), 367 368 // Options above here are identical to those in C; keep in sync 369 // Options below here are unique to Java 370 371 UOption.DEF("gc", 'g', UOption.NO_ARG), 372 UOption.DEF("list", (char) -1, UOption.NO_ARG), }; 373 } 374 375 /** 376 * Subclasses should call this method in their main(). run() will in turn 377 * call setup() with any arguments it does not parse. This method parses the 378 * command line and runs the tests given on the command line, with the given 379 * parameters. See the class description for details. 380 */ run(String[] args)381 protected final void run(String[] args) throws Exception { 382 Set testList = parseOptions(args); 383 384 // Run the tests 385 for (Iterator iter = testList.iterator(); iter.hasNext();) { 386 String meth = (String) iter.next(); 387 388 // Call meth to set up the test 389 // long eventsPerCall = -1; 390 Function testFunction = testProvider.getTestCmd(meth); 391 if (testFunction == null) { 392 throw new RuntimeException(meth 393 + " failed to return a test function"); 394 } 395 if (testFunction.getOperationsPerIteration() < 1) { 396 throw new RuntimeException(meth 397 + " returned an illegal operations/iteration()"); 398 } 399 400 long t; 401 // long b = System.currentTimeMillis(); 402 long loops = getIteration(meth, testFunction); 403 // System.out.println("The guess cost: " + (System.currentTimeMillis() - b)/1000. + " s."); 404 405 for (int j = 0; j < passes; ++j) { 406 long events = -1; 407 if (verbose) { 408 if (iterations > 0) { 409 System.out.println("= " + meth + " begin " + iterations); 410 } else { 411 System.out.println("= " + meth + " begin " + time + " seconds"); 412 } 413 } else { 414 System.out.println("= " + meth + " begin "); 415 } 416 417 t = performLoops(testFunction, loops); 418 419 events = testFunction.getEventsPerIteration(); 420 421 if (verbose) { 422 if (events == -1) { 423 System.out.println("= " + meth + " end " + (t / 1000.0) + " loops: " + loops + " operations: " 424 + testFunction.getOperationsPerIteration()); 425 } else { 426 System.out.println("= " + meth + " end " + (t / 1000.0) + " loops: " + loops + " operations: " 427 + testFunction.getOperationsPerIteration() + " events: " + events); 428 } 429 } else { 430 if (events == -1) { 431 System.out.println("= " + meth + " end " + (t / 1000.0) + " " + loops + " " 432 + testFunction.getOperationsPerIteration()); 433 } else { 434 System.out.println("= " + meth + " end " + (t / 1000.0) + " " + loops + " " 435 + testFunction.getOperationsPerIteration() + " " + events); 436 } 437 } 438 439 } 440 } 441 } 442 443 /** 444 * @param args 445 * @return the method list to call 446 * @throws UsageException 447 */ parseOptions(String[] args)448 private Set parseOptions(String[] args) throws UsageException { 449 450 doPriorGC = false; 451 encoding = ""; 452 uselen = false; 453 fileName = null; 454 sourceDir = null; 455 line_mode = false; 456 verbose = false; 457 bulk_mode = false; 458 passes = iterations = time = -1; 459 locale = null; 460 testName = null; 461 threads = 1; 462 463 UOption[] options = getOptions(); 464 int remainingArgc = UOption.parseArgs(args, options); 465 466 if (args.length == 0 || options[HELP1].doesOccur || options[HELP2].doesOccur) 467 throw new UsageException(); 468 469 if (options[LIST].doesOccur) { 470 System.err.println("Available tests:"); 471 Set testNames = testProvider.getAllTestCmdNames(); 472 for (Iterator iter = testNames.iterator(); iter.hasNext();) { 473 String name = (String) iter.next(); 474 System.err.println(" " + name); 475 } 476 System.exit(0); 477 } 478 479 if (options[TIME].doesOccur && options[ITERATIONS].doesOccur) 480 throw new UsageException("Cannot specify both '-t <seconds>' and '-i <iterations>'"); 481 else if (!options[TIME].doesOccur && !options[ITERATIONS].doesOccur) 482 throw new UsageException("Either '-t <seconds>' or '-i <iterations>' must be specified"); 483 else if (options[ITERATIONS].doesOccur) { 484 try { 485 iterations = Integer.parseInt(options[ITERATIONS].value); 486 } catch (NumberFormatException ex) { 487 throw new UsageException("'-i <iterations>' requires an integer number of iterations"); 488 } 489 } else { //if (options[TIME].doesOccur) 490 try { 491 time = Integer.parseInt(options[TIME].value); 492 } catch (NumberFormatException ex) { 493 throw new UsageException("'-r <seconds>' requires an integer number of seconds"); 494 } 495 } 496 497 if (!options[PASSES].doesOccur) 498 throw new UsageException("'-p <passes>' must be specified"); 499 else 500 passes = Integer.parseInt(options[PASSES].value); 501 502 if (options[LINE_MODE].doesOccur && options[BULK_MODE].doesOccur) 503 throw new UsageException("Cannot specify both '-l' (line mode) and '-b' (bulk mode)"); 504 505 if (options[THREADS].doesOccur) { 506 try { 507 threads = Integer.parseInt(options[THREADS].value); 508 } catch (NumberFormatException ex) { 509 throw new UsageException("'-r <threads>' requires an integer number of threads"); 510 } 511 if (threads <= 0) 512 throw new UsageException("'-r <threads>' requires an number of threads greater than 0"); 513 } 514 515 line_mode = options[LINE_MODE].doesOccur; 516 bulk_mode = options[BULK_MODE].doesOccur; 517 verbose = options[VERBOSE].doesOccur; 518 uselen = options[USELEN].doesOccur; 519 doPriorGC = options[GARBAGE_COLLECT].doesOccur; 520 521 if (options[SOURCEDIR].doesOccur) sourceDir = options[SOURCEDIR].value; 522 if (options[ENCODING].doesOccur) encoding = options[ENCODING].value; 523 if (options[FILE_NAME].doesOccur) fileName = options[FILE_NAME].value; 524 if (options[TEST_NAME].doesOccur) testName = options[TEST_NAME].value; 525 if (options[LOCALE].doesOccur) locale = LocaleUtility.getLocaleFromName(options[LOCALE].value); 526 527 528 // build the test list 529 Set testList = new HashSet(); 530 int i, j; 531 for (i = 0; i < remainingArgc; ++i) { 532 // is args[i] a method name? 533 if (testProvider.isTestCmd(args[i])) { 534 testList.add(args[i]); 535 } else { 536 // args[i] is neither a method name nor a number. Pass 537 // everything from here on through to the subclass via 538 // setup(). 539 break; 540 } 541 } 542 543 // if no tests were specified, put all the tests in the test list 544 if (testList.size() == 0) { 545 Set testNames = testProvider.getAllTestCmdNames(); 546 Iterator iter = testNames.iterator(); 547 while (iter.hasNext()) 548 testList.add((String)iter.next()); 549 } 550 551 // pass remaining arguments, if any, through to the subclass via setup() method. 552 String[] subclassArgs = new String[remainingArgc - i]; 553 for (j = 0; i < remainingArgc; j++) 554 subclassArgs[j] = args[i++]; 555 setup(subclassArgs); 556 557 // Put the heap in a consistent state 558 if (doPriorGC) 559 gc(); 560 561 return testList; 562 } 563 564 /** 565 * Translate '-t time' to iterations (or just return '-i iteration') 566 * 567 * @param meth 568 * @param fn 569 * @return rt 570 */ getIteration(String methName, Function fn)571 private long getIteration(String methName, Function fn) throws InterruptedException { 572 long iter = 0; 573 if (time < 0) { // && iterations > 0 574 iter = iterations; 575 } else { // && iterations < 0 576 // Translate time to iteration 577 // Assuming there is a linear relation between time and iterations 578 579 if (verbose) { 580 System.out.println("= " + methName + " calibrating " + time 581 + " seconds"); 582 } 583 584 long base = time * 1000; 585 // System.out.println("base :" + base); 586 long seed = 1; 587 long t = 0; 588 while (t < base * 0.9 || base * 1.1 < t) { // + - 10% 589 if (iter == 0 || t == 0) { 590 iter = seed; // start up from 1 591 seed *= 100; // if the method is too fast (t == 0), 592 // multiply 100 times 593 // 100 is rational because 'base' is always larger than 1000 594 } else { 595 // If 't' is large enough, use linear function to calculate 596 // new iteration 597 // 598 // new iter(base) old iter 599 // -------------- = -------- = k 600 // new time old time 601 // 602 // System.out.println("before guess t: " + t); 603 // System.out.println("before guess iter: " + iter); 604 iter = (long) ((double) iter / t * base); // avoid long 605 // cut, eg. 1/10 606 // == 0 607 if (iter == 0) { 608 throw new RuntimeException( 609 "Unable to converge on desired duration"); 610 } 611 } 612 t = performLoops(fn, iter); 613 } 614 // System.out.println("final t : " + t); 615 // System.out.println("final i : " + iter); 616 } 617 return iter; 618 } 619 620 performLoops(Function function, long loops)621 private long performLoops(Function function, long loops) throws InterruptedException { 622 function.init(); 623 if (threads > 1) { 624 Thread[] threadList = new Thread[threads]; 625 for (int i=0; i<threads; i++) 626 threadList[i] = new Thread(new FunctionRunner(function, loops, i)); 627 628 long start = System.currentTimeMillis(); 629 for (int i=0; i<threads; i++) 630 threadList[i].start(); 631 for (int i=0; i<threads; i++) 632 threadList[i].join(); 633 return System.currentTimeMillis() - start; 634 635 } else { 636 return function.time(loops); // ms 637 } 638 } 639 640 641 /** 642 * Invoke the runtime's garbage collection procedure repeatedly until the 643 * amount of free memory stabilizes to within 10%. 644 */ gc()645 protected void gc() { 646 if (false) { 647 long last; 648 long free = 1; 649 Runtime runtime = Runtime.getRuntime(); 650 do { 651 runtime.gc(); 652 last = free; 653 free = runtime.freeMemory(); 654 } while (((double) Math.abs(free - last)) / free > 0.1); 655 // Wait for the change in free memory to drop under 10% 656 // between successive calls. 657 } 658 659 // From "Java Platform Performance". This is the procedure 660 // recommended by Javasoft. 661 try { 662 System.gc(); 663 Thread.sleep(100); 664 System.runFinalization(); 665 Thread.sleep(100); 666 667 System.gc(); 668 Thread.sleep(100); 669 System.runFinalization(); 670 Thread.sleep(100); 671 } catch (InterruptedException e) { 672 } 673 } 674 675 readToEOS(Reader reader)676 public static char[] readToEOS(Reader reader) { 677 ArrayList vec = new ArrayList(); 678 int count = 0; 679 int pos = 0; 680 final int MAXLENGTH = 0x8000; // max buffer size - 32K 681 int length = 0x80; // start with small buffers and work up 682 do { 683 pos = 0; 684 length = length >= MAXLENGTH ? MAXLENGTH : length * 2; 685 char[] buffer = new char[length]; 686 try { 687 do { 688 int n = reader.read(buffer, pos, length - pos); 689 if (n == -1) { 690 break; 691 } 692 pos += n; 693 } while (pos < length); 694 } 695 catch (IOException e) { 696 } 697 vec.add(buffer); 698 count += pos; 699 } while (pos == length); 700 701 char[] data = new char[count]; 702 pos = 0; 703 for (int i = 0; i < vec.size(); ++i) { 704 char[] buf = (char[]) vec.get(i); 705 int len = Math.min(buf.length, count - pos); 706 System.arraycopy(buf, 0, data, pos, len); 707 pos += len; 708 } 709 return data; 710 } readToEOS(InputStream stream)711 public static byte[] readToEOS(InputStream stream) { 712 713 ArrayList vec = new ArrayList(); 714 int count = 0; 715 int pos = 0; 716 final int MAXLENGTH = 0x8000; // max buffer size - 32K 717 int length = 0x80; // start with small buffers and work up 718 do { 719 pos = 0; 720 length = length >= MAXLENGTH ? MAXLENGTH : length * 2; 721 byte[] buffer = new byte[length]; 722 try { 723 do { 724 int n = stream.read(buffer, pos, length - pos); 725 if (n == -1) { 726 break; 727 } 728 pos += n; 729 } while (pos < length); 730 } 731 catch (IOException e) { 732 } 733 vec.add(buffer); 734 count += pos; 735 } while (pos == length); 736 737 738 byte[] data = new byte[count]; 739 pos = 0; 740 for (int i = 0; i < vec.size(); ++i) { 741 byte[] buf = (byte[]) vec.get(i); 742 int len = Math.min(buf.length, count - pos); 743 System.arraycopy(buf, 0, data, pos, len); 744 pos += len; 745 } 746 return data; 747 } 748 readLines(String filename, String srcEncoding, boolean bulkMode)749 protected String[] readLines(String filename, String srcEncoding, boolean bulkMode) { 750 FileInputStream fis = null; 751 InputStreamReader isr = null; 752 BufferedReader br = null; 753 try { 754 fis = new FileInputStream(filename); 755 isr = new InputStreamReader(fis, srcEncoding); 756 br = new BufferedReader(isr); 757 } catch (Exception e) { 758 System.err.println("Error: File access exception: " + e.getMessage() + "!"); 759 System.exit(1); 760 } 761 ArrayList list = new ArrayList(); 762 while (true) { 763 String line = null; 764 try { 765 line = readDataLine(br); 766 } catch (Exception e) { 767 System.err.println("Read File Error" + e.getMessage() + "!"); 768 System.exit(1); 769 } 770 if (line == null) break; 771 if (line.length() == 0) continue; 772 list.add(line); 773 } 774 775 int size = list.size(); 776 String[] lines = null; 777 778 if (bulkMode) { 779 lines = new String[1]; 780 StringBuffer buffer = new StringBuffer(""); 781 for (int i = 0; i < size; ++i) { 782 buffer.append((String) list.get(i)); 783 /*if (i < (size - 1)) { 784 buffer.append("\r\n"); 785 }*/ 786 } 787 lines[0] = buffer.toString(); 788 } else { 789 lines = new String[size]; 790 for (int i = 0; i < size; ++i) { 791 lines[i] = (String) list.get(i); 792 } 793 } 794 795 return lines; 796 } 797 readDataLine(BufferedReader br)798 public String readDataLine(BufferedReader br) throws Exception { 799 String originalLine = ""; 800 String line = ""; 801 try { 802 line = originalLine = br.readLine(); 803 if (line == null) return null; 804 if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1); 805 int commentPos = line.indexOf('#'); 806 if (commentPos >= 0) line = line.substring(0, commentPos); 807 line = line.trim(); 808 } catch (Exception e) { 809 throw new Exception("Line \"{0}\", \"{1}\"" + originalLine + " " 810 + line + " " + e.toString()); 811 } 812 return line; 813 } 814 815 816 public static class BOMFreeReader extends Reader { 817 InputStreamReader reader; 818 String encoding; 819 int MAX_BOM_LENGTH = 5; 820 821 /** 822 * Creates a new reader, skipping a BOM associated with the given 823 * encoding. Equivalent to BOMFreeReader(in, null). 824 * 825 * @param in 826 * The input stream. 827 * @throws IOException 828 * Thrown if reading for a BOM causes an IOException. 829 */ BOMFreeReader(InputStream in)830 public BOMFreeReader(InputStream in) throws IOException { 831 this(in, null); 832 } 833 834 /** 835 * Creates a new reader, skipping a BOM associated with the given 836 * encoding. If encoding is null, attempts to detect the encoding by the 837 * BOM. 838 * 839 * @param in 840 * The input stream. 841 * @param encoding 842 * The encoding to use. Can be null. 843 * @throws IOException 844 * Thrown if reading for a BOM causes an IOException. 845 */ BOMFreeReader(InputStream in, String encoding)846 public BOMFreeReader(InputStream in, String encoding) throws IOException { 847 PushbackInputStream pushback = new PushbackInputStream(in, MAX_BOM_LENGTH); 848 this.encoding = encoding; 849 850 byte[] start = new byte[MAX_BOM_LENGTH]; 851 Arrays.fill(start, (byte)0xa5); 852 853 int amountRead = pushback.read(start, 0, MAX_BOM_LENGTH); 854 int bomLength = detectBOMLength(start); 855 if (amountRead > bomLength) 856 pushback.unread(start, bomLength, amountRead - bomLength); 857 858 reader = (encoding == null) ? new InputStreamReader(pushback) : new InputStreamReader(pushback, encoding); 859 } 860 861 /** 862 * Determines the length of a BOM in the beginning of start. Assumes 863 * start is at least a length 5 array. If encoding is null, the check 864 * will not be encoding specific and it will set the encoding of this 865 * BOMFreeReader. 866 * 867 * @param start 868 * The starting bytes. 869 * @param encoding 870 * The encoding. Can be null. 871 * @return The length of a detected BOM. 872 */ detectBOMLength(byte[] start)873 private int detectBOMLength(byte[] start) { 874 if ((encoding == null || "UTF-16BE".equals(encoding)) && start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) { 875 if (encoding == null) this.encoding = "UTF-16BE"; 876 return 2; // "UTF-16BE"; 877 } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) { 878 if ((encoding == null || "UTF-32LE".equals(encoding)) && start[2] == (byte) 0x00 879 && start[3] == (byte) 0x00) { 880 if (encoding == null) this.encoding = "UTF-32LE"; 881 return 4; // "UTF-32LE"; 882 } else if ((encoding == null || "UTF-16LE".equals(encoding))) { 883 if (encoding == null) this.encoding = "UTF-16LE"; 884 return 2; // "UTF-16LE"; 885 } 886 } else if ((encoding == null || "UTF-8".equals(encoding)) && start[0] == (byte) 0xEF 887 && start[1] == (byte) 0xBB && start[2] == (byte) 0xBF) { 888 if (encoding == null) this.encoding = "UTF-8"; 889 return 3; // "UTF-8"; 890 } else if ((encoding == null || "UTF-32BE".equals(encoding)) && start[0] == (byte) 0x00 891 && start[1] == (byte) 0x00 && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) { 892 if (encoding == null) this.encoding = "UTF-32BE"; 893 return 4; // "UTF-32BE"; 894 } else if ((encoding == null || "SCSU".equals(encoding)) && start[0] == (byte) 0x0E 895 && start[1] == (byte) 0xFE && start[2] == (byte) 0xFF) { 896 if (encoding == null) this.encoding = "SCSU"; 897 return 3; // "SCSU"; 898 } else if ((encoding == null || "BOCU-1".equals(encoding)) && start[0] == (byte) 0xFB 899 && start[1] == (byte) 0xEE && start[2] == (byte) 0x28) { 900 if (encoding == null) this.encoding = "BOCU-1"; 901 return 3; // "BOCU-1"; 902 } else if ((encoding == null || "UTF-7".equals(encoding)) && start[0] == (byte) 0x2B 903 && start[1] == (byte) 0x2F && start[2] == (byte) 0x76) { 904 if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) { 905 if (encoding == null) this.encoding = "UTF-7"; 906 return 5; // "UTF-7"; 907 } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39 || start[3] == (byte) 0x2B 908 || start[3] == (byte) 0x2F) { 909 if (encoding == null) this.encoding = "UTF-7"; 910 return 4; // "UTF-7"; 911 } 912 } else if ((encoding == null || "UTF-EBCDIC".equals(encoding)) && start[0] == (byte) 0xDD 913 && start[2] == (byte) 0x73 && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) { 914 if (encoding == null) this.encoding = "UTF-EBCDIC"; 915 return 4; // "UTF-EBCDIC"; 916 } 917 918 /* no known Unicode signature byte sequence recognized */ 919 return 0; 920 } 921 read(char[] cbuf, int off, int len)922 public int read(char[] cbuf, int off, int len) throws IOException { 923 return reader.read(cbuf, off, len); 924 } 925 close()926 public void close() throws IOException { 927 reader.close(); 928 } 929 } 930 } 931 932 933 934 // eof 935