1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2002-2008, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 */ 9 package com.ibm.icu.dev.test.perf; 10 11 import java.io.BufferedReader; 12 import java.io.FileInputStream; 13 import java.io.IOException; 14 import java.io.InputStream; 15 import java.io.InputStreamReader; 16 import java.io.PushbackInputStream; 17 import java.io.Reader; 18 import java.lang.reflect.Method; 19 import java.util.ArrayList; 20 import java.util.Arrays; 21 import java.util.HashMap; 22 import java.util.HashSet; 23 import java.util.Iterator; 24 import java.util.Locale; 25 import java.util.Map; 26 import java.util.Set; 27 28 import com.ibm.icu.dev.tool.UOption; 29 import com.ibm.icu.impl.LocaleUtility; 30 31 /** 32 * Base class for performance testing framework. To use, the subclass can simply 33 * define one or more instance methods with names beginning with "test" (case 34 * ignored). The prototype of the method is 35 * 36 * PerfTest.Function testTheName() 37 * 38 * The actual performance test will execute on the returned Command object 39 * (refer to Command Pattern). To call a test from command line, the 'test' 40 * prefix of the test method name can be ignored/removed. 41 * 42 * In addition, the subclass should define a main() method that calls 43 * PerfTest.run() as defined here. 44 * 45 * If the subclasses uses any command line arguments (beyond those handled 46 * automatically by this class) then it should override PerfTest.setup() to 47 * handle its arguments. If the subclasses needs more sophisticated management 48 * for controlling finding/calling test method, it can replace the default 49 * implementation for PerfTest.testProvider before calling PerfTest.run(). 50 * 51 * Example invocation: java -cp classes -verbose:gc 52 * com.ibm.icu.dev.test.perf.UnicodeSetPerf --gc --passes 4 --iterations 100 53 * UnicodeSetAdd [[:l:][:c:]] 54 * 55 * Example output: [GC 511K->192K(1984K), 0.0086170 secs] [GC 704K->353K(1984K), 56 * 0.0059619 secs] [Full GC 618K->371K(1984K), 0.0242779 secs] [Full GC 57 * 371K->371K(1984K), 0.0228649 secs] = testUnicodeSetAdd begin 100 = 58 * testUnicodeSetAdd end 11977 1109044 = testUnicodeSetAdd begin 100 = 59 * testUnicodeSetAdd end 12047 1109044 = testUnicodeSetAdd begin 100 = 60 * testUnicodeSetAdd end 11987 1109044 = testUnicodeSetAdd begin 100 = 61 * testUnicodeSetAdd end 11978 1109044 62 * 63 * The [] lines are emitted by the JVM as a result of the -verbose:gc switch. 64 * 65 * Lines beginning with '=' are emitted by PerfTest: = testUnicodeSetAdd begin 66 * 100 A 'begin' statement contains the name of the setup method, which 67 * determines what test function is measures, and the number of iterations that 68 * will be times. = testUnicodeSetAdd end 12047 1109044 An 'end' statement gives 69 * the name of the setup method again, and then two integers. The first is the 70 * total elapsed time in milliseconds, and the second is the number of events 71 * per iteration. In this example, the time per event is 12047 / (100 * 1109044) 72 * or 108.6 ns/event. 73 * 74 * Raw times are given as integer ms, because this is what the system measures. 75 * 76 * @author Alan Liu 77 * @since ICU 2.4 78 */ 79 public abstract class PerfTest { 80 // Command-line options set these: 81 protected boolean verbose; 82 protected String sourceDir; 83 protected String fileName; 84 85 // protected String resolvedFileName; 86 protected String encoding; 87 protected String testName; 88 protected boolean uselen; 89 protected int iterations; 90 protected int passes; 91 protected int time; 92 protected boolean line_mode; 93 protected boolean bulk_mode; 94 protected Locale locale; 95 protected boolean doPriorGC; 96 protected int threads; 97 protected int duration; 98 protected boolean action; 99 100 protected TestCmdProvider testProvider = new TestPrefixProvider(this); 101 102 static interface TestCmdProvider { 103 /** 104 * @return The names for all available test. 105 */ getAllTestCmdNames()106 public Set getAllTestCmdNames(); 107 108 /** 109 * @param name 110 * @return Whether the given name is a test name. The implementation may 111 * have more sophisticated naming control here. 112 * TestCmdProvider.isTestCmd() != Set.contains() 113 */ isTestCmd(String name)114 public boolean isTestCmd(String name); 115 116 /** 117 * @param name 118 * @return the test Command or null 119 */ getTestCmd(String name)120 public PerfTest.Function getTestCmd(String name); 121 } 122 123 /** 124 * Treat all method beginning with 'test' prefix (ignoring case) for given 125 * object as the test methods. 126 */ 127 static class TestPrefixProvider implements TestCmdProvider { 128 private Map theTests = null; // Map<string(no case), string(with case)> 129 private Set orgNames = null; // shadow reference, ==theTests, for better output 130 private Object refer; 131 TestPrefixProvider(Object theProvider)132 TestPrefixProvider(Object theProvider) { 133 refer = theProvider; 134 } 135 getAllTestCmdNames()136 public Set getAllTestCmdNames() { 137 if (theTests == null) { 138 theTests = new HashMap(); 139 orgNames = new HashSet(); 140 Method[] methods = refer.getClass().getDeclaredMethods(); 141 for (int i = 0; i < methods.length; i++) { 142 String org = methods[i].getName(); 143 String name = org.toLowerCase(); // ignoring case 144 // beginning with 'test' 145 // Note: methods named 'test()' are ignored 146 if (name.length() > 4 && name.startsWith("test")) { 147 if (theTests.containsKey(name)) { 148 throw new Error( 149 "Duplicate method name ignoring case: " 150 + name); 151 } 152 theTests.put(name, org); 153 orgNames.add(org); 154 } 155 } 156 } 157 return orgNames; // beginning with 'test', keeping case 158 } 159 160 /** 161 * The given name will map to a method of the same name, or a method 162 * named "test" + name. Case is ignored. 163 */ isTestCmd_impl(String name)164 private String isTestCmd_impl(String name) { 165 getAllTestCmdNames(); 166 String tn1 = name.toLowerCase(); 167 String tn2 = "test" + tn1; 168 if (theTests.containsKey(tn1)) { 169 return tn1; 170 } else if (theTests.containsKey(tn2)) { 171 return tn2; 172 } 173 return null; 174 } 175 isTestCmd(String name)176 public boolean isTestCmd(String name) { 177 return isTestCmd_impl(name) != null; 178 } 179 getTestCmd(String aname)180 public Function getTestCmd(String aname) { 181 String name = (String) theTests.get(isTestCmd_impl(aname)); 182 if (name == null) { 183 return null; 184 } 185 186 try { 187 Method m = refer.getClass().getDeclaredMethod(name, 188 (Class[]) null); 189 return (Function) m.invoke(refer, new Object[] {}); 190 } catch (Exception e) { 191 throw new Error( 192 "TestPrefixProvider implementation error. Finding: " 193 + name, e); 194 } 195 } 196 } 197 198 /** 199 * Subclasses of PerfTest will need to create subclasses of Function that 200 * define a call() method which contains the code to be timed. They then 201 * call setTestFunction() in their "Test..." method to establish this as the 202 * current test functor. 203 */ 204 public abstract static class Function { 205 206 /** 207 * Subclasses should implement this method to do the action to be 208 * measured if the action is thread-safe 209 */ call()210 public void call() { call(0); } 211 212 /** 213 * Subclasses should implement this method if the action is not thread-safe 214 */ call(int i)215 public void call(int i) { call(); } 216 217 /** 218 * Subclasses may implement this method to return positive integer 219 * indicating the number of operations in a single call to this object's 220 * call() method. If subclasses do not override this method, the default 221 * implementation returns 1. 222 */ getOperationsPerIteration()223 public long getOperationsPerIteration() { 224 return 1; 225 } 226 227 /** 228 * Subclasses may implement this method to return either positive or 229 * negative integer indicating the number of events in a single call to 230 * this object's call() method. If subclasses do not override this 231 * method, the default implementation returns -1, indicating that events 232 * are not applicable to this test. e.g: Number of breaks / iterations 233 * for break iterator 234 */ getEventsPerIteration()235 public long getEventsPerIteration() { 236 return -1; 237 } 238 239 /** 240 * Call call() n times in a tight loop and return the elapsed 241 * milliseconds. If n is small and call() is fast the return result may 242 * be zero. Small return values have limited meaningfulness, depending 243 * on the underlying VM and OS. 244 */ time(long n)245 public final long time(long n) { 246 long start, stop; 247 start = System.currentTimeMillis(); 248 while (n-- > 0) { 249 call(); 250 } 251 stop = System.currentTimeMillis(); 252 return stop - start; // ms 253 } 254 255 256 /** 257 * init is called each time before looping through call 258 */ init()259 public void init() {} 260 261 getID()262 public final int getID() { 263 return id; 264 } 265 setID(int id)266 public final void setID(int id) { 267 this.id = id; 268 } 269 270 private int id; 271 } 272 273 private class FunctionRunner implements Runnable { FunctionRunner(Function f, long loops, int id)274 public FunctionRunner(Function f, long loops, int id) { 275 this.f = f; 276 this.loops = loops; 277 this.id = id; 278 } 279 run()280 public void run() { 281 long n = loops; 282 while (n-- > 0) 283 f.call(id); 284 } 285 286 private Function f; 287 288 private long loops; 289 private int id; 290 } 291 292 293 /** 294 * Exception indicating a usage error. 295 */ 296 public static class UsageException extends Exception { 297 /** 298 * For serialization 299 */ 300 private static final long serialVersionUID = -1201256240606806242L; 301 UsageException(String message)302 public UsageException(String message) { 303 super(message); 304 } 305 UsageException()306 public UsageException() { 307 super(); 308 } 309 } 310 311 /** 312 * Constructor. 313 */ PerfTest()314 protected PerfTest() { 315 } 316 317 /** 318 * Framework method. Default implementation does not parse any extra 319 * arguments. Subclasses may override this to parse extra arguments. 320 * Subclass implementations should NOT call the base class implementation. 321 */ setup(String[] args)322 protected void setup(String[] args) { 323 if (args.length > 0) { 324 throw new RuntimeException("Extra arguments received"); 325 } 326 } 327 328 /** 329 * These must be kept in sync with getOptions(). 330 */ 331 static final int HELP1 = 0; 332 static final int HELP2 = 1; 333 static final int VERBOSE = 2; 334 static final int SOURCEDIR = 3; 335 static final int ENCODING = 4; 336 static final int USELEN = 5; 337 static final int FILE_NAME = 6; 338 static final int PASSES = 7; 339 static final int ITERATIONS = 8; 340 static final int TIME = 9; 341 static final int LINE_MODE = 10; 342 static final int BULK_MODE = 11; 343 static final int LOCALE = 12; 344 static final int TEST_NAME = 13; 345 static final int THREADS = 14; 346 static final int DURATION = 15; 347 static final int ACTION = 16; 348 349 // Options above here are identical to those in C; keep in sync with C 350 // Options below here are unique to Java; shift down as necessary 351 static final int GARBAGE_COLLECT = 17; 352 static final int LIST = 18; 353 getOptions()354 UOption[] getOptions() { 355 return new UOption[] { 356 UOption.HELP_H(), 357 UOption.HELP_QUESTION_MARK(), 358 UOption.VERBOSE(), 359 UOption.SOURCEDIR(), 360 UOption.ENCODING(), 361 UOption.DEF("uselen", 'u', UOption.NO_ARG), 362 UOption.DEF("filename", 'f', UOption.REQUIRES_ARG), 363 UOption.DEF("passes", 'p', UOption.REQUIRES_ARG), 364 UOption.DEF("iterations", 'i', UOption.REQUIRES_ARG), 365 UOption.DEF("time", 't', UOption.REQUIRES_ARG), 366 UOption.DEF("line-mode", 'l', UOption.NO_ARG), 367 UOption.DEF("bulk-mode", 'b', UOption.NO_ARG), 368 UOption.DEF("locale", 'L', UOption.REQUIRES_ARG), 369 UOption.DEF("testname", 'T', UOption.REQUIRES_ARG), 370 UOption.DEF("threads", 'r', UOption.REQUIRES_ARG), 371 UOption.DEF("duration", 'd', UOption.NO_ARG), 372 UOption.DEF("action", 'a', UOption.NO_ARG), 373 374 // Options above here are identical to those in C; keep in sync 375 // Options below here are unique to Java 376 377 UOption.DEF("gc", 'g', UOption.NO_ARG), 378 UOption.DEF("list", (char) -1, UOption.NO_ARG), }; 379 } 380 381 /** 382 * Subclasses should call this method in their main(). run() will in turn 383 * call setup() with any arguments it does not parse. This method parses the 384 * command line and runs the tests given on the command line, with the given 385 * parameters. See the class description for details. 386 */ run(String[] args)387 protected final void run(String[] args) throws Exception { 388 Set testList = parseOptions(args); 389 390 // Run the tests 391 for (Iterator iter = testList.iterator(); iter.hasNext();) { 392 String meth = (String) iter.next(); 393 394 // Call meth to set up the test 395 // long eventsPerCall = -1; 396 Function testFunction = testProvider.getTestCmd(meth); 397 if (testFunction == null) { 398 throw new RuntimeException(meth 399 + " failed to return a test function"); 400 } 401 long ops = testFunction.getOperationsPerIteration(); 402 if (ops < 1) { 403 throw new RuntimeException(meth 404 + " returned an illegal operations/iteration()"); 405 } 406 407 long min_t = 1000000; 408 long t; 409 // long b = System.currentTimeMillis(); 410 long calibration_iter = getIteration(meth, testFunction); 411 // System.out.println("The guess cost: " + (System.currentTimeMillis() - b)/1000. + " s."); 412 413 // Calculate iterations for the specified duration/pass. 414 double timePerIter = performLoops(testFunction, calibration_iter)/1000./calibration_iter; 415 long iterationCount = (long) (duration/timePerIter + 0.5); 416 417 for (int j = 0; j < passes; ++j) { 418 long events = -1; 419 if (verbose) { 420 if (iterations > 0) { 421 System.out.println("= " + meth + " begin " + iterations); 422 } else { 423 System.out.println("= " + meth + " begin " + time + " seconds"); 424 } 425 } else if (!action) { 426 System.out.println("= " + meth + " begin "); 427 } 428 429 t = performLoops(testFunction, iterationCount); 430 if (t < min_t) { 431 min_t = t; 432 } 433 events = testFunction.getEventsPerIteration(); 434 435 if (verbose) { 436 if (events == -1) { 437 System.out.println("= " + meth + " end " + (t / 1000.0) + " loops: " + iterationCount + " operations: " 438 + ops); 439 } else { 440 System.out.println("= " + meth + " end " + (t / 1000.0) + " loops: " + iterationCount + " operations: " 441 + ops + " events: " + events); 442 } 443 } else if (!action) { 444 if (events == -1) { 445 System.out.println("= " + meth + " end " + (t / 1000.0) + " " + iterationCount + " " + ops); 446 } else { 447 System.out.println("= " + meth + " end " + (t / 1000.0) + " " + iterationCount + " " 448 + ops + " " + events); 449 } 450 } 451 } 452 if (action) { 453 // Print results in ndjson format for GHA Benchmark to process. 454 System.out.println("{\"biggerIsBetter\":false,\"name\":\"" + meth + 455 "\",\"unit\":\"ns/iter\",\"value\":" + (min_t*1E6) / (iterationCount*ops) + "}"); 456 } 457 } 458 } 459 460 /** 461 * @param args 462 * @return the method list to call 463 * @throws UsageException 464 */ parseOptions(String[] args)465 private Set parseOptions(String[] args) throws UsageException { 466 467 doPriorGC = false; 468 encoding = ""; 469 uselen = false; 470 fileName = null; 471 sourceDir = null; 472 line_mode = false; 473 verbose = false; 474 bulk_mode = false; 475 passes = iterations = time = -1; 476 locale = null; 477 testName = null; 478 threads = 1; 479 duration = 10; // Default used by Perl scripts 480 action = false; // If test is invoked on command line, includes GitHub Action 481 482 UOption[] options = getOptions(); 483 int remainingArgc = UOption.parseArgs(args, options); 484 485 if (args.length == 0 || options[HELP1].doesOccur || options[HELP2].doesOccur) 486 throw new UsageException(); 487 488 if (options[LIST].doesOccur) { 489 System.err.println("Available tests:"); 490 Set testNames = testProvider.getAllTestCmdNames(); 491 for (Iterator iter = testNames.iterator(); iter.hasNext();) { 492 String name = (String) iter.next(); 493 System.err.println(" " + name); 494 } 495 System.exit(0); 496 } 497 498 if (options[TIME].doesOccur && options[ITERATIONS].doesOccur) 499 throw new UsageException("Cannot specify both '-t <seconds>' and '-i <iterations>'"); 500 else if (!options[TIME].doesOccur && !options[ITERATIONS].doesOccur) 501 throw new UsageException("Either '-t <seconds>' or '-i <iterations>' must be specified"); 502 else if (options[ITERATIONS].doesOccur) { 503 try { 504 iterations = Integer.parseInt(options[ITERATIONS].value); 505 } catch (NumberFormatException ex) { 506 throw new UsageException("'-i <iterations>' requires an integer number of iterations"); 507 } 508 } else { //if (options[TIME].doesOccur) 509 try { 510 time = Integer.parseInt(options[TIME].value); 511 } catch (NumberFormatException ex) { 512 throw new UsageException("'-r <seconds>' requires an integer number of seconds"); 513 } 514 } 515 516 if (!options[PASSES].doesOccur) 517 throw new UsageException("'-p <passes>' must be specified"); 518 else 519 passes = Integer.parseInt(options[PASSES].value); 520 521 if (options[LINE_MODE].doesOccur && options[BULK_MODE].doesOccur) 522 throw new UsageException("Cannot specify both '-l' (line mode) and '-b' (bulk mode)"); 523 524 if (options[THREADS].doesOccur) { 525 try { 526 threads = Integer.parseInt(options[THREADS].value); 527 } catch (NumberFormatException ex) { 528 throw new UsageException("'-r <threads>' requires an integer number of threads"); 529 } 530 if (threads <= 0) 531 throw new UsageException("'-r <threads>' requires an number of threads greater than 0"); 532 } 533 if (options[DURATION].doesOccur) { 534 try { 535 duration = Integer.parseInt(options[DURATION].value); 536 } catch (NumberFormatException ex) { 537 throw new UsageException("'-d <duration>' requires an integer number of threads"); 538 } 539 } 540 541 line_mode = options[LINE_MODE].doesOccur; 542 bulk_mode = options[BULK_MODE].doesOccur; 543 verbose = options[VERBOSE].doesOccur; 544 uselen = options[USELEN].doesOccur; 545 doPriorGC = options[GARBAGE_COLLECT].doesOccur; 546 action = options[ACTION].doesOccur; 547 548 if (options[SOURCEDIR].doesOccur) sourceDir = options[SOURCEDIR].value; 549 if (options[ENCODING].doesOccur) encoding = options[ENCODING].value; 550 if (options[FILE_NAME].doesOccur) fileName = options[FILE_NAME].value; 551 if (options[TEST_NAME].doesOccur) testName = options[TEST_NAME].value; 552 if (options[LOCALE].doesOccur) locale = LocaleUtility.getLocaleFromName(options[LOCALE].value); 553 554 555 // build the test list 556 Set testList = new HashSet(); 557 int i, j; 558 for (i = 0; i < remainingArgc; ++i) { 559 // is args[i] a method name? 560 if (testProvider.isTestCmd(args[i])) { 561 testList.add(args[i]); 562 } else { 563 // args[i] is neither a method name nor a number. Pass 564 // everything from here on through to the subclass via 565 // setup(). 566 break; 567 } 568 } 569 570 // if no tests were specified, put all the tests in the test list 571 if (testList.size() == 0) { 572 Set testNames = testProvider.getAllTestCmdNames(); 573 Iterator iter = testNames.iterator(); 574 while (iter.hasNext()) 575 testList.add((String)iter.next()); 576 } 577 578 // pass remaining arguments, if any, through to the subclass via setup() method. 579 String[] subclassArgs = new String[remainingArgc - i]; 580 for (j = 0; i < remainingArgc; j++) 581 subclassArgs[j] = args[i++]; 582 setup(subclassArgs); 583 584 // Put the heap in a consistent state 585 if (doPriorGC) 586 gc(); 587 588 return testList; 589 } 590 591 /** 592 * Translate '-t time' to iterations (or just return '-i iteration') 593 * 594 * @param meth 595 * @param fn 596 * @return rt 597 */ getIteration(String methName, Function fn)598 private long getIteration(String methName, Function fn) throws InterruptedException { 599 long iter = 0; 600 if (iterations > 0) { 601 iter = iterations; 602 } else { // iterations not in input, calibrate iterations for given time. 603 // Translate time to iteration 604 // Assuming there is a linear relation between time and iterations 605 606 if (verbose) { 607 System.out.println("= " + methName + " calibrating " + time 608 + " seconds"); 609 } 610 611 long base = time * 1000; 612 // System.out.println("base :" + base); 613 long seed = 1; 614 long t = 0; 615 while (t < base * 0.9 || base * 1.1 < t) { // + - 10% 616 if (iter == 0 || t == 0) { 617 iter = seed; // start up from 1 618 seed *= 100; // if the method is too fast (t == 0), 619 // multiply 100 times 620 // 100 is rational because 'base' is always larger than 1000 621 } else { 622 // If 't' is large enough, use linear function to calculate 623 // new iteration 624 // 625 // new iter(base) old iter 626 // -------------- = -------- = k 627 // new time old time 628 // 629 // System.out.println("before guess t: " + t); 630 // System.out.println("before guess iter: " + iter); 631 iter = (long) ((double) iter / t * base); // avoid long 632 // cut, eg. 1/10 633 // == 0 634 if (iter == 0) { 635 throw new RuntimeException( 636 "Unable to converge on desired duration"); 637 } 638 } 639 t = performLoops(fn, iter); 640 } 641 // System.out.println("final t : " + t); 642 // System.out.println("final i : " + iter); 643 } 644 return iter; 645 } 646 647 performLoops(Function function, long loops)648 private long performLoops(Function function, long loops) throws InterruptedException { 649 function.init(); 650 if (threads > 1) { 651 Thread[] threadList = new Thread[threads]; 652 for (int i=0; i<threads; i++) 653 threadList[i] = new Thread(new FunctionRunner(function, loops, i)); 654 655 long start = System.currentTimeMillis(); 656 for (int i=0; i<threads; i++) 657 threadList[i].start(); 658 for (int i=0; i<threads; i++) 659 threadList[i].join(); 660 return System.currentTimeMillis() - start; 661 662 } else { 663 return function.time(loops); // ms 664 } 665 } 666 667 668 /** 669 * Invoke the runtime's garbage collection procedure repeatedly until the 670 * amount of free memory stabilizes to within 10%. 671 */ gc()672 protected void gc() { 673 if (false) { 674 long last; 675 long free = 1; 676 Runtime runtime = Runtime.getRuntime(); 677 do { 678 runtime.gc(); 679 last = free; 680 free = runtime.freeMemory(); 681 } while (((double) Math.abs(free - last)) / free > 0.1); 682 // Wait for the change in free memory to drop under 10% 683 // between successive calls. 684 } 685 686 // From "Java Platform Performance". This is the procedure 687 // recommended by Javasoft. 688 try { 689 System.gc(); 690 Thread.sleep(100); 691 System.runFinalization(); 692 Thread.sleep(100); 693 694 System.gc(); 695 Thread.sleep(100); 696 System.runFinalization(); 697 Thread.sleep(100); 698 } catch (InterruptedException e) { 699 } 700 } 701 702 readToEOS(Reader reader)703 public static char[] readToEOS(Reader reader) { 704 ArrayList vec = new ArrayList(); 705 int count = 0; 706 int pos = 0; 707 final int MAXLENGTH = 0x8000; // max buffer size - 32K 708 int length = 0x80; // start with small buffers and work up 709 do { 710 pos = 0; 711 length = length >= MAXLENGTH ? MAXLENGTH : length * 2; 712 char[] buffer = new char[length]; 713 try { 714 do { 715 int n = reader.read(buffer, pos, length - pos); 716 if (n == -1) { 717 break; 718 } 719 pos += n; 720 } while (pos < length); 721 } 722 catch (IOException e) { 723 } 724 vec.add(buffer); 725 count += pos; 726 } while (pos == length); 727 728 char[] data = new char[count]; 729 pos = 0; 730 for (int i = 0; i < vec.size(); ++i) { 731 char[] buf = (char[]) vec.get(i); 732 int len = Math.min(buf.length, count - pos); 733 System.arraycopy(buf, 0, data, pos, len); 734 pos += len; 735 } 736 return data; 737 } readToEOS(InputStream stream)738 public static byte[] readToEOS(InputStream stream) { 739 740 ArrayList vec = new ArrayList(); 741 int count = 0; 742 int pos = 0; 743 final int MAXLENGTH = 0x8000; // max buffer size - 32K 744 int length = 0x80; // start with small buffers and work up 745 do { 746 pos = 0; 747 length = length >= MAXLENGTH ? MAXLENGTH : length * 2; 748 byte[] buffer = new byte[length]; 749 try { 750 do { 751 int n = stream.read(buffer, pos, length - pos); 752 if (n == -1) { 753 break; 754 } 755 pos += n; 756 } while (pos < length); 757 } 758 catch (IOException e) { 759 } 760 vec.add(buffer); 761 count += pos; 762 } while (pos == length); 763 764 765 byte[] data = new byte[count]; 766 pos = 0; 767 for (int i = 0; i < vec.size(); ++i) { 768 byte[] buf = (byte[]) vec.get(i); 769 int len = Math.min(buf.length, count - pos); 770 System.arraycopy(buf, 0, data, pos, len); 771 pos += len; 772 } 773 return data; 774 } 775 readLines(String filename, String srcEncoding, boolean bulkMode)776 protected String[] readLines(String filename, String srcEncoding, boolean bulkMode) { 777 FileInputStream fis = null; 778 InputStreamReader isr = null; 779 BufferedReader br = null; 780 try { 781 fis = new FileInputStream(filename); 782 isr = new InputStreamReader(fis, srcEncoding); 783 br = new BufferedReader(isr); 784 } catch (Exception e) { 785 System.err.println("Error: File access exception: " + e.getMessage() + "!"); 786 System.exit(1); 787 } 788 ArrayList list = new ArrayList(); 789 while (true) { 790 String line = null; 791 try { 792 line = readDataLine(br); 793 } catch (Exception e) { 794 System.err.println("Read File Error" + e.getMessage() + "!"); 795 System.exit(1); 796 } 797 if (line == null) break; 798 if (line.length() == 0) continue; 799 list.add(line); 800 } 801 802 int size = list.size(); 803 String[] lines = null; 804 805 if (bulkMode) { 806 lines = new String[1]; 807 StringBuffer buffer = new StringBuffer(""); 808 for (int i = 0; i < size; ++i) { 809 buffer.append((String) list.get(i)); 810 /*if (i < (size - 1)) { 811 buffer.append("\r\n"); 812 }*/ 813 } 814 lines[0] = buffer.toString(); 815 } else { 816 lines = new String[size]; 817 for (int i = 0; i < size; ++i) { 818 lines[i] = (String) list.get(i); 819 } 820 } 821 822 return lines; 823 } 824 readDataLine(BufferedReader br)825 public String readDataLine(BufferedReader br) throws Exception { 826 String originalLine = ""; 827 String line = ""; 828 try { 829 line = originalLine = br.readLine(); 830 if (line == null) return null; 831 if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1); 832 int commentPos = line.indexOf('#'); 833 if (commentPos >= 0) line = line.substring(0, commentPos); 834 line = line.trim(); 835 } catch (Exception e) { 836 throw new Exception("Line \"{0}\", \"{1}\"" + originalLine + " " 837 + line + " " + e.toString()); 838 } 839 return line; 840 } 841 842 843 public static class BOMFreeReader extends Reader { 844 InputStreamReader reader; 845 String encoding; 846 int MAX_BOM_LENGTH = 5; 847 848 /** 849 * Creates a new reader, skipping a BOM associated with the given 850 * encoding. Equivalent to BOMFreeReader(in, null). 851 * 852 * @param in 853 * The input stream. 854 * @throws IOException 855 * Thrown if reading for a BOM causes an IOException. 856 */ BOMFreeReader(InputStream in)857 public BOMFreeReader(InputStream in) throws IOException { 858 this(in, null); 859 } 860 861 /** 862 * Creates a new reader, skipping a BOM associated with the given 863 * encoding. If encoding is null, attempts to detect the encoding by the 864 * BOM. 865 * 866 * @param in 867 * The input stream. 868 * @param encoding 869 * The encoding to use. Can be null. 870 * @throws IOException 871 * Thrown if reading for a BOM causes an IOException. 872 */ BOMFreeReader(InputStream in, String encoding)873 public BOMFreeReader(InputStream in, String encoding) throws IOException { 874 PushbackInputStream pushback = new PushbackInputStream(in, MAX_BOM_LENGTH); 875 this.encoding = encoding; 876 877 byte[] start = new byte[MAX_BOM_LENGTH]; 878 Arrays.fill(start, (byte)0xa5); 879 880 int amountRead = pushback.read(start, 0, MAX_BOM_LENGTH); 881 int bomLength = detectBOMLength(start); 882 if (amountRead > bomLength) 883 pushback.unread(start, bomLength, amountRead - bomLength); 884 885 reader = (encoding == null) ? new InputStreamReader(pushback) : new InputStreamReader(pushback, encoding); 886 } 887 888 /** 889 * Determines the length of a BOM in the beginning of start. Assumes 890 * start is at least a length 5 array. If encoding is null, the check 891 * will not be encoding specific and it will set the encoding of this 892 * BOMFreeReader. 893 * 894 * @param start 895 * The starting bytes. 896 * @param encoding 897 * The encoding. Can be null. 898 * @return The length of a detected BOM. 899 */ detectBOMLength(byte[] start)900 private int detectBOMLength(byte[] start) { 901 if ((encoding == null || "UTF-16BE".equals(encoding)) && start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) { 902 if (encoding == null) this.encoding = "UTF-16BE"; 903 return 2; // "UTF-16BE"; 904 } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) { 905 if ((encoding == null || "UTF-32LE".equals(encoding)) && start[2] == (byte) 0x00 906 && start[3] == (byte) 0x00) { 907 if (encoding == null) this.encoding = "UTF-32LE"; 908 return 4; // "UTF-32LE"; 909 } else if ((encoding == null || "UTF-16LE".equals(encoding))) { 910 if (encoding == null) this.encoding = "UTF-16LE"; 911 return 2; // "UTF-16LE"; 912 } 913 } else if ((encoding == null || "UTF-8".equals(encoding)) && start[0] == (byte) 0xEF 914 && start[1] == (byte) 0xBB && start[2] == (byte) 0xBF) { 915 if (encoding == null) this.encoding = "UTF-8"; 916 return 3; // "UTF-8"; 917 } else if ((encoding == null || "UTF-32BE".equals(encoding)) && start[0] == (byte) 0x00 918 && start[1] == (byte) 0x00 && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) { 919 if (encoding == null) this.encoding = "UTF-32BE"; 920 return 4; // "UTF-32BE"; 921 } else if ((encoding == null || "SCSU".equals(encoding)) && start[0] == (byte) 0x0E 922 && start[1] == (byte) 0xFE && start[2] == (byte) 0xFF) { 923 if (encoding == null) this.encoding = "SCSU"; 924 return 3; // "SCSU"; 925 } else if ((encoding == null || "BOCU-1".equals(encoding)) && start[0] == (byte) 0xFB 926 && start[1] == (byte) 0xEE && start[2] == (byte) 0x28) { 927 if (encoding == null) this.encoding = "BOCU-1"; 928 return 3; // "BOCU-1"; 929 } else if ((encoding == null || "UTF-7".equals(encoding)) && start[0] == (byte) 0x2B 930 && start[1] == (byte) 0x2F && start[2] == (byte) 0x76) { 931 if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) { 932 if (encoding == null) this.encoding = "UTF-7"; 933 return 5; // "UTF-7"; 934 } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39 || start[3] == (byte) 0x2B 935 || start[3] == (byte) 0x2F) { 936 if (encoding == null) this.encoding = "UTF-7"; 937 return 4; // "UTF-7"; 938 } 939 } else if ((encoding == null || "UTF-EBCDIC".equals(encoding)) && start[0] == (byte) 0xDD 940 && start[2] == (byte) 0x73 && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) { 941 if (encoding == null) this.encoding = "UTF-EBCDIC"; 942 return 4; // "UTF-EBCDIC"; 943 } 944 945 /* no known Unicode signature byte sequence recognized */ 946 return 0; 947 } 948 read(char[] cbuf, int off, int len)949 public int read(char[] cbuf, int off, int len) throws IOException { 950 return reader.read(cbuf, off, len); 951 } 952 close()953 public void close() throws IOException { 954 reader.close(); 955 } 956 } 957 } 958 959 960 961 // eof 962