• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  **********************************************************************
5  * Copyright (c) 2002-2008, International Business Machines
6  * Corporation and others.  All Rights Reserved.
7  **********************************************************************
8  */
9 package com.ibm.icu.dev.test.perf;
10 
11 import java.io.BufferedReader;
12 import java.io.FileInputStream;
13 import java.io.IOException;
14 import java.io.InputStream;
15 import java.io.InputStreamReader;
16 import java.io.PushbackInputStream;
17 import java.io.Reader;
18 import java.lang.reflect.Method;
19 import java.util.ArrayList;
20 import java.util.Arrays;
21 import java.util.HashMap;
22 import java.util.HashSet;
23 import java.util.Iterator;
24 import java.util.Locale;
25 import java.util.Map;
26 import java.util.Set;
27 
28 import com.ibm.icu.dev.tool.UOption;
29 import com.ibm.icu.impl.LocaleUtility;
30 
31 /**
32  * Base class for performance testing framework. To use, the subclass can simply
33  * define one or more instance methods with names beginning with "test" (case
34  * ignored). The prototype of the method is
35  *
36  * PerfTest.Function testTheName()
37  *
38  * The actual performance test will execute on the returned Command object
39  * (refer to Command Pattern). To call a test from command line, the 'test'
40  * prefix of the test method name can be ignored/removed.
41  *
42  * In addition, the subclass should define a main() method that calls
43  * PerfTest.run() as defined here.
44  *
 * If the subclass uses any command line arguments (beyond those handled
 * automatically by this class) then it should override PerfTest.setup() to
 * handle its arguments. If the subclass needs more sophisticated control
 * over how test methods are found and called, it can replace the default
 * implementation of PerfTest.testProvider before calling PerfTest.run().
50  *
51  * Example invocation: java -cp classes -verbose:gc
52  * com.ibm.icu.dev.test.perf.UnicodeSetPerf --gc --passes 4 --iterations 100
53  * UnicodeSetAdd [[:l:][:c:]]
54  *
55  * Example output: [GC 511K->192K(1984K), 0.0086170 secs] [GC 704K->353K(1984K),
56  * 0.0059619 secs] [Full GC 618K->371K(1984K), 0.0242779 secs] [Full GC
57  * 371K->371K(1984K), 0.0228649 secs] = testUnicodeSetAdd begin 100 =
58  * testUnicodeSetAdd end 11977 1109044 = testUnicodeSetAdd begin 100 =
59  * testUnicodeSetAdd end 12047 1109044 = testUnicodeSetAdd begin 100 =
60  * testUnicodeSetAdd end 11987 1109044 = testUnicodeSetAdd begin 100 =
61  * testUnicodeSetAdd end 11978 1109044
62  *
63  * The [] lines are emitted by the JVM as a result of the -verbose:gc switch.
64  *
 * Lines beginning with '=' are emitted by PerfTest:
 *
 * = testUnicodeSetAdd begin 100
 *
 * A 'begin' statement contains the name of the setup method, which
 * determines what test function is measured, and the number of iterations
 * that will be timed.
 *
 * = testUnicodeSetAdd end 12047 1109044
 *
 * An 'end' statement gives the name of the setup method again, and then two
 * integers. The first is the total elapsed time in milliseconds, and the
 * second is the number of events per iteration. In this example, the time
 * per event is 12047 / (100 * 1109044) or 108.6 ns/event.
73  *
74  * Raw times are given as integer ms, because this is what the system measures.
75  *
76  * @author Alan Liu
77  * @since ICU 2.4
78  */
79 public abstract class PerfTest {
    // Command-line options set these:
    // (all of them are reset to their defaults and then filled in by
    // parseOptions() each time run() is called)
    protected boolean verbose;    // true when the verbose option was given
    protected String sourceDir;   // value of the source-dir option, null if absent
    protected String fileName;    // value of -f/--filename, null if absent

    // protected String resolvedFileName;
    protected String encoding;    // value of the encoding option, "" if absent
    protected String testName;    // value of -T/--testname, null if absent
    protected boolean uselen;     // true when -u/--uselen was given
    protected int iterations;     // value of -i/--iterations, -1 if absent
    protected int passes;         // value of -p/--passes (mandatory option)
    protected int time;           // value of -t/--time in seconds, -1 if absent
    protected boolean line_mode;  // true when -l/--line-mode was given
    protected boolean bulk_mode;  // true when -b/--bulk-mode was given
    protected Locale locale;      // locale built from -L/--locale, null if absent
    protected boolean doPriorGC;  // true when -g/--gc was given; parseOptions() then calls gc()
    protected int threads;        // value of -r/--threads, 1 if absent
    protected int duration;       // value of -d/--duration in seconds, 10 if absent
    protected boolean action;     // true when -a/--action was given (ndjson result output)

    // Maps command names to test functions; the default treats every
    // method of this object whose name starts with "test" as a test.
    protected TestCmdProvider testProvider = new TestPrefixProvider(this);
101 
102     static interface TestCmdProvider {
103         /**
104          * @return The names for all available test.
105          */
getAllTestCmdNames()106         public Set getAllTestCmdNames();
107 
108         /**
109          * @param name
110          * @return Whether the given name is a test name. The implementation may
111          *         have more sophisticated naming control here.
112          *         TestCmdProvider.isTestCmd() != Set.contains()
113          */
isTestCmd(String name)114         public boolean isTestCmd(String name);
115 
116         /**
117          * @param name
118          * @return the test Command or null
119          */
getTestCmd(String name)120         public PerfTest.Function getTestCmd(String name);
121     }
122 
123     /**
124      * Treat all method beginning with 'test' prefix (ignoring case) for given
125      * object as the test methods.
126      */
127     static class TestPrefixProvider implements TestCmdProvider {
128         private Map theTests = null; // Map<string(no case), string(with case)>
129         private Set orgNames = null; // shadow reference, ==theTests, for better output
130         private Object refer;
131 
TestPrefixProvider(Object theProvider)132         TestPrefixProvider(Object theProvider) {
133             refer = theProvider;
134         }
135 
getAllTestCmdNames()136         public Set getAllTestCmdNames() {
137             if (theTests == null) {
138                 theTests = new HashMap();
139                 orgNames = new HashSet();
140                 Method[] methods = refer.getClass().getDeclaredMethods();
141                 for (int i = 0; i < methods.length; i++) {
142                     String org = methods[i].getName();
143                     String name = org.toLowerCase(); // ignoring case
144                     // beginning with 'test'
145                     // Note: methods named 'test()' are ignored
146                     if (name.length() > 4 && name.startsWith("test")) {
147                         if (theTests.containsKey(name)) {
148                             throw new Error(
149                                     "Duplicate method name ignoring case: "
150                                             + name);
151                         }
152                         theTests.put(name, org);
153                         orgNames.add(org);
154                     }
155                 }
156             }
157             return orgNames; // beginning with 'test', keeping case
158         }
159 
160         /**
161          * The given name will map to a method of the same name, or a method
162          * named "test" + name. Case is ignored.
163          */
isTestCmd_impl(String name)164         private String isTestCmd_impl(String name) {
165             getAllTestCmdNames();
166             String tn1 = name.toLowerCase();
167             String tn2 = "test" + tn1;
168             if (theTests.containsKey(tn1)) {
169                 return tn1;
170             } else if (theTests.containsKey(tn2)) {
171                 return tn2;
172             }
173             return null;
174         }
175 
isTestCmd(String name)176         public boolean isTestCmd(String name) {
177             return isTestCmd_impl(name) != null;
178         }
179 
getTestCmd(String aname)180         public Function getTestCmd(String aname) {
181             String name = (String) theTests.get(isTestCmd_impl(aname));
182             if (name == null) {
183                 return null;
184             }
185 
186             try {
187                 Method m = refer.getClass().getDeclaredMethod(name,
188                         (Class[]) null);
189                 return (Function) m.invoke(refer, new Object[] {});
190             } catch (Exception e) {
191                 throw new Error(
192                         "TestPrefixProvider implementation error. Finding: "
193                                 + name, e);
194             }
195         }
196     }
197 
198     /**
199      * Subclasses of PerfTest will need to create subclasses of Function that
200      * define a call() method which contains the code to be timed. They then
201      * call setTestFunction() in their "Test..." method to establish this as the
202      * current test functor.
203      */
204     public abstract static class Function {
205 
206         /**
207          * Subclasses should implement this method to do the action to be
208          * measured if the action is thread-safe
209          */
call()210         public void call() { call(0); }
211 
212         /**
213          * Subclasses should implement this method if the action is not thread-safe
214          */
call(int i)215         public void call(int i) { call(); }
216 
217         /**
218          * Subclasses may implement this method to return positive integer
219          * indicating the number of operations in a single call to this object's
220          * call() method. If subclasses do not override this method, the default
221          * implementation returns 1.
222          */
getOperationsPerIteration()223         public long getOperationsPerIteration() {
224             return 1;
225         }
226 
227         /**
228          * Subclasses may implement this method to return either positive or
229          * negative integer indicating the number of events in a single call to
230          * this object's call() method. If subclasses do not override this
231          * method, the default implementation returns -1, indicating that events
232          * are not applicable to this test. e.g: Number of breaks / iterations
233          * for break iterator
234          */
getEventsPerIteration()235         public long getEventsPerIteration() {
236             return -1;
237         }
238 
239         /**
240          * Call call() n times in a tight loop and return the elapsed
241          * milliseconds. If n is small and call() is fast the return result may
242          * be zero. Small return values have limited meaningfulness, depending
243          * on the underlying VM and OS.
244          */
time(long n)245         public final long time(long n) {
246             long start, stop;
247             start = System.currentTimeMillis();
248             while (n-- > 0) {
249                 call();
250             }
251             stop = System.currentTimeMillis();
252             return stop - start; // ms
253         }
254 
255 
256         /**
257          * init is called each time before looping through call
258          */
init()259         public void init() {}
260 
261 
getID()262         public final int getID() {
263             return id;
264         }
265 
setID(int id)266         public final void setID(int id) {
267             this.id = id;
268         }
269 
270         private int id;
271     }
272 
273     private class FunctionRunner implements Runnable {
FunctionRunner(Function f, long loops, int id)274         public FunctionRunner(Function f, long loops, int id) {
275             this.f = f;
276             this.loops = loops;
277             this.id = id;
278         }
279 
run()280         public void run() {
281             long n = loops;
282             while (n-- > 0)
283                 f.call(id);
284         }
285 
286         private Function f;
287 
288         private long loops;
289         private int id;
290     }
291 
292 
293     /**
294      * Exception indicating a usage error.
295      */
296     public static class UsageException extends Exception {
297         /**
298          * For serialization
299          */
300         private static final long serialVersionUID = -1201256240606806242L;
301 
UsageException(String message)302         public UsageException(String message) {
303             super(message);
304         }
305 
UsageException()306         public UsageException() {
307             super();
308         }
309     }
310 
    /**
     * Constructor. Does nothing; it is protected, so it is reachable from
     * subclasses and from this package only.
     */
    protected PerfTest() {
    }
316 
317     /**
318      * Framework method. Default implementation does not parse any extra
319      * arguments. Subclasses may override this to parse extra arguments.
320      * Subclass implementations should NOT call the base class implementation.
321      */
setup(String[] args)322     protected void setup(String[] args) {
323         if (args.length > 0) {
324             throw new RuntimeException("Extra arguments received");
325         }
326     }
327 
    /**
     * Indexes into the array returned by getOptions(): each constant is the
     * position of the corresponding option in that array.
     * These must be kept in sync with getOptions().
     */
    static final int HELP1 = 0;
    static final int HELP2 = 1;
    static final int VERBOSE = 2;
    static final int SOURCEDIR = 3;
    static final int ENCODING = 4;
    static final int USELEN = 5;
    static final int FILE_NAME = 6;
    static final int PASSES = 7;
    static final int ITERATIONS = 8;
    static final int TIME = 9;
    static final int LINE_MODE = 10;
    static final int BULK_MODE = 11;
    static final int LOCALE = 12;
    static final int TEST_NAME = 13;
    static final int THREADS = 14;
    static final int DURATION = 15;
    static final int ACTION = 16;

    // Options above here are identical to those in C; keep in sync with C
    // Options below here are unique to Java; shift down as necessary
    static final int GARBAGE_COLLECT = 17;
    static final int LIST = 18;
353 
getOptions()354     UOption[] getOptions() {
355         return new UOption[] {
356                 UOption.HELP_H(),
357                 UOption.HELP_QUESTION_MARK(),
358                 UOption.VERBOSE(),
359                 UOption.SOURCEDIR(),
360                 UOption.ENCODING(),
361                 UOption.DEF("uselen",     'u', UOption.NO_ARG),
362                 UOption.DEF("filename",   'f', UOption.REQUIRES_ARG),
363                 UOption.DEF("passes",     'p', UOption.REQUIRES_ARG),
364                 UOption.DEF("iterations", 'i', UOption.REQUIRES_ARG),
365                 UOption.DEF("time",       't', UOption.REQUIRES_ARG),
366                 UOption.DEF("line-mode",  'l', UOption.NO_ARG),
367                 UOption.DEF("bulk-mode",  'b', UOption.NO_ARG),
368                 UOption.DEF("locale",     'L', UOption.REQUIRES_ARG),
369                 UOption.DEF("testname",   'T', UOption.REQUIRES_ARG),
370                 UOption.DEF("threads",    'r', UOption.REQUIRES_ARG),
371                 UOption.DEF("duration",   'd', UOption.NO_ARG),
372                 UOption.DEF("action",     'a', UOption.NO_ARG),
373 
374                 // Options above here are identical to those in C; keep in sync
375                 // Options below here are unique to Java
376 
377                 UOption.DEF("gc", 'g', UOption.NO_ARG),
378                 UOption.DEF("list", (char) -1, UOption.NO_ARG), };
379     }
380 
381     /**
382      * Subclasses should call this method in their main(). run() will in turn
383      * call setup() with any arguments it does not parse. This method parses the
384      * command line and runs the tests given on the command line, with the given
385      * parameters. See the class description for details.
386      */
run(String[] args)387     protected final void run(String[] args) throws Exception {
388         Set testList = parseOptions(args);
389 
390         // Run the tests
391         for (Iterator iter = testList.iterator(); iter.hasNext();) {
392             String meth = (String) iter.next();
393 
394             // Call meth to set up the test
395             // long eventsPerCall = -1;
396             Function testFunction = testProvider.getTestCmd(meth);
397             if (testFunction == null) {
398                 throw new RuntimeException(meth
399                         + " failed to return a test function");
400             }
401             long ops = testFunction.getOperationsPerIteration();
402             if (ops < 1) {
403                 throw new RuntimeException(meth
404                         + " returned an illegal operations/iteration()");
405             }
406 
407             long min_t = 1000000;
408             long t;
409             // long b = System.currentTimeMillis();
410             long calibration_iter = getIteration(meth, testFunction);
411             // System.out.println("The guess cost: " + (System.currentTimeMillis() - b)/1000. + " s.");
412 
413             // Calculate iterations for the specified duration/pass.
414             double timePerIter = performLoops(testFunction, calibration_iter)/1000./calibration_iter;
415             long iterationCount = (long) (duration/timePerIter + 0.5);
416 
417             for (int j = 0; j < passes; ++j) {
418                 long events = -1;
419                 if (verbose) {
420                     if (iterations > 0) {
421                         System.out.println("= " + meth + " begin " + iterations);
422                     } else {
423                         System.out.println("= " + meth + " begin " + time + " seconds");
424                     }
425                 } else if (!action) {
426                     System.out.println("= " + meth + " begin ");
427                 }
428 
429                 t = performLoops(testFunction, iterationCount);
430                 if (t < min_t) {
431                   min_t = t;
432                 }
433                 events = testFunction.getEventsPerIteration();
434 
435                 if (verbose) {
436                     if (events == -1) {
437                         System.out.println("= " + meth + " end " + (t / 1000.0) + " loops: " + iterationCount + " operations: "
438                                 + ops);
439                     } else {
440                         System.out.println("= " + meth + " end " + (t / 1000.0) + " loops: " + iterationCount + " operations: "
441                                 + ops + " events: " + events);
442                     }
443                 } else if (!action) {
444                     if (events == -1) {
445                         System.out.println("= " + meth + " end " + (t / 1000.0) + " " + iterationCount + " " + ops);
446                     } else {
447                         System.out.println("= " + meth + " end " + (t / 1000.0) + " " + iterationCount + " "
448                                 + ops + " " + events);
449                     }
450                 }
451             }
452             if (action) {
453                 // Print results in ndjson format for GHA Benchmark to process.
454                 System.out.println("{\"biggerIsBetter\":false,\"name\":\"" + meth +
455                         "\",\"unit\":\"ns/iter\",\"value\":" + (min_t*1E6) / (iterationCount*ops) + "}");
456             }
457         }
458     }
459 
460     /**
461      * @param args
462      * @return the method list to call
463      * @throws UsageException
464      */
parseOptions(String[] args)465     private Set parseOptions(String[] args) throws UsageException {
466 
467         doPriorGC = false;
468         encoding = "";
469         uselen = false;
470         fileName = null;
471         sourceDir = null;
472         line_mode = false;
473         verbose = false;
474         bulk_mode = false;
475         passes = iterations = time = -1;
476         locale = null;
477         testName = null;
478         threads = 1;
479         duration = 10;   // Default used by Perl scripts
480         action = false;  // If test is invoked on command line, includes GitHub Action
481 
482         UOption[] options = getOptions();
483         int remainingArgc = UOption.parseArgs(args, options);
484 
485         if (args.length == 0 || options[HELP1].doesOccur || options[HELP2].doesOccur)
486             throw new UsageException();
487 
488         if (options[LIST].doesOccur) {
489             System.err.println("Available tests:");
490             Set testNames = testProvider.getAllTestCmdNames();
491             for (Iterator iter = testNames.iterator(); iter.hasNext();) {
492                 String name = (String) iter.next();
493                 System.err.println(" " + name);
494             }
495             System.exit(0);
496         }
497 
498         if (options[TIME].doesOccur && options[ITERATIONS].doesOccur)
499             throw new UsageException("Cannot specify both '-t <seconds>' and '-i <iterations>'");
500         else if (!options[TIME].doesOccur && !options[ITERATIONS].doesOccur)
501             throw new UsageException("Either '-t <seconds>' or '-i <iterations>' must be specified");
502         else if (options[ITERATIONS].doesOccur) {
503             try {
504                 iterations = Integer.parseInt(options[ITERATIONS].value);
505             } catch (NumberFormatException ex) {
506                 throw new UsageException("'-i <iterations>' requires an integer number of iterations");
507             }
508         } else { //if (options[TIME].doesOccur)
509             try {
510                 time = Integer.parseInt(options[TIME].value);
511             } catch (NumberFormatException ex) {
512                 throw new UsageException("'-r <seconds>' requires an integer number of seconds");
513             }
514         }
515 
516         if (!options[PASSES].doesOccur)
517             throw new UsageException("'-p <passes>' must be specified");
518         else
519             passes = Integer.parseInt(options[PASSES].value);
520 
521         if (options[LINE_MODE].doesOccur && options[BULK_MODE].doesOccur)
522             throw new UsageException("Cannot specify both '-l' (line mode) and '-b' (bulk mode)");
523 
524         if (options[THREADS].doesOccur) {
525             try {
526                 threads = Integer.parseInt(options[THREADS].value);
527             } catch (NumberFormatException ex) {
528                 throw new UsageException("'-r <threads>' requires an integer number of threads");
529             }
530             if (threads <= 0)
531                 throw new UsageException("'-r <threads>' requires an number of threads greater than 0");
532         }
533         if (options[DURATION].doesOccur) {
534             try {
535                 duration  = Integer.parseInt(options[DURATION].value);
536             } catch (NumberFormatException ex) {
537                 throw new UsageException("'-d <duration>' requires an integer number of threads");
538             }
539         }
540 
541         line_mode = options[LINE_MODE].doesOccur;
542         bulk_mode = options[BULK_MODE].doesOccur;
543         verbose   = options[VERBOSE].doesOccur;
544         uselen    = options[USELEN].doesOccur;
545         doPriorGC = options[GARBAGE_COLLECT].doesOccur;
546         action    = options[ACTION].doesOccur;
547 
548         if (options[SOURCEDIR].doesOccur) sourceDir = options[SOURCEDIR].value;
549         if (options[ENCODING].doesOccur)  encoding  = options[ENCODING].value;
550         if (options[FILE_NAME].doesOccur) fileName  = options[FILE_NAME].value;
551         if (options[TEST_NAME].doesOccur) testName  = options[TEST_NAME].value;
552         if (options[LOCALE].doesOccur)    locale    = LocaleUtility.getLocaleFromName(options[LOCALE].value);
553 
554 
555         // build the test list
556         Set testList = new HashSet();
557         int i, j;
558         for (i = 0; i < remainingArgc; ++i) {
559             // is args[i] a method name?
560             if (testProvider.isTestCmd(args[i])) {
561                 testList.add(args[i]);
562             } else {
563                 // args[i] is neither a method name nor a number. Pass
564                 // everything from here on through to the subclass via
565                 // setup().
566                 break;
567             }
568         }
569 
570         // if no tests were specified, put all the tests in the test list
571         if (testList.size() == 0) {
572             Set testNames = testProvider.getAllTestCmdNames();
573             Iterator iter = testNames.iterator();
574             while (iter.hasNext())
575                 testList.add((String)iter.next());
576         }
577 
578         // pass remaining arguments, if any, through to the subclass via setup() method.
579         String[] subclassArgs = new String[remainingArgc - i];
580         for (j = 0; i < remainingArgc; j++)
581             subclassArgs[j] = args[i++];
582         setup(subclassArgs);
583 
584         // Put the heap in a consistent state
585         if (doPriorGC)
586             gc();
587 
588         return testList;
589     }
590 
591     /**
592      * Translate '-t time' to iterations (or just return '-i iteration')
593      *
594      * @param meth
595      * @param fn
596      * @return rt
597      */
getIteration(String methName, Function fn)598     private long getIteration(String methName, Function fn) throws InterruptedException {
599         long iter = 0;
600         if (iterations > 0) {
601             iter = iterations;
602         } else { // iterations not in input, calibrate iterations for given time.
603             // Translate time to iteration
604             // Assuming there is a linear relation between time and iterations
605 
606             if (verbose) {
607                 System.out.println("= " + methName + " calibrating " + time
608                         + " seconds");
609             }
610 
611             long base = time * 1000;
612             // System.out.println("base :" + base);
613             long seed = 1;
614             long t = 0;
615             while (t < base * 0.9 || base * 1.1 < t) { // + - 10%
616                 if (iter == 0 || t == 0) {
617                     iter = seed; // start up from 1
618                     seed *= 100; // if the method is too fast (t == 0),
619                     // multiply 100 times
620                     // 100 is rational because 'base' is always larger than 1000
621                 } else {
622                     // If 't' is large enough, use linear function to calculate
623                     // new iteration
624                     //
625                     // new iter(base) old iter
626                     // -------------- = -------- = k
627                     // new time old time
628                     //
629                     // System.out.println("before guess t: " + t);
630                     // System.out.println("before guess iter: " + iter);
631                     iter = (long) ((double) iter / t * base); // avoid long
632                     // cut, eg. 1/10
633                     // == 0
634                     if (iter == 0) {
635                         throw new RuntimeException(
636                                 "Unable to converge on desired duration");
637                     }
638                 }
639                 t = performLoops(fn, iter);
640             }
641             // System.out.println("final t : " + t);
642             // System.out.println("final i : " + iter);
643         }
644         return iter;
645     }
646 
647 
performLoops(Function function, long loops)648     private long performLoops(Function function, long loops) throws InterruptedException {
649         function.init();
650         if (threads > 1) {
651             Thread[] threadList = new Thread[threads];
652             for (int i=0; i<threads; i++)
653                 threadList[i] = new Thread(new FunctionRunner(function, loops, i));
654 
655             long start = System.currentTimeMillis();
656             for (int i=0; i<threads; i++)
657                 threadList[i].start();
658             for (int i=0; i<threads; i++)
659                 threadList[i].join();
660             return System.currentTimeMillis() - start;
661 
662         } else {
663             return function.time(loops); // ms
664         }
665     }
666 
667 
668     /**
669      * Invoke the runtime's garbage collection procedure repeatedly until the
670      * amount of free memory stabilizes to within 10%.
671      */
gc()672     protected void gc() {
673         if (false) {
674             long last;
675             long free = 1;
676             Runtime runtime = Runtime.getRuntime();
677             do {
678                 runtime.gc();
679                 last = free;
680                 free = runtime.freeMemory();
681             } while (((double) Math.abs(free - last)) / free > 0.1);
682             // Wait for the change in free memory to drop under 10%
683             // between successive calls.
684         }
685 
686         // From "Java Platform Performance". This is the procedure
687         // recommended by Javasoft.
688         try {
689             System.gc();
690             Thread.sleep(100);
691             System.runFinalization();
692             Thread.sleep(100);
693 
694             System.gc();
695             Thread.sleep(100);
696             System.runFinalization();
697             Thread.sleep(100);
698         } catch (InterruptedException e) {
699         }
700     }
701 
702 
readToEOS(Reader reader)703     public static char[] readToEOS(Reader reader) {
704         ArrayList vec = new ArrayList();
705         int count = 0;
706         int pos = 0;
707         final int MAXLENGTH = 0x8000; // max buffer size - 32K
708         int length = 0x80; // start with small buffers and work up
709         do {
710             pos = 0;
711             length = length >= MAXLENGTH ? MAXLENGTH : length * 2;
712             char[] buffer = new char[length];
713             try {
714                 do {
715                     int n = reader.read(buffer, pos, length - pos);
716                     if (n == -1) {
717                         break;
718                     }
719                     pos += n;
720                 } while (pos < length);
721             }
722             catch (IOException e) {
723             }
724             vec.add(buffer);
725             count += pos;
726         } while (pos == length);
727 
728         char[] data = new char[count];
729         pos = 0;
730         for (int i = 0; i < vec.size(); ++i) {
731             char[] buf = (char[]) vec.get(i);
732             int len = Math.min(buf.length, count - pos);
733             System.arraycopy(buf, 0, data, pos, len);
734             pos += len;
735         }
736         return data;
737     }
readToEOS(InputStream stream)738     public static byte[] readToEOS(InputStream stream) {
739 
740         ArrayList vec = new ArrayList();
741         int count = 0;
742         int pos = 0;
743         final int MAXLENGTH = 0x8000; // max buffer size - 32K
744         int length = 0x80; // start with small buffers and work up
745         do {
746             pos = 0;
747             length = length >= MAXLENGTH ? MAXLENGTH : length * 2;
748             byte[] buffer = new byte[length];
749             try {
750                 do {
751                     int n = stream.read(buffer, pos, length - pos);
752                     if (n == -1) {
753                         break;
754                     }
755                     pos += n;
756                 } while (pos < length);
757             }
758             catch (IOException e) {
759             }
760             vec.add(buffer);
761             count += pos;
762         } while (pos == length);
763 
764 
765         byte[] data = new byte[count];
766         pos = 0;
767         for (int i = 0; i < vec.size(); ++i) {
768             byte[] buf = (byte[]) vec.get(i);
769             int len = Math.min(buf.length, count - pos);
770             System.arraycopy(buf, 0, data, pos, len);
771             pos += len;
772         }
773         return data;
774     }
775 
readLines(String filename, String srcEncoding, boolean bulkMode)776     protected String[] readLines(String filename, String srcEncoding, boolean bulkMode) {
777         FileInputStream fis = null;
778         InputStreamReader isr = null;
779         BufferedReader br = null;
780         try {
781             fis = new FileInputStream(filename);
782             isr = new InputStreamReader(fis, srcEncoding);
783             br = new BufferedReader(isr);
784         } catch (Exception e) {
785             System.err.println("Error: File access exception: " + e.getMessage() + "!");
786             System.exit(1);
787         }
788         ArrayList list = new ArrayList();
789         while (true) {
790             String line = null;
791             try {
792                 line = readDataLine(br);
793             } catch (Exception e) {
794                 System.err.println("Read File Error" + e.getMessage() + "!");
795                 System.exit(1);
796             }
797             if (line == null) break;
798             if (line.length() == 0) continue;
799             list.add(line);
800         }
801 
802         int size = list.size();
803         String[] lines = null;
804 
805         if (bulkMode) {
806             lines = new String[1];
807             StringBuffer buffer = new StringBuffer("");
808             for (int i = 0; i < size; ++i) {
809                 buffer.append((String) list.get(i));
810                 /*if (i < (size - 1)) {
811                     buffer.append("\r\n");
812                 }*/
813             }
814             lines[0] = buffer.toString();
815         } else {
816             lines = new String[size];
817             for (int i = 0; i < size; ++i) {
818                 lines[i] = (String) list.get(i);
819             }
820         }
821 
822         return lines;
823     }
824 
readDataLine(BufferedReader br)825     public String readDataLine(BufferedReader br) throws Exception {
826         String originalLine = "";
827         String line = "";
828         try {
829             line = originalLine = br.readLine();
830             if (line == null) return null;
831             if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1);
832             int commentPos = line.indexOf('#');
833             if (commentPos >= 0) line = line.substring(0, commentPos);
834             line = line.trim();
835         } catch (Exception e) {
836             throw new Exception("Line \"{0}\",  \"{1}\"" + originalLine + " "
837                     + line + " " + e.toString());
838         }
839         return line;
840     }
841 
842 
843     public static class BOMFreeReader extends Reader {
844         InputStreamReader reader;
845         String encoding;
846         int MAX_BOM_LENGTH = 5;
847 
848         /**
849          * Creates a new reader, skipping a BOM associated with the given
850          * encoding. Equivalent to BOMFreeReader(in, null).
851          *
852          * @param in
853          *            The input stream.
854          * @throws IOException
855          *             Thrown if reading for a BOM causes an IOException.
856          */
BOMFreeReader(InputStream in)857         public BOMFreeReader(InputStream in) throws IOException {
858             this(in, null);
859         }
860 
861         /**
862          * Creates a new reader, skipping a BOM associated with the given
863          * encoding. If encoding is null, attempts to detect the encoding by the
864          * BOM.
865          *
866          * @param in
867          *            The input stream.
868          * @param encoding
869          *            The encoding to use. Can be null.
870          * @throws IOException
871          *             Thrown if reading for a BOM causes an IOException.
872          */
BOMFreeReader(InputStream in, String encoding)873         public BOMFreeReader(InputStream in, String encoding) throws IOException {
874             PushbackInputStream pushback = new PushbackInputStream(in, MAX_BOM_LENGTH);
875             this.encoding = encoding;
876 
877             byte[] start = new byte[MAX_BOM_LENGTH];
878             Arrays.fill(start, (byte)0xa5);
879 
880             int amountRead = pushback.read(start, 0, MAX_BOM_LENGTH);
881             int bomLength = detectBOMLength(start);
882             if (amountRead > bomLength)
883                 pushback.unread(start, bomLength, amountRead - bomLength);
884 
885             reader = (encoding == null) ? new InputStreamReader(pushback) : new InputStreamReader(pushback, encoding);
886         }
887 
888         /**
889          * Determines the length of a BOM in the beginning of start. Assumes
890          * start is at least a length 5 array. If encoding is null, the check
891          * will not be encoding specific and it will set the encoding of this
892          * BOMFreeReader.
893          *
894          * @param start
895          *            The starting bytes.
896          * @param encoding
897          *            The encoding. Can be null.
898          * @return The length of a detected BOM.
899          */
detectBOMLength(byte[] start)900         private int detectBOMLength(byte[] start) {
901             if ((encoding == null || "UTF-16BE".equals(encoding)) && start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
902                 if (encoding == null) this.encoding = "UTF-16BE";
903                 return 2; // "UTF-16BE";
904             } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
905                 if ((encoding == null || "UTF-32LE".equals(encoding)) && start[2] == (byte) 0x00
906                         && start[3] == (byte) 0x00) {
907                     if (encoding == null) this.encoding = "UTF-32LE";
908                     return 4; // "UTF-32LE";
909                 } else if ((encoding == null || "UTF-16LE".equals(encoding))) {
910                     if (encoding == null) this.encoding = "UTF-16LE";
911                     return 2; // "UTF-16LE";
912                 }
913             } else if ((encoding == null || "UTF-8".equals(encoding)) && start[0] == (byte) 0xEF
914                     && start[1] == (byte) 0xBB && start[2] == (byte) 0xBF) {
915                 if (encoding == null) this.encoding = "UTF-8";
916                 return 3; // "UTF-8";
917             } else if ((encoding == null || "UTF-32BE".equals(encoding)) && start[0] == (byte) 0x00
918                     && start[1] == (byte) 0x00 && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
919                 if (encoding == null) this.encoding = "UTF-32BE";
920                 return 4; // "UTF-32BE";
921             } else if ((encoding == null || "SCSU".equals(encoding)) && start[0] == (byte) 0x0E
922                     && start[1] == (byte) 0xFE && start[2] == (byte) 0xFF) {
923                 if (encoding == null) this.encoding = "SCSU";
924                 return 3; // "SCSU";
925             } else if ((encoding == null || "BOCU-1".equals(encoding)) && start[0] == (byte) 0xFB
926                     && start[1] == (byte) 0xEE && start[2] == (byte) 0x28) {
927                 if (encoding == null) this.encoding = "BOCU-1";
928                 return 3; // "BOCU-1";
929             } else if ((encoding == null || "UTF-7".equals(encoding)) && start[0] == (byte) 0x2B
930                     && start[1] == (byte) 0x2F && start[2] == (byte) 0x76) {
931                 if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
932                     if (encoding == null) this.encoding = "UTF-7";
933                     return 5; // "UTF-7";
934                 } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39 || start[3] == (byte) 0x2B
935                         || start[3] == (byte) 0x2F) {
936                     if (encoding == null) this.encoding = "UTF-7";
937                     return 4; // "UTF-7";
938                 }
939             } else if ((encoding == null || "UTF-EBCDIC".equals(encoding)) && start[0] == (byte) 0xDD
940                     && start[2] == (byte) 0x73 && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
941                 if (encoding == null) this.encoding = "UTF-EBCDIC";
942                 return 4; // "UTF-EBCDIC";
943             }
944 
945             /* no known Unicode signature byte sequence recognized */
946             return 0;
947         }
948 
read(char[] cbuf, int off, int len)949         public int read(char[] cbuf, int off, int len) throws IOException {
950             return reader.read(cbuf, off, len);
951         }
952 
close()953         public void close() throws IOException {
954             reader.close();
955         }
956     }
957 }
958 
959 
960 
961 // eof
962