• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  **********************************************************************
5  * Copyright (c) 2002-2008, International Business Machines
6  * Corporation and others.  All Rights Reserved.
7  **********************************************************************
8  */
9 package com.ibm.icu.dev.test.perf;
10 
11 import java.io.BufferedReader;
12 import java.io.FileInputStream;
13 import java.io.IOException;
14 import java.io.InputStream;
15 import java.io.InputStreamReader;
16 import java.io.PushbackInputStream;
17 import java.io.Reader;
18 import java.lang.reflect.Method;
19 import java.util.ArrayList;
20 import java.util.Arrays;
21 import java.util.HashMap;
22 import java.util.HashSet;
23 import java.util.Iterator;
24 import java.util.Locale;
25 import java.util.Map;
26 import java.util.Set;
27 
28 import com.ibm.icu.dev.tool.UOption;
29 import com.ibm.icu.impl.LocaleUtility;
30 
31 /**
32  * Base class for performance testing framework. To use, the subclass can simply
33  * define one or more instance methods with names beginning with "test" (case
34  * ignored). The prototype of the method is
35  *
36  * PerfTest.Function testTheName()
37  *
38  * The actual performance test will execute on the returned Command object
39  * (refer to Command Pattern). To call a test from command line, the 'test'
40  * prefix of the test method name can be ignored/removed.
41  *
42  * In addition, the subclass should define a main() method that calls
43  * PerfTest.run() as defined here.
44  *
45  * If the subclass uses any command line arguments (beyond those handled
46  * automatically by this class) then it should override PerfTest.setup() to
47  * handle its arguments. If the subclass needs more sophisticated management
48  * for controlling finding/calling test method, it can replace the default
49  * implementation for PerfTest.testProvider before calling PerfTest.run().
50  *
51  * Example invocation: java -cp classes -verbose:gc
52  * com.ibm.icu.dev.test.perf.UnicodeSetPerf --gc --passes 4 --iterations 100
53  * UnicodeSetAdd [[:l:][:c:]]
54  *
55  * Example output: [GC 511K->192K(1984K), 0.0086170 secs] [GC 704K->353K(1984K),
56  * 0.0059619 secs] [Full GC 618K->371K(1984K), 0.0242779 secs] [Full GC
57  * 371K->371K(1984K), 0.0228649 secs] = testUnicodeSetAdd begin 100 =
58  * testUnicodeSetAdd end 11977 1109044 = testUnicodeSetAdd begin 100 =
59  * testUnicodeSetAdd end 12047 1109044 = testUnicodeSetAdd begin 100 =
60  * testUnicodeSetAdd end 11987 1109044 = testUnicodeSetAdd begin 100 =
61  * testUnicodeSetAdd end 11978 1109044
62  *
63  * The [] lines are emitted by the JVM as a result of the -verbose:gc switch.
64  *
65  * Lines beginning with '=' are emitted by PerfTest: = testUnicodeSetAdd begin
66  * 100 A 'begin' statement contains the name of the setup method, which
67  * determines what test function is measured, and the number of iterations that
68  * will be timed. = testUnicodeSetAdd end 12047 1109044 An 'end' statement gives
69  * the name of the setup method again, and then two integers. The first is the
70  * total elapsed time in milliseconds, and the second is the number of events
71  * per iteration. In this example, the time per event is 12047 / (100 * 1109044)
72  * or 108.6 ns/event.
73  *
74  * Raw times are given as integer ms, because this is what the system measures.
75  *
76  * @author Alan Liu
77  * @since ICU 2.4
78  */
79 public abstract class PerfTest {
80     // Command-line options set these:
81     protected boolean verbose;
82     protected String sourceDir;
83     protected String fileName;
84 
85     // protected String resolvedFileName;
86     protected String encoding;
87     protected String testName;
88     protected boolean uselen;
89     protected int iterations;
90     protected int passes;
91     protected int time;
92     protected boolean line_mode;
93     protected boolean bulk_mode;
94     protected Locale locale;
95     protected boolean doPriorGC;
96     protected int threads;
97 
98     protected TestCmdProvider testProvider = new TestPrefixProvider(this);
99 
100     static interface TestCmdProvider {
101         /**
102          * @return The names for all available test.
103          */
getAllTestCmdNames()104         public Set getAllTestCmdNames();
105 
106         /**
107          * @param name
108          * @return Whether the given name is a test name. The implementation may
109          *         have more sophisticated naming control here.
110          *         TestCmdProvider.isTestCmd() != Set.contains()
111          */
isTestCmd(String name)112         public boolean isTestCmd(String name);
113 
114         /**
115          * @param name
116          * @return the test Command or null
117          */
getTestCmd(String name)118         public PerfTest.Function getTestCmd(String name);
119     }
120 
121     /**
122      * Treat all method beginning with 'test' prefix (ignoring case) for given
123      * object as the test methods.
124      */
125     static class TestPrefixProvider implements TestCmdProvider {
126         private Map theTests = null; // Map<string(no case), string(with case)>
127         private Set orgNames = null; // shadow reference, ==theTests, for better output
128         private Object refer;
129 
TestPrefixProvider(Object theProvider)130         TestPrefixProvider(Object theProvider) {
131             refer = theProvider;
132         }
133 
getAllTestCmdNames()134         public Set getAllTestCmdNames() {
135             if (theTests == null) {
136                 theTests = new HashMap();
137                 orgNames = new HashSet();
138                 Method[] methods = refer.getClass().getDeclaredMethods();
139                 for (int i = 0; i < methods.length; i++) {
140                     String org = methods[i].getName();
141                     String name = org.toLowerCase(); // ignoring case
142                     // beginning with 'test'
143                     // Note: methods named 'test()' are ignored
144                     if (name.length() > 4 && name.startsWith("test")) {
145                         if (theTests.containsKey(name)) {
146                             throw new Error(
147                                     "Duplicate method name ignoring case: "
148                                             + name);
149                         }
150                         theTests.put(name, org);
151                         orgNames.add(org);
152                     }
153                 }
154             }
155             return orgNames; // beginning with 'test', keeping case
156         }
157 
158         /**
159          * The given name will map to a method of the same name, or a method
160          * named "test" + name. Case is ignored.
161          */
isTestCmd_impl(String name)162         private String isTestCmd_impl(String name) {
163             getAllTestCmdNames();
164             String tn1 = name.toLowerCase();
165             String tn2 = "test" + tn1;
166             if (theTests.containsKey(tn1)) {
167                 return tn1;
168             } else if (theTests.containsKey(tn2)) {
169                 return tn2;
170             }
171             return null;
172         }
173 
isTestCmd(String name)174         public boolean isTestCmd(String name) {
175             return isTestCmd_impl(name) != null;
176         }
177 
getTestCmd(String aname)178         public Function getTestCmd(String aname) {
179             String name = (String) theTests.get(isTestCmd_impl(aname));
180             if (name == null) {
181                 return null;
182             }
183 
184             try {
185                 Method m = refer.getClass().getDeclaredMethod(name,
186                         (Class[]) null);
187                 return (Function) m.invoke(refer, new Object[] {});
188             } catch (Exception e) {
189                 throw new Error(
190                         "TestPrefixProvider implementation error. Finding: "
191                                 + name, e);
192             }
193         }
194     }
195 
196     /**
197      * Subclasses of PerfTest will need to create subclasses of Function that
198      * define a call() method which contains the code to be timed. They then
199      * call setTestFunction() in their "Test..." method to establish this as the
200      * current test functor.
201      */
202     public abstract static class Function {
203 
204         /**
205          * Subclasses should implement this method to do the action to be
206          * measured if the action is thread-safe
207          */
call()208         public void call() { call(0); }
209 
210         /**
211          * Subclasses should implement this method if the action is not thread-safe
212          */
call(int i)213         public void call(int i) { call(); }
214 
215         /**
216          * Subclasses may implement this method to return positive integer
217          * indicating the number of operations in a single call to this object's
218          * call() method. If subclasses do not override this method, the default
219          * implementation returns 1.
220          */
getOperationsPerIteration()221         public long getOperationsPerIteration() {
222             return 1;
223         }
224 
225         /**
226          * Subclasses may implement this method to return either positive or
227          * negative integer indicating the number of events in a single call to
228          * this object's call() method. If subclasses do not override this
229          * method, the default implementation returns -1, indicating that events
230          * are not applicable to this test. e.g: Number of breaks / iterations
231          * for break iterator
232          */
getEventsPerIteration()233         public long getEventsPerIteration() {
234             return -1;
235         }
236 
237         /**
238          * Call call() n times in a tight loop and return the elapsed
239          * milliseconds. If n is small and call() is fast the return result may
240          * be zero. Small return values have limited meaningfulness, depending
241          * on the underlying VM and OS.
242          */
time(long n)243         public final long time(long n) {
244             long start, stop;
245             start = System.currentTimeMillis();
246             while (n-- > 0) {
247                 call();
248             }
249             stop = System.currentTimeMillis();
250             return stop - start; // ms
251         }
252 
253 
254         /**
255          * init is called each time before looping through call
256          */
init()257         public void init() {}
258 
259 
getID()260         public final int getID() {
261             return id;
262         }
263 
setID(int id)264         public final void setID(int id) {
265             this.id = id;
266         }
267 
268         private int id;
269     }
270 
271     private class FunctionRunner implements Runnable {
FunctionRunner(Function f, long loops, int id)272         public FunctionRunner(Function f, long loops, int id) {
273             this.f = f;
274             this.loops = loops;
275             this.id = id;
276         }
277 
run()278         public void run() {
279             long n = loops;
280             while (n-- > 0)
281                 f.call(id);
282         }
283 
284         private Function f;
285 
286         private long loops;
287         private int id;
288     }
289 
290 
291     /**
292      * Exception indicating a usage error.
293      */
294     public static class UsageException extends Exception {
295         /**
296          * For serialization
297          */
298         private static final long serialVersionUID = -1201256240606806242L;
299 
UsageException(String message)300         public UsageException(String message) {
301             super(message);
302         }
303 
UsageException()304         public UsageException() {
305             super();
306         }
307     }
308 
309     /**
310      * Constructor.
311      */
PerfTest()312     protected PerfTest() {
313     }
314 
315     /**
316      * Framework method. Default implementation does not parse any extra
317      * arguments. Subclasses may override this to parse extra arguments.
318      * Subclass implementations should NOT call the base class implementation.
319      */
setup(String[] args)320     protected void setup(String[] args) {
321         if (args.length > 0) {
322             throw new RuntimeException("Extra arguments received");
323         }
324     }
325 
326     /**
327      * These must be kept in sync with getOptions().
328      */
329     static final int HELP1 = 0;
330     static final int HELP2 = 1;
331     static final int VERBOSE = 2;
332     static final int SOURCEDIR = 3;
333     static final int ENCODING = 4;
334     static final int USELEN = 5;
335     static final int FILE_NAME = 6;
336     static final int PASSES = 7;
337     static final int ITERATIONS = 8;
338     static final int TIME = 9;
339     static final int LINE_MODE = 10;
340     static final int BULK_MODE = 11;
341     static final int LOCALE = 12;
342     static final int TEST_NAME = 13;
343     static final int THREADS = 14;
344 
345     // Options above here are identical to those in C; keep in sync with C
346     // Options below here are unique to Java; shift down as necessary
347     static final int GARBAGE_COLLECT = 14;
348     static final int LIST = 15;
349 
getOptions()350     UOption[] getOptions() {
351         return new UOption[] {
352                 UOption.HELP_H(),
353                 UOption.HELP_QUESTION_MARK(),
354                 UOption.VERBOSE(),
355                 UOption.SOURCEDIR(),
356                 UOption.ENCODING(),
357                 UOption.DEF("uselen",     'u', UOption.NO_ARG),
358                 UOption.DEF("filename",   'f', UOption.REQUIRES_ARG),
359                 UOption.DEF("passes",     'p', UOption.REQUIRES_ARG),
360                 UOption.DEF("iterations", 'i', UOption.REQUIRES_ARG),
361                 UOption.DEF("time",       't', UOption.REQUIRES_ARG),
362                 UOption.DEF("line-mode",  'l', UOption.NO_ARG),
363                 UOption.DEF("bulk-mode",  'b', UOption.NO_ARG),
364                 UOption.DEF("locale",     'L', UOption.REQUIRES_ARG),
365                 UOption.DEF("testname",   'T', UOption.REQUIRES_ARG),
366                 UOption.DEF("threads",    'r', UOption.REQUIRES_ARG),
367 
368                 // Options above here are identical to those in C; keep in sync
369                 // Options below here are unique to Java
370 
371                 UOption.DEF("gc", 'g', UOption.NO_ARG),
372                 UOption.DEF("list", (char) -1, UOption.NO_ARG), };
373     }
374 
375     /**
376      * Subclasses should call this method in their main(). run() will in turn
377      * call setup() with any arguments it does not parse. This method parses the
378      * command line and runs the tests given on the command line, with the given
379      * parameters. See the class description for details.
380      */
run(String[] args)381     protected final void run(String[] args) throws Exception {
382         Set testList = parseOptions(args);
383 
384         // Run the tests
385         for (Iterator iter = testList.iterator(); iter.hasNext();) {
386             String meth = (String) iter.next();
387 
388             // Call meth to set up the test
389             // long eventsPerCall = -1;
390             Function testFunction = testProvider.getTestCmd(meth);
391             if (testFunction == null) {
392                 throw new RuntimeException(meth
393                         + " failed to return a test function");
394             }
395             if (testFunction.getOperationsPerIteration() < 1) {
396                 throw new RuntimeException(meth
397                         + " returned an illegal operations/iteration()");
398             }
399 
400             long t;
401             // long b = System.currentTimeMillis();
402             long loops = getIteration(meth, testFunction);
403             // System.out.println("The guess cost: " + (System.currentTimeMillis() - b)/1000. + " s.");
404 
405             for (int j = 0; j < passes; ++j) {
406                 long events = -1;
407                 if (verbose) {
408                     if (iterations > 0) {
409                         System.out.println("= " + meth + " begin " + iterations);
410                     } else {
411                         System.out.println("= " + meth + " begin " + time + " seconds");
412                     }
413                 } else {
414                     System.out.println("= " + meth + " begin ");
415                 }
416 
417                 t = performLoops(testFunction, loops);
418 
419                 events = testFunction.getEventsPerIteration();
420 
421                 if (verbose) {
422                     if (events == -1) {
423                         System.out.println("= " + meth + " end " + (t / 1000.0) + " loops: " + loops + " operations: "
424                                 + testFunction.getOperationsPerIteration());
425                     } else {
426                         System.out.println("= " + meth + " end " + (t / 1000.0) + " loops: " + loops + " operations: "
427                                 + testFunction.getOperationsPerIteration() + " events: " + events);
428                     }
429                 } else {
430                     if (events == -1) {
431                         System.out.println("= " + meth + " end " + (t / 1000.0) + " " + loops + " "
432                                 + testFunction.getOperationsPerIteration());
433                     } else {
434                         System.out.println("= " + meth + " end " + (t / 1000.0) + " " + loops + " "
435                                 + testFunction.getOperationsPerIteration() + " " + events);
436                     }
437                 }
438 
439             }
440         }
441     }
442 
443     /**
444      * @param args
445      * @return the method list to call
446      * @throws UsageException
447      */
parseOptions(String[] args)448     private Set parseOptions(String[] args) throws UsageException {
449 
450         doPriorGC = false;
451         encoding = "";
452         uselen = false;
453         fileName = null;
454         sourceDir = null;
455         line_mode = false;
456         verbose = false;
457         bulk_mode = false;
458         passes = iterations = time = -1;
459         locale = null;
460         testName = null;
461         threads = 1;
462 
463         UOption[] options = getOptions();
464         int remainingArgc = UOption.parseArgs(args, options);
465 
466         if (args.length == 0 || options[HELP1].doesOccur || options[HELP2].doesOccur)
467             throw new UsageException();
468 
469         if (options[LIST].doesOccur) {
470             System.err.println("Available tests:");
471             Set testNames = testProvider.getAllTestCmdNames();
472             for (Iterator iter = testNames.iterator(); iter.hasNext();) {
473                 String name = (String) iter.next();
474                 System.err.println(" " + name);
475             }
476             System.exit(0);
477         }
478 
479         if (options[TIME].doesOccur && options[ITERATIONS].doesOccur)
480             throw new UsageException("Cannot specify both '-t <seconds>' and '-i <iterations>'");
481         else if (!options[TIME].doesOccur && !options[ITERATIONS].doesOccur)
482             throw new UsageException("Either '-t <seconds>' or '-i <iterations>' must be specified");
483         else if (options[ITERATIONS].doesOccur) {
484             try {
485                 iterations = Integer.parseInt(options[ITERATIONS].value);
486             } catch (NumberFormatException ex) {
487                 throw new UsageException("'-i <iterations>' requires an integer number of iterations");
488             }
489         } else { //if (options[TIME].doesOccur)
490             try {
491                 time = Integer.parseInt(options[TIME].value);
492             } catch (NumberFormatException ex) {
493                 throw new UsageException("'-r <seconds>' requires an integer number of seconds");
494             }
495         }
496 
497         if (!options[PASSES].doesOccur)
498             throw new UsageException("'-p <passes>' must be specified");
499         else
500             passes = Integer.parseInt(options[PASSES].value);
501 
502         if (options[LINE_MODE].doesOccur && options[BULK_MODE].doesOccur)
503             throw new UsageException("Cannot specify both '-l' (line mode) and '-b' (bulk mode)");
504 
505         if (options[THREADS].doesOccur) {
506             try {
507                 threads = Integer.parseInt(options[THREADS].value);
508             } catch (NumberFormatException ex) {
509                 throw new UsageException("'-r <threads>' requires an integer number of threads");
510             }
511             if (threads <= 0)
512                 throw new UsageException("'-r <threads>' requires an number of threads greater than 0");
513         }
514 
515         line_mode = options[LINE_MODE].doesOccur;
516         bulk_mode = options[BULK_MODE].doesOccur;
517         verbose   = options[VERBOSE].doesOccur;
518         uselen    = options[USELEN].doesOccur;
519         doPriorGC = options[GARBAGE_COLLECT].doesOccur;
520 
521         if (options[SOURCEDIR].doesOccur) sourceDir = options[SOURCEDIR].value;
522         if (options[ENCODING].doesOccur)  encoding  = options[ENCODING].value;
523         if (options[FILE_NAME].doesOccur) fileName  = options[FILE_NAME].value;
524         if (options[TEST_NAME].doesOccur) testName  = options[TEST_NAME].value;
525         if (options[LOCALE].doesOccur)    locale    = LocaleUtility.getLocaleFromName(options[LOCALE].value);
526 
527 
528         // build the test list
529         Set testList = new HashSet();
530         int i, j;
531         for (i = 0; i < remainingArgc; ++i) {
532             // is args[i] a method name?
533             if (testProvider.isTestCmd(args[i])) {
534                 testList.add(args[i]);
535             } else {
536                 // args[i] is neither a method name nor a number. Pass
537                 // everything from here on through to the subclass via
538                 // setup().
539                 break;
540             }
541         }
542 
543         // if no tests were specified, put all the tests in the test list
544         if (testList.size() == 0) {
545             Set testNames = testProvider.getAllTestCmdNames();
546             Iterator iter = testNames.iterator();
547             while (iter.hasNext())
548                 testList.add((String)iter.next());
549         }
550 
551         // pass remaining arguments, if any, through to the subclass via setup() method.
552         String[] subclassArgs = new String[remainingArgc - i];
553         for (j = 0; i < remainingArgc; j++)
554             subclassArgs[j] = args[i++];
555         setup(subclassArgs);
556 
557         // Put the heap in a consistent state
558         if (doPriorGC)
559             gc();
560 
561         return testList;
562     }
563 
564     /**
565      * Translate '-t time' to iterations (or just return '-i iteration')
566      *
567      * @param meth
568      * @param fn
569      * @return rt
570      */
getIteration(String methName, Function fn)571     private long getIteration(String methName, Function fn) throws InterruptedException {
572         long iter = 0;
573         if (time < 0) { // && iterations > 0
574             iter = iterations;
575         } else { // && iterations < 0
576             // Translate time to iteration
577             // Assuming there is a linear relation between time and iterations
578 
579             if (verbose) {
580                 System.out.println("= " + methName + " calibrating " + time
581                         + " seconds");
582             }
583 
584             long base = time * 1000;
585             // System.out.println("base :" + base);
586             long seed = 1;
587             long t = 0;
588             while (t < base * 0.9 || base * 1.1 < t) { // + - 10%
589                 if (iter == 0 || t == 0) {
590                     iter = seed; // start up from 1
591                     seed *= 100; // if the method is too fast (t == 0),
592                     // multiply 100 times
593                     // 100 is rational because 'base' is always larger than 1000
594                 } else {
595                     // If 't' is large enough, use linear function to calculate
596                     // new iteration
597                     //
598                     // new iter(base) old iter
599                     // -------------- = -------- = k
600                     // new time old time
601                     //
602                     // System.out.println("before guess t: " + t);
603                     // System.out.println("before guess iter: " + iter);
604                     iter = (long) ((double) iter / t * base); // avoid long
605                     // cut, eg. 1/10
606                     // == 0
607                     if (iter == 0) {
608                         throw new RuntimeException(
609                                 "Unable to converge on desired duration");
610                     }
611                 }
612                 t = performLoops(fn, iter);
613             }
614             // System.out.println("final t : " + t);
615             // System.out.println("final i : " + iter);
616         }
617         return iter;
618     }
619 
620 
performLoops(Function function, long loops)621     private long performLoops(Function function, long loops) throws InterruptedException {
622         function.init();
623         if (threads > 1) {
624             Thread[] threadList = new Thread[threads];
625             for (int i=0; i<threads; i++)
626                 threadList[i] = new Thread(new FunctionRunner(function, loops, i));
627 
628             long start = System.currentTimeMillis();
629             for (int i=0; i<threads; i++)
630                 threadList[i].start();
631             for (int i=0; i<threads; i++)
632                 threadList[i].join();
633             return System.currentTimeMillis() - start;
634 
635         } else {
636             return function.time(loops); // ms
637         }
638     }
639 
640 
641     /**
642      * Invoke the runtime's garbage collection procedure repeatedly until the
643      * amount of free memory stabilizes to within 10%.
644      */
gc()645     protected void gc() {
646         if (false) {
647             long last;
648             long free = 1;
649             Runtime runtime = Runtime.getRuntime();
650             do {
651                 runtime.gc();
652                 last = free;
653                 free = runtime.freeMemory();
654             } while (((double) Math.abs(free - last)) / free > 0.1);
655             // Wait for the change in free memory to drop under 10%
656             // between successive calls.
657         }
658 
659         // From "Java Platform Performance". This is the procedure
660         // recommended by Javasoft.
661         try {
662             System.gc();
663             Thread.sleep(100);
664             System.runFinalization();
665             Thread.sleep(100);
666 
667             System.gc();
668             Thread.sleep(100);
669             System.runFinalization();
670             Thread.sleep(100);
671         } catch (InterruptedException e) {
672         }
673     }
674 
675 
readToEOS(Reader reader)676     public static char[] readToEOS(Reader reader) {
677         ArrayList vec = new ArrayList();
678         int count = 0;
679         int pos = 0;
680         final int MAXLENGTH = 0x8000; // max buffer size - 32K
681         int length = 0x80; // start with small buffers and work up
682         do {
683             pos = 0;
684             length = length >= MAXLENGTH ? MAXLENGTH : length * 2;
685             char[] buffer = new char[length];
686             try {
687                 do {
688                     int n = reader.read(buffer, pos, length - pos);
689                     if (n == -1) {
690                         break;
691                     }
692                     pos += n;
693                 } while (pos < length);
694             }
695             catch (IOException e) {
696             }
697             vec.add(buffer);
698             count += pos;
699         } while (pos == length);
700 
701         char[] data = new char[count];
702         pos = 0;
703         for (int i = 0; i < vec.size(); ++i) {
704             char[] buf = (char[]) vec.get(i);
705             int len = Math.min(buf.length, count - pos);
706             System.arraycopy(buf, 0, data, pos, len);
707             pos += len;
708         }
709         return data;
710     }
readToEOS(InputStream stream)711     public static byte[] readToEOS(InputStream stream) {
712 
713         ArrayList vec = new ArrayList();
714         int count = 0;
715         int pos = 0;
716         final int MAXLENGTH = 0x8000; // max buffer size - 32K
717         int length = 0x80; // start with small buffers and work up
718         do {
719             pos = 0;
720             length = length >= MAXLENGTH ? MAXLENGTH : length * 2;
721             byte[] buffer = new byte[length];
722             try {
723                 do {
724                     int n = stream.read(buffer, pos, length - pos);
725                     if (n == -1) {
726                         break;
727                     }
728                     pos += n;
729                 } while (pos < length);
730             }
731             catch (IOException e) {
732             }
733             vec.add(buffer);
734             count += pos;
735         } while (pos == length);
736 
737 
738         byte[] data = new byte[count];
739         pos = 0;
740         for (int i = 0; i < vec.size(); ++i) {
741             byte[] buf = (byte[]) vec.get(i);
742             int len = Math.min(buf.length, count - pos);
743             System.arraycopy(buf, 0, data, pos, len);
744             pos += len;
745         }
746         return data;
747     }
748 
readLines(String filename, String srcEncoding, boolean bulkMode)749     protected String[] readLines(String filename, String srcEncoding, boolean bulkMode) {
750         FileInputStream fis = null;
751         InputStreamReader isr = null;
752         BufferedReader br = null;
753         try {
754             fis = new FileInputStream(filename);
755             isr = new InputStreamReader(fis, srcEncoding);
756             br = new BufferedReader(isr);
757         } catch (Exception e) {
758             System.err.println("Error: File access exception: " + e.getMessage() + "!");
759             System.exit(1);
760         }
761         ArrayList list = new ArrayList();
762         while (true) {
763             String line = null;
764             try {
765                 line = readDataLine(br);
766             } catch (Exception e) {
767                 System.err.println("Read File Error" + e.getMessage() + "!");
768                 System.exit(1);
769             }
770             if (line == null) break;
771             if (line.length() == 0) continue;
772             list.add(line);
773         }
774 
775         int size = list.size();
776         String[] lines = null;
777 
778         if (bulkMode) {
779             lines = new String[1];
780             StringBuffer buffer = new StringBuffer("");
781             for (int i = 0; i < size; ++i) {
782                 buffer.append((String) list.get(i));
783                 /*if (i < (size - 1)) {
784                     buffer.append("\r\n");
785                 }*/
786             }
787             lines[0] = buffer.toString();
788         } else {
789             lines = new String[size];
790             for (int i = 0; i < size; ++i) {
791                 lines[i] = (String) list.get(i);
792             }
793         }
794 
795         return lines;
796     }
797 
readDataLine(BufferedReader br)798     public String readDataLine(BufferedReader br) throws Exception {
799         String originalLine = "";
800         String line = "";
801         try {
802             line = originalLine = br.readLine();
803             if (line == null) return null;
804             if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1);
805             int commentPos = line.indexOf('#');
806             if (commentPos >= 0) line = line.substring(0, commentPos);
807             line = line.trim();
808         } catch (Exception e) {
809             throw new Exception("Line \"{0}\",  \"{1}\"" + originalLine + " "
810                     + line + " " + e.toString());
811         }
812         return line;
813     }
814 
815 
816     public static class BOMFreeReader extends Reader {
817         InputStreamReader reader;
818         String encoding;
819         int MAX_BOM_LENGTH = 5;
820 
821         /**
822          * Creates a new reader, skipping a BOM associated with the given
823          * encoding. Equivalent to BOMFreeReader(in, null).
824          *
825          * @param in
826          *            The input stream.
827          * @throws IOException
828          *             Thrown if reading for a BOM causes an IOException.
829          */
BOMFreeReader(InputStream in)830         public BOMFreeReader(InputStream in) throws IOException {
831             this(in, null);
832         }
833 
834         /**
835          * Creates a new reader, skipping a BOM associated with the given
836          * encoding. If encoding is null, attempts to detect the encoding by the
837          * BOM.
838          *
839          * @param in
840          *            The input stream.
841          * @param encoding
842          *            The encoding to use. Can be null.
843          * @throws IOException
844          *             Thrown if reading for a BOM causes an IOException.
845          */
BOMFreeReader(InputStream in, String encoding)846         public BOMFreeReader(InputStream in, String encoding) throws IOException {
847             PushbackInputStream pushback = new PushbackInputStream(in, MAX_BOM_LENGTH);
848             this.encoding = encoding;
849 
850             byte[] start = new byte[MAX_BOM_LENGTH];
851             Arrays.fill(start, (byte)0xa5);
852 
853             int amountRead = pushback.read(start, 0, MAX_BOM_LENGTH);
854             int bomLength = detectBOMLength(start);
855             if (amountRead > bomLength)
856                 pushback.unread(start, bomLength, amountRead - bomLength);
857 
858             reader = (encoding == null) ? new InputStreamReader(pushback) : new InputStreamReader(pushback, encoding);
859         }
860 
861         /**
862          * Determines the length of a BOM in the beginning of start. Assumes
863          * start is at least a length 5 array. If encoding is null, the check
864          * will not be encoding specific and it will set the encoding of this
865          * BOMFreeReader.
866          *
867          * @param start
868          *            The starting bytes.
869          * @param encoding
870          *            The encoding. Can be null.
871          * @return The length of a detected BOM.
872          */
detectBOMLength(byte[] start)873         private int detectBOMLength(byte[] start) {
874             if ((encoding == null || "UTF-16BE".equals(encoding)) && start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
875                 if (encoding == null) this.encoding = "UTF-16BE";
876                 return 2; // "UTF-16BE";
877             } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
878                 if ((encoding == null || "UTF-32LE".equals(encoding)) && start[2] == (byte) 0x00
879                         && start[3] == (byte) 0x00) {
880                     if (encoding == null) this.encoding = "UTF-32LE";
881                     return 4; // "UTF-32LE";
882                 } else if ((encoding == null || "UTF-16LE".equals(encoding))) {
883                     if (encoding == null) this.encoding = "UTF-16LE";
884                     return 2; // "UTF-16LE";
885                 }
886             } else if ((encoding == null || "UTF-8".equals(encoding)) && start[0] == (byte) 0xEF
887                     && start[1] == (byte) 0xBB && start[2] == (byte) 0xBF) {
888                 if (encoding == null) this.encoding = "UTF-8";
889                 return 3; // "UTF-8";
890             } else if ((encoding == null || "UTF-32BE".equals(encoding)) && start[0] == (byte) 0x00
891                     && start[1] == (byte) 0x00 && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
892                 if (encoding == null) this.encoding = "UTF-32BE";
893                 return 4; // "UTF-32BE";
894             } else if ((encoding == null || "SCSU".equals(encoding)) && start[0] == (byte) 0x0E
895                     && start[1] == (byte) 0xFE && start[2] == (byte) 0xFF) {
896                 if (encoding == null) this.encoding = "SCSU";
897                 return 3; // "SCSU";
898             } else if ((encoding == null || "BOCU-1".equals(encoding)) && start[0] == (byte) 0xFB
899                     && start[1] == (byte) 0xEE && start[2] == (byte) 0x28) {
900                 if (encoding == null) this.encoding = "BOCU-1";
901                 return 3; // "BOCU-1";
902             } else if ((encoding == null || "UTF-7".equals(encoding)) && start[0] == (byte) 0x2B
903                     && start[1] == (byte) 0x2F && start[2] == (byte) 0x76) {
904                 if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
905                     if (encoding == null) this.encoding = "UTF-7";
906                     return 5; // "UTF-7";
907                 } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39 || start[3] == (byte) 0x2B
908                         || start[3] == (byte) 0x2F) {
909                     if (encoding == null) this.encoding = "UTF-7";
910                     return 4; // "UTF-7";
911                 }
912             } else if ((encoding == null || "UTF-EBCDIC".equals(encoding)) && start[0] == (byte) 0xDD
913                     && start[2] == (byte) 0x73 && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
914                 if (encoding == null) this.encoding = "UTF-EBCDIC";
915                 return 4; // "UTF-EBCDIC";
916             }
917 
918             /* no known Unicode signature byte sequence recognized */
919             return 0;
920         }
921 
read(char[] cbuf, int off, int len)922         public int read(char[] cbuf, int off, int len) throws IOException {
923             return reader.read(cbuf, off, len);
924         }
925 
close()926         public void close() throws IOException {
927             reader.close();
928         }
929     }
930 }
931 
932 
933 
934 // eof
935