• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4 *******************************************************************************
5 * Copyright (C) 2002-2007, International Business Machines Corporation and    *
6 * others. All Rights Reserved.                                                *
7 *******************************************************************************
8 */
9 
10 package com.ibm.icu.dev.test.perf;
11 
12 import java.io.BufferedReader;
13 import java.io.FileInputStream;
14 import java.io.IOException;
15 import java.io.InputStreamReader;
16 import java.util.ArrayList;
17 import java.util.Comparator;
18 
19 import com.ibm.icu.impl.LocaleUtility;
20 import com.ibm.icu.text.CollationElementIterator;
21 import com.ibm.icu.text.CollationKey;
22 import com.ibm.icu.text.Normalizer;
23 import com.ibm.icu.text.NumberFormat;
24 import com.ibm.icu.text.RuleBasedCollator;
25 
26 public class CollationPerformanceTest {
27     static final String usageString =
28         "usage:  collperf options...\n"
29         + "-help                      Display this message.\n"
30         + "-file file_name            utf-16 format file of names.\n"
31         + "-locale name               ICU locale to use.  Default is en_US\n"
32         + "-rules file_name           Collation rules file (overrides locale)\n"
33         //+ "-langid 0x1234             Windows Language ID number.  Default to value for -locale option\n"
34         //+ "                              see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
35         //+ "-win                       Run test using Windows native services.  (ICU is default)\n"
36         //+ "-unix                      Run test using Unix strxfrm, strcoll services.\n"
37         //+ "-uselen                    Use API with string lengths.  Default is null-terminated strings\n"
38         + "-usekeys                   Run tests using sortkeys rather than strcoll\n"
39         + "-strcmp                    Run tests using u_strcmp rather than strcoll\n"
40         + "-strcmpCPO                 Run tests using u_strcmpCodePointOrder rather than strcoll\n"
41         + "-loop nnnn                 Loopcount for test.  Adjust for reasonable total running time.\n"
42         + "-iloop n                   Inner Loop Count.  Default = 1.  Number of calls to function\n"
43         + "                               under test at each call point.  For measuring test overhead.\n"
44         + "-terse                     Terse numbers-only output.  Intended for use by scripts.\n"
45         + "-french                    French accent ordering\n"
46         + "-frenchoff                 No French accent ordering (for use with French locales.)\n"
47         + "-norm                      Normalizing mode on\n"
48         + "-shifted                   Shifted mode\n"
49         + "-lower                     Lower case first\n"
50         + "-upper                     Upper case first\n"
51         + "-case                      Enable separate case level\n"
52         + "-level n                   Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n"
53         + "-keyhist                   Produce a table sort key size vs. string length\n"
54         + "-binsearch                 Binary Search timing test\n"
55         + "-keygen                    Sort Key Generation timing test\n"
56         + "-qsort                     Quicksort timing test\n"
57         + "-iter                      Iteration Performance Test\n"
58         + "-dump                      Display strings, sort keys and CEs.\n"
59         + "-java                      Run test using java.text.Collator.\n";
60 
61     //enum {FLAG, NUM, STRING} type;
62     static StringBuffer temp_opt_fName      = new StringBuffer("");
63     static StringBuffer temp_opt_locale     = new StringBuffer("en_US");
64     //static StringBuffer temp_opt_langid     = new StringBuffer("0");         // Defaults to value corresponding to opt_locale.
65     static StringBuffer temp_opt_rules      = new StringBuffer("");
66     static StringBuffer temp_opt_help       = new StringBuffer("");
67     static StringBuffer temp_opt_loopCount  = new StringBuffer("1");
68     static StringBuffer temp_opt_iLoopCount = new StringBuffer("1");
69     static StringBuffer temp_opt_terse      = new StringBuffer("false");
70     static StringBuffer temp_opt_qsort      = new StringBuffer("");
71     static StringBuffer temp_opt_binsearch  = new StringBuffer("");
72     static StringBuffer temp_opt_icu        = new StringBuffer("true");
73     //static StringBuffer opt_win        = new StringBuffer("");      // Run with Windows native functions.
74     //static StringBuffer opt_unix       = new StringBuffer("");      // Run with UNIX strcoll, strxfrm functions.
75     //static StringBuffer opt_uselen     = new StringBuffer("");
76     static StringBuffer temp_opt_usekeys    = new StringBuffer("");
77     static StringBuffer temp_opt_strcmp     = new StringBuffer("");
78     static StringBuffer temp_opt_strcmpCPO  = new StringBuffer("");
79     static StringBuffer temp_opt_norm       = new StringBuffer("");
80     static StringBuffer temp_opt_keygen     = new StringBuffer("");
81     static StringBuffer temp_opt_french     = new StringBuffer("");
82     static StringBuffer temp_opt_frenchoff  = new StringBuffer("");
83     static StringBuffer temp_opt_shifted    = new StringBuffer("");
84     static StringBuffer temp_opt_lower      = new StringBuffer("");
85     static StringBuffer temp_opt_upper      = new StringBuffer("");
86     static StringBuffer temp_opt_case       = new StringBuffer("");
87     static StringBuffer temp_opt_level      = new StringBuffer("0");
88     static StringBuffer temp_opt_keyhist    = new StringBuffer("");
89     static StringBuffer temp_opt_itertest   = new StringBuffer("");
90     static StringBuffer temp_opt_dump       = new StringBuffer("");
91     static StringBuffer temp_opt_java       = new StringBuffer("");
92 
93 
94     static String   opt_fName      = "";
95     static String   opt_locale     = "en_US";
96     //static int      opt_langid     = 0;         // Defaults to value corresponding to opt_locale.
97     static String   opt_rules      = "";
98     static boolean  opt_help       = false;
99     static int      opt_loopCount  = 1;
100     static int      opt_iLoopCount = 1;
101     static boolean  opt_terse      = false;
102     static boolean  opt_qsort      = false;
103     static boolean  opt_binsearch  = false;
104     static boolean  opt_icu        = true;
105     //static boolean  opt_win        = false;      // Run with Windows native functions.
106     //static boolean  opt_unix       = false;      // Run with UNIX strcoll, strxfrm functions.
107     //static boolean  opt_uselen     = false;
108     static boolean  opt_usekeys    = false;
109     static boolean  opt_strcmp     = false;
110     static boolean  opt_strcmpCPO  = false;
111     static boolean  opt_norm       = false;
112     static boolean  opt_keygen     = false;
113     static boolean  opt_french     = false;
114     static boolean  opt_frenchoff  = false;
115     static boolean  opt_shifted    = false;
116     static boolean  opt_lower      = false;
117     static boolean  opt_upper      = false;
118     static boolean  opt_case       = false;
119     static int      opt_level      = 0;
120     static boolean  opt_keyhist    = false;
121     static boolean  opt_itertest   = false;
122     static boolean  opt_dump       = false;
123     static boolean  opt_java       = false;
124 
125     static OptionSpec[] options = {
126         new OptionSpec("-file", 2, temp_opt_fName),
127         new OptionSpec("-locale", 2, temp_opt_locale),
128         //new OptionSpec("-langid", 1, temp_opt_langid),
129         new OptionSpec("-rules", 2, temp_opt_rules),
130         new OptionSpec("-qsort", 0, temp_opt_qsort),
131         new OptionSpec("-binsearch", 0, temp_opt_binsearch),
132         new OptionSpec("-iter", 0, temp_opt_itertest),
133         //new OptionSpec("-win", 0, temp_opt_win),
134         //new OptionSpec("-unix", 0, temp_opt_unix),
135         //new OptionSpec("-uselen", 0, temp_opt_uselen),
136         new OptionSpec("-usekeys", 0, temp_opt_usekeys),
137         new OptionSpec("-strcmp", 0, temp_opt_strcmp),
138         new OptionSpec("-strcmpCPO", 0, temp_opt_strcmpCPO),
139         new OptionSpec("-norm", 0, temp_opt_norm),
140         new OptionSpec("-french", 0, temp_opt_french),
141         new OptionSpec("-frenchoff", 0, temp_opt_frenchoff),
142         new OptionSpec("-shifted", 0, temp_opt_shifted),
143         new OptionSpec("-lower", 0, temp_opt_lower),
144         new OptionSpec("-upper", 0, temp_opt_upper),
145         new OptionSpec("-case", 0, temp_opt_case),
146         new OptionSpec("-level", 1, temp_opt_level),
147         new OptionSpec("-keyhist", 0, temp_opt_keyhist),
148         new OptionSpec("-keygen", 0, temp_opt_keygen),
149         new OptionSpec("-loop", 1, temp_opt_loopCount),
150         new OptionSpec("-iloop", 1, temp_opt_iLoopCount),
151         new OptionSpec("-terse", 0, temp_opt_terse),
152         new OptionSpec("-dump", 0, temp_opt_dump),
153         new OptionSpec("-help", 0, temp_opt_help),
154         new OptionSpec("-?", 0, temp_opt_help),
155         new OptionSpec("-java", 0, temp_opt_java),
156     };
157 
158     static java.text.Collator javaCol = null;
159     static com.ibm.icu.text.Collator icuCol = null;
160     static NumberFormat nf = null;
161     static NumberFormat percent = null;
162     ArrayList list = null;
163     String[] tests = null;
164     int globalCount = 0;
165 
main(String[] args)166     public static void main(String[] args) {
167         CollationPerformanceTest collPerf = new CollationPerformanceTest();
168         if ( !CollationPerformanceTest.processOptions(args) || opt_help || opt_fName.length()==0) {
169             System.out.println(usageString);
170             System.exit(1);
171         }
172 
173         nf = NumberFormat.getInstance();
174         nf.setMaximumFractionDigits(2);
175         percent = NumberFormat.getPercentInstance();
176 
177         collPerf.setOptions();
178         collPerf.readDataLines();
179 
180         if (opt_dump) {
181             collPerf.doDump();
182         }
183 
184         if (opt_qsort) {
185             collPerf.doQSort();
186         }
187 
188         if (opt_binsearch) {
189             collPerf.doBinarySearch();
190         }
191 
192         if (opt_keygen) {
193             collPerf.doKeyGen();
194         }
195 
196         if (opt_keyhist) {
197             collPerf.doKeyHist();
198         }
199 
200         if (opt_itertest) {
201             collPerf.doIterTest();
202         }
203 
204     }
205 
206     //Dump file lines, CEs, Sort Keys if requested
doDump()207     void doDump() {
208         for(int i = 0; i < list.size(); i++) {
209             //print the line
210             String line = com.ibm.icu.impl.Utility.escape((String)list.get(i));
211             System.out.println(line);
212 
213             System.out.print("  CEs:  ");
214             CollationElementIterator CEiter = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationElementIterator(line);
215             int ce;
216             int j = 0;
217             for(;;) {
218                 ce = CEiter.next();
219                 if (ce == CollationElementIterator.NULLORDER) {
220                     break;
221                 }
222                 //System.out.print();
223                 String outStr = Integer.toHexString(ce);
224                 for (int len = 0; len < 8 - outStr.length(); len++) {
225                     outStr ='0' + outStr;
226                 }
227                 System.out.print(outStr + "  ");
228                 if(++j >8) {
229                     System.out.print("\n        ");
230                     j = 0;
231                 }
232             }
233 
234             System.out.print("\n   ICU Sort Key: ");
235             CollationKey ck = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationKey(line);
236             byte[] cks = ck.toByteArray();
237             j = 0;
238             for(int k = 0; k < cks.length; k++) {
239                 String outStr = Integer.toHexString(cks[k]);
240                 switch (outStr.length()) {
241                 case 1:     outStr = '0' + outStr;
242                             break;
243                 case 8:     outStr = outStr.substring(6);
244                             break;
245                 }
246                 System.out.print(outStr);
247                 System.out.print("  ");
248                 j++;
249                 if(j > 0 && j % 20 == 0) {
250                     System.out.print("\n                 ");
251                 }
252             }
253             System.out.println("\n");
254         }
255     }
256 
257     /**---------------------------------------------------------------------------------------
258      *
259      *   doQSort()    The quick sort timing test.
260      *
261      *---------------------------------------------------------------------------------------
262      */
doQSort()263     void doQSort() {
264         callGC();
265         //String[] sortTests = (String[]) tests.clone();
266         //Adjust loop count to compensate for file size. QSort should be nlog(n)
267         double dLoopCount = opt_loopCount * 3000 / ((Math.log(tests.length) / Math.log(10)* tests.length));
268 
269         if(opt_usekeys) {
270             dLoopCount *= 5;
271         }
272 
273         int adj_loopCount = (int)dLoopCount;
274         if(adj_loopCount < 1) {
275             adj_loopCount = 1;
276         }
277 
278         globalCount = 0;
279         long startTime = 0;
280         long endTime = 0;
281         if (opt_icu && opt_usekeys) {
282             startTime = System.currentTimeMillis();
283             qSortImpl_icu_usekeys(tests, 0, tests.length -1, icuCol);
284             endTime = System.currentTimeMillis();
285         }
286         if (opt_icu && !opt_usekeys){
287             startTime = System.currentTimeMillis();
288             qSortImpl_nokeys(tests, 0, tests.length -1, icuCol);
289             endTime = System.currentTimeMillis();
290         }
291         if (opt_java && opt_usekeys) {
292             startTime = System.currentTimeMillis();
293             qSortImpl_java_usekeys(tests, 0, tests.length -1, javaCol);
294             endTime = System.currentTimeMillis();
295         }
296         if (opt_java && !opt_usekeys){
297             startTime = System.currentTimeMillis();
298             qSortImpl_nokeys(tests, 0, tests.length -1, javaCol);
299             endTime = System.currentTimeMillis();
300         }
301         long elapsedTime = endTime - startTime;
302         int ns = (int)(1000000 * elapsedTime / (globalCount + 0.0));
303         if (!opt_terse) {
304             System.out.println("qsort:  total # of string compares = " + globalCount);
305             System.out.println("qsort:  time per compare = " + ns);
306         } else {
307             System.out.println(ns);
308         }
309     }
310 
311     /**---------------------------------------------------------------------------------------
312      *
313      *    doBinarySearch()    Binary Search timing test.  Each name from the list
314      *                        is looked up in the full sorted list of names.
315      *
316      *---------------------------------------------------------------------------------------
317      */
doBinarySearch()318     void doBinarySearch() {
319         callGC();
320         int gCount = 0;
321         int loops = 0;
322         double dLoopCount = opt_loopCount * 3000 / (Math.log(tests.length) / Math.log(10)* tests.length);
323         long startTime = 0;
324         long elapsedTime = 0;
325 
326         if(opt_usekeys) {
327             dLoopCount *= 5;
328         }
329         int adj_loopCount = (int)dLoopCount;
330         if(adj_loopCount < 1) {
331             adj_loopCount = 1;
332         }
333 
334         //int opt2 = 0;
335 
336         for(;;) {   //not really a loop, just allows "break" to work, to simplify
337                     //inadvertently running more than one test through here
338             if(opt_strcmp) {
339                 int r = 0;
340                 startTime = System.currentTimeMillis();
341                 for(loops = 0; loops < adj_loopCount; loops++) {
342                     for (int j = 0; j < tests.length; j++) {
343                         int hi = tests.length-1;
344                         int lo = 0;
345                         int guess = -1;
346                         for(;;) {
347                             int newGuess = (hi + lo) / 2;
348                             if(newGuess == guess){
349                                 break;
350                             }
351                             guess = newGuess;
352                             r = tests[j].compareTo(tests[guess]);
353                             gCount++;
354                             if(r == 0) {
355                                 break;
356                             }
357                             if (r < 0) {
358                                 hi = guess;
359                             } else {
360                                 lo = guess;
361                             }
362                         }
363                     }
364                 }
365                 elapsedTime = System.currentTimeMillis() - startTime;
366                 break;
367             }
368 
369             if (opt_strcmpCPO) {
370                 int r = 0;
371                 startTime = System.currentTimeMillis();
372                 for(loops = 0; loops < adj_loopCount; loops++) {
373                     for (int j = 0; j < tests.length; j++) {
374                         int hi = tests.length-1;
375                         int lo = 0;
376                         int guess = -1;
377                         for(;;) {
378                             int newGuess = (hi + lo) / 2;
379                             if(newGuess == guess){
380                                 break;
381                             }
382                             guess = newGuess;
383                             r = com.ibm.icu.text.Normalizer.compare(tests[j], tests[guess], Normalizer.COMPARE_CODE_POINT_ORDER);
384                             gCount++;
385                             if(r == 0) {
386                                 break;
387                             }
388                             if (r < 0) {
389                                 hi = guess;
390                             } else {
391                                 lo = guess;
392                             }
393                         }
394                     }
395                 }
396                 elapsedTime = System.currentTimeMillis() - startTime;
397                 break;
398             }
399 
400             if (opt_icu) {
401 
402                 int r = 0;
403                 startTime = System.currentTimeMillis();
404                 for (loops = 0; loops < adj_loopCount; loops++) {
405                     for (int j = 0; j < tests.length; j++) {
406                         int hi = tests.length - 1;
407                         int lo = 0;
408                         int guess = -1;
409                         for (;;) {
410                             int newGuess = (hi + lo) / 2;
411                             if (newGuess == guess) {
412                                 break;
413                             }
414                             guess = newGuess;
415                             if (opt_usekeys) {
416                                 com.ibm.icu.text.CollationKey sortKey1 = icuCol.getCollationKey(tests[j]);
417                                 com.ibm.icu.text.CollationKey sortKey2 = icuCol.getCollationKey(tests[guess]);
418                                 r = sortKey1.compareTo(sortKey2);
419                                 gCount ++;
420                             } else {
421                                 r = icuCol.compare(tests[j], tests[guess]);
422                                 gCount++;
423                             }
424                             if (r == 0) {
425                                 break;
426                             }
427                             if (r < 0) {
428                                 hi = guess;
429                             } else {
430                                 lo = guess;
431                             }
432                         }
433                     }
434                 }
435                 elapsedTime = System.currentTimeMillis() - startTime;
436                 break;
437             }
438             if (opt_java) {
439 
440                 int r = 0;
441                 startTime = System.currentTimeMillis();
442                 for (loops = 0; loops < adj_loopCount; loops++) {
443                     for (int j = 0; j < tests.length; j++) {
444                         int hi = tests.length - 1;
445                         int lo = 0;
446                         int guess = -1;
447                         for (;;) {
448                             int newGuess = (hi + lo) / 2;
449                             if (newGuess == guess) {
450                                 break;
451                             }
452                             guess = newGuess;
453                             if (opt_usekeys) {
454                                 java.text.CollationKey sortKey1 = javaCol.getCollationKey(tests[j]);
455                                 java.text.CollationKey sortKey2 = javaCol.getCollationKey(tests[guess]);
456                                 r = sortKey1.compareTo(sortKey2);
457                                 gCount ++;
458                             } else {
459                                 r = javaCol.compare(tests[j], tests[guess]);
460                                 gCount++;
461                             }
462                             if (r == 0) {
463                                 break;
464                             }
465                             if (r < 0) {
466                                 hi = guess;
467                             } else {
468                                 lo = guess;
469                             }
470                         }
471                     }
472                 }
473                 elapsedTime = System.currentTimeMillis() - startTime;
474                 break;
475             }
476             break;
477         }
478         int ns = (int)((float)(1000000) * (float)elapsedTime / (float)gCount);
479         if (!opt_terse) {
480             System.out.println("binary search:  total # of string compares = " + gCount);
481             System.out.println("binary search:  compares per loop = " + gCount / loops);
482             System.out.println("binary search:  time per compare = " + ns);
483         } else {
484             System.out.println(ns);
485         }
486     }
487 
488     /**---------------------------------------------------------------------------------------
489      *
490      *   doKeyGen()     Key Generation Timing Test
491      *
492      *---------------------------------------------------------------------------------------
493      */
doKeyGen()494     void doKeyGen() {
495         callGC();
496 
497         // Adjust loop count to compensate for file size.   Should be order n
498         double dLoopCount = opt_loopCount * (1000.0 /  (double)list.size());
499         int adj_loopCount = (int)dLoopCount;
500         if (adj_loopCount < 1) adj_loopCount = 1;
501 
502         long startTime = 0;
503         long totalKeyLen = 0;
504         long totalChars = 0;
505         if (opt_java) {
506             startTime = System.currentTimeMillis();
507             for (int loops=0; loops<adj_loopCount; loops++) {
508                 for (int line=0; line < tests.length; line++) {
509                     for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
510                         totalChars += tests[line].length();
511                         byte[] sortKey = javaCol.getCollationKey(tests[line]).toByteArray();
512                         totalKeyLen += sortKey.length;
513                     }
514                 }
515             }
516         } else {
517             startTime = System.currentTimeMillis();
518             for (int loops=0; loops<adj_loopCount; loops++) {
519                 for (int line=0; line < tests.length; line++) {
520                     for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
521                         totalChars += tests[line].length();
522                         byte[] sortKey = icuCol.getCollationKey(tests[line]).toByteArray();
523                         totalKeyLen += sortKey.length;
524                     }
525                 }
526             }
527         }
528 
529         long elapsedTime = System.currentTimeMillis() - startTime;
530         long ns = (long)(1000000 * elapsedTime / (adj_loopCount * tests.length + 0.0));
531         if (!opt_terse) {
532             System.out.println("Sort Key Generation:  total # of keys =" + adj_loopCount * tests.length);
533             System.out.println("Sort Key Generation:  time per key = " + ns + " ns");
534             System.out.println("Key Length / character = " + nf.format(totalKeyLen / (totalChars + 0.0)));
535         }
536         else {
537             System.out.print(ns + ",  ");
538             System.out.println(nf.format(totalKeyLen / (totalChars + 0.0)) + ", ");
539         }
540     }
541 
542     /**---------------------------------------------------------------------------------------
543      *
544      *    doKeyHist()       Output a table of data for average sort key size vs. string length.
545      *
546      *---------------------------------------------------------------------------------------
547      */
doKeyHist()548     void doKeyHist() {
549         callGC();
550         int     maxLen = 0;
551 
552         // Find the maximum string length
553         for (int i = 0; i < tests.length; i++) {
554             if (tests[i].length() > maxLen) maxLen = tests[i].length();
555         }
556 
557         int[] accumulatedLen  = new int[maxLen + 1];
558         int[] numKeysOfSize   = new int[maxLen + 1];
559 
560         // Fill the arrays...
561         for (int i = 0; i < tests.length; i++) {
562             int len = tests[i].length();
563             accumulatedLen[len] += icuCol.getCollationKey(tests[i]).toByteArray().length;
564             numKeysOfSize[len]  += 1;
565         }
566 
567         // And write out averages
568         System.out.println("String Length,  Avg Key Length,  Avg Key Len per char");
569         for (int i = 1; i <= maxLen; i++) {
570             if (numKeysOfSize[i] > 0) {
571                 System.out.println(i + ", " + nf.format(accumulatedLen[i] / (numKeysOfSize[i]+ 0.0)) + ", "
572                     + nf.format(accumulatedLen[i] / (numKeysOfSize[i] * i + 0.0)));
573             }
574         }
575 
576     }
577 
doForwardIterTest()578     void doForwardIterTest() {
579         callGC();
580         System.out.print("\n\nPerforming forward iteration performance test with ");
581         System.out.println("performance test on strings from file -----------");
582 
583         CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator("");
584 
585         int gCount = 0;
586         int count = 0;
587         long startTime = System.currentTimeMillis();
588         while (count < opt_loopCount) {
589             int linecount = 0;
590             while (linecount < tests.length) {
591                 String str = tests[linecount];
592                 iter.setText(str);
593                 while (iter.next() != CollationElementIterator.NULLORDER) {
594                     gCount++;
595                 }
596                 linecount ++;
597             }
598             count ++;
599         }
600 
601         long elapsedTime = System.currentTimeMillis() - startTime;
602         System.out.println("elapsedTime " + elapsedTime + " ms");
603 
604         // empty loop recalculation
605         count = 0;
606         startTime = System.currentTimeMillis();
607         while (count < opt_loopCount) {
608             int linecount = 0;
609             while (linecount < tests.length) {
610                 String str = tests[linecount];
611                 iter.setText(str);
612                 linecount ++;
613             }
614             count ++;
615         }
616         elapsedTime -= (System.currentTimeMillis() - startTime);
617         System.out.println("elapsedTime " + elapsedTime + " ms");
618 
619         int ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
620         System.out.println("Total number of strings compared " + tests.length
621                             + "in " + opt_loopCount + " loops");
622         System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns);
623         System.out.println("performance test on skipped-5 concatenated strings from file -----------");
624 
625         String totalStr = "";
626         int    strlen = 0;
627         // appending all the strings
628         int linecount = 0;
629         while (linecount < tests.length) {
630             totalStr += tests[linecount];
631             strlen += tests[linecount].length();
632             linecount ++;
633         }
634         System.out.println("Total size of strings " + strlen);
635 
636         gCount = 0;
637         count  = 0;
638         iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr);
639         strlen -= 5; // any left over characters are not iterated,
640                      // this is to ensure the backwards and forwards iterators
641                      // gets the same position
642         int strindex = 0;
643         startTime = System.currentTimeMillis();
644         while (count < opt_loopCount) {
645             int count5 = 5;
646             strindex = 0;
647             iter.setOffset(strindex);
648             while (true) {
649                 if (iter.next() == CollationElementIterator.NULLORDER) {
650                     break;
651                 }
652                 gCount++;
653                 count5 --;
654                 if (count5 == 0) {
655                     strindex += 10;
656                     if (strindex > strlen) {
657                         break;
658                     }
659                     iter.setOffset(strindex);
660                     count5 = 5;
661                 }
662             }
663             count ++;
664         }
665 
666         elapsedTime = System.currentTimeMillis() - startTime;
667         System.out.println("elapsedTime " + elapsedTime);
668 
669         // empty loop recalculation
670         int tempgCount = 0;
671         count = 0;
672         startTime = System.currentTimeMillis();
673         while (count < opt_loopCount) {
674             int count5 = 5;
675             strindex = 0;
676             iter.setOffset(strindex);
677             while (true) {
678                 tempgCount ++;
679                 count5 --;
680                 if (count5 == 0) {
681                     strindex += 10;
682                     if (strindex > strlen) {
683                         break;
684                     }
685                     iter.setOffset(strindex);
686                     count5 = 5;
687                 }
688             }
689             count ++;
690         }
691         elapsedTime -= (System.currentTimeMillis() - startTime);
692         System.out.println("elapsedTime " + elapsedTime);
693 
694         System.out.println("gCount " + gCount);
695         ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
696         System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns);
697     }
698 
doBackwardIterTest()699     void doBackwardIterTest() {
700         System.out.print("\n\nPerforming backward iteration performance test with ");
701         System.out.println("performance test on strings from file -----------\n");
702 
703         CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator("");
704 
705         int gCount = 0;
706         int count = 0;
707         long startTime = System.currentTimeMillis();
708         while (count < opt_loopCount) {
709             int linecount = 0;
710             while (linecount < tests.length) {
711                 String str = tests[linecount];
712                 iter.setText(str);
713                 while (iter.previous() != CollationElementIterator.NULLORDER) {
714                     gCount++;
715                 }
716                 linecount ++;
717             }
718             count ++;
719         }
720         long elapsedTime = System.currentTimeMillis() - startTime;
721         System.out.println("elapsedTime " + elapsedTime + " ms");
722 
723         // empty loop recalculation
724         count = 0;
725         startTime = System.currentTimeMillis();
726         while (count < opt_loopCount) {
727             int linecount = 0;
728             while (linecount < tests.length) {
729                 String str = tests[linecount];
730                 iter.setText(str);
731                 linecount ++;
732             }
733             count ++;
734         }
735         elapsedTime -= (System.currentTimeMillis() - startTime);
736         System.out.println("elapsedTime " + elapsedTime + " ms");
737 
738         int ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
739         System.out.println("Total number of strings compared " + tests.length
740                             + "in " + opt_loopCount + " loops");
741         System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns);
742         System.out.println("performance test on skipped-5 concatenated strings from file -----------");
743 
744         String totalStr = "";
745         int    strlen = 0;
746         // appending all the strings
747         int linecount = 0;
748         while (linecount < tests.length) {
749             totalStr += tests[linecount];
750             strlen += tests[linecount].length();
751             linecount ++;
752         }
753         System.out.println("Total size of strings " + strlen);
754 
755         gCount = 0;
756         count  = 0;
757 
758         iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr);
759         int strindex = 0;
760         startTime = System.currentTimeMillis();
761         while (count < opt_loopCount) {
762             int count5 = 5;
763             strindex = 5;
764             iter.setOffset(strindex);
765             while (true) {
766                 if (iter.previous() == CollationElementIterator.NULLORDER) {
767                     break;
768                 }
769                  gCount ++;
770                  count5 --;
771                  if (count5 == 0) {
772                      strindex += 10;
773                      if (strindex > strlen) {
774                         break;
775                      }
776                      iter.setOffset(strindex);
777                      count5 = 5;
778                  }
779             }
780             count ++;
781         }
782 
783         elapsedTime = System.currentTimeMillis() - startTime;
784         System.out.println("elapsedTime " + elapsedTime);
785 
786         // empty loop recalculation
787         count = 0;
788         int tempgCount = 0;
789         startTime = System.currentTimeMillis();
790         while (count < opt_loopCount) {
791             int count5 = 5;
792             strindex = 5;
793             iter.setOffset(strindex);
794             while (true) {
795                  tempgCount ++;
796                  count5 --;
797                  if (count5 == 0) {
798                      strindex += 10;
799                      if (strindex > strlen) {
800                         break;
801                      }
802                      iter.setOffset(strindex);
803                      count5 = 5;
804                  }
805             }
806             count ++;
807         }
808         elapsedTime -= (System.currentTimeMillis() - startTime);
809         System.out.println("elapsedTime " + elapsedTime);
810 
811         System.out.println("gCount " + gCount);
812         ns = (int)(1000000 * elapsedTime / (gCount + 0.0));
813         System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns);
814     }
815 
816 
817     /**---------------------------------------------------------------------------------------
818      *
819      *    doIterTest()       Iteration test
820      *
821      *---------------------------------------------------------------------------------------
822      */
doIterTest()823     void doIterTest() {
824         doForwardIterTest();
825         doBackwardIterTest();
826     }
827 
setOptions()828     void setOptions() {
829 
830         if (opt_java) {
831             opt_icu = false;
832         }
833 
834         if (opt_rules.length() != 0) {
835             try {
836                 icuCol = new com.ibm.icu.text.RuleBasedCollator(getCollationRules(opt_rules));
837             } catch (Exception e) {
838                 System.out.println("Cannot open rules:" + e.getMessage());
839                 System.exit(1);
840             }
841         } else {
842             icuCol = com.ibm.icu.text.Collator.getInstance(
843                                 LocaleUtility.getLocaleFromName(opt_locale));
844         }
845 
846         javaCol = java.text.Collator.getInstance(
847                                 LocaleUtility.getLocaleFromName(opt_locale));
848 
849         if (opt_norm) {
850             javaCol.setDecomposition(java.text.Collator.CANONICAL_DECOMPOSITION);
851             icuCol.setDecomposition(com.ibm.icu.text.Collator.CANONICAL_DECOMPOSITION);
852         }
853 
854         if (opt_french && opt_frenchoff) {
855             System.err.println("Error: specified both -french and -frenchoff options.");
856         }
857 
858         if (opt_french) {
859             ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(true);
860         }
861         if (opt_frenchoff) {
862             ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(false);
863         }
864 
865         if (opt_lower) {
866             ((com.ibm.icu.text.RuleBasedCollator)icuCol).setLowerCaseFirst(true);
867         }
868 
869         if (opt_upper) {
870             ((com.ibm.icu.text.RuleBasedCollator)icuCol).setUpperCaseFirst(true);
871         }
872 
873         if (opt_shifted) {
874             ((com.ibm.icu.text.RuleBasedCollator)icuCol).setAlternateHandlingShifted(true);
875         }
876 
877         if (opt_level != 0) {
878             switch (opt_level) {
879                 case 1 :
880                         javaCol.setStrength(java.text.Collator.PRIMARY);
881                         icuCol.setStrength(com.ibm.icu.text.Collator.PRIMARY);
882                         break;
883                 case 2 :
884                         javaCol.setStrength(java.text.Collator.SECONDARY);
885                         icuCol.setStrength(com.ibm.icu.text.Collator.SECONDARY);
886                         break;
887                 case 3 :
888                         javaCol.setStrength(java.text.Collator.TERTIARY);
889                         icuCol.setStrength(com.ibm.icu.text.Collator.TERTIARY);
890                         break;
891                 case 4 :
892                         icuCol.setStrength(com.ibm.icu.text.Collator.QUATERNARY);
893                         break;
894                 case 5 :
895                         javaCol.setStrength(java.text.Collator.IDENTICAL);
896                         icuCol.setStrength(com.ibm.icu.text.Collator.IDENTICAL);
897                         break;
898                 default:
899                     System.err.println("-level param must be between 1 and 5\n");
900                     System.exit(1);
901             }
902         }
903         // load classes at least once before starting
904         javaCol.compare("a", "b");
905         icuCol.compare("a", "b");
906     }
907 
processOptions(String[] args)908     static boolean processOptions(String[] args) {
909         int argNum;
910         for (argNum =0; argNum < args.length; argNum++) {
911             for (int i = 0; i < options.length; i++) {
912                 if (args[argNum].equalsIgnoreCase(options[i].name)) {
913                     switch (options[i].type) {
914                         case 0:
915                                 options[i].value.delete(0, options[i].value.capacity()).append("true");
916                                 break;
917                         case 1:
918                                 argNum++;
919                                 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) {
920                                     System.err.println("value expected for"+ options[i].name +"option.\n");
921                                     return false;
922                                 }
923                                 try {
924                                    /* int value =*/ Integer.parseInt(args[argNum]);
925                                     options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]);
926                                 } catch (NumberFormatException e) {
927                                     System.err.println("Expected: a number value");
928                                     return false;
929                                 }
930                                 break;
931                         case 2:
932                                 argNum++;
933                                 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) {
934                                     System.err.println("value expected for"+ options[i].name +"option.\n");
935                                     return false;
936                                 }
937                                 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]);
938                                 break;
939                         default:
940                                 System.err.println("Option type error: {FLAG=0, NUM=1, STRING=2}");
941                                 return false;
942                     }
943                 }
944             }
945         }
946 
947         opt_fName      = temp_opt_fName.toString();
948         opt_locale     = temp_opt_locale.toString();
949         opt_rules      = temp_opt_rules.toString();
950         if (temp_opt_help.toString().equalsIgnoreCase("true")) {
951             opt_help = true;
952         }
953         opt_loopCount  = Integer.parseInt(temp_opt_loopCount.toString());
954         opt_iLoopCount = Integer.parseInt(temp_opt_iLoopCount.toString());
955         if (temp_opt_terse.toString().equalsIgnoreCase("true")) {
956             opt_terse = true;
957         }
958         if (temp_opt_qsort.toString().equalsIgnoreCase("true")) {
959             opt_qsort = true;
960         }
961         if (temp_opt_binsearch.toString().equalsIgnoreCase("true")) {
962             opt_binsearch = true;
963         }
964         if (temp_opt_icu.toString().equalsIgnoreCase("true")) {
965             opt_icu = true;
966         }
967         if (temp_opt_usekeys.toString().equalsIgnoreCase("true")) {
968             opt_usekeys = true;
969         }
970         if (temp_opt_strcmp.toString().equalsIgnoreCase("true")) {
971             opt_strcmp = true;
972         }
973         if (temp_opt_strcmpCPO.toString().equalsIgnoreCase("true")) {
974             opt_strcmpCPO = true;
975         }
976         if (temp_opt_keygen.toString().equalsIgnoreCase("true")) {
977             opt_keygen = true;
978         }
979         if (temp_opt_norm.toString().equalsIgnoreCase("true")) {
980             opt_norm = true;
981         }
982         if (temp_opt_french.toString().equalsIgnoreCase("true")) {
983             opt_french = true;
984         }
985         if (temp_opt_frenchoff.toString().equalsIgnoreCase("true")) {
986             opt_frenchoff = true;
987         }
988         if (temp_opt_shifted.toString().equalsIgnoreCase("true")) {
989             opt_shifted = true;
990         }
991         if (temp_opt_lower.toString().equalsIgnoreCase("true")) {
992             opt_lower = true;
993         }
994         if (temp_opt_upper.toString().equalsIgnoreCase("true")) {
995             opt_upper = true;
996         }
997         if (temp_opt_case.toString().equalsIgnoreCase("true")) {
998             opt_case = true;
999         }
1000         opt_level      = Integer.parseInt(temp_opt_level.toString());
1001         if (temp_opt_keyhist.toString().equalsIgnoreCase("true")) {
1002             opt_keyhist = true;
1003         }
1004         if (temp_opt_itertest.toString().equalsIgnoreCase("true")) {
1005             opt_itertest = true;
1006         }
1007         if (temp_opt_dump.toString().equalsIgnoreCase("true")) {
1008             opt_dump = true;
1009         }
1010         if (temp_opt_java.toString().equalsIgnoreCase("true")) {
1011             opt_java = true;
1012         }
1013 
1014         return true;
1015     }
1016 
1017     /**
1018      * Invoke the runtime's garbage collection procedure repeatedly
1019      * until the amount of free memory stabilizes to within 10%.
1020      */
callGC()1021     private void callGC() {
1022         // From "Java Platform Performance".  This is the procedure
1023         // recommended by Javasoft.
1024         try {
1025             System.gc();
1026             Thread.sleep(100);
1027             System.runFinalization();
1028             Thread.sleep(100);
1029 
1030             System.gc();
1031             Thread.sleep(100);
1032             System.runFinalization();
1033             Thread.sleep(100);
1034         } catch (InterruptedException e) {}
1035     }
1036 
1037     //private boolean needCRLF = false;
1038 
1039     public int DOTMASK = 0x7FF;
1040 
dot(int i)1041     void dot(int i) {
1042         if ((i % DOTMASK) == 0) {
1043             //needCRLF = true;
1044             // I do not know why print the dot here
1045             //System.out.print('.');
1046         }
1047     }
1048 
readDataLine(BufferedReader br)1049     String readDataLine(BufferedReader br) throws Exception {
1050         String originalLine = "";
1051         String line = "";
1052 
1053         try {
1054             line = originalLine = br.readLine();
1055             if (line == null) return null;
1056             if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1);
1057             int commentPos = line.indexOf('#');
1058             if (commentPos >= 0) line = line.substring(0, commentPos);
1059             line = line.trim();
1060         } catch (Exception e) {
1061             throw new Exception("Line \"{0}\",  \"{1}\"" + originalLine + " "
1062                                 + line + " " + e.toString());
1063         }
1064         return line;
1065     }
1066 
readDataLines()1067     void readDataLines() {
1068         // Read in  the input file.
1069         //   File assumed to be utf-16.
1070         //   Lines go onto heap buffers.  Global index array to line starts is created.
1071         //   Lines themselves are null terminated.
1072         //
1073         FileInputStream fis = null;
1074         InputStreamReader isr = null;
1075         BufferedReader br = null;
1076         try {
1077             fis = new FileInputStream(opt_fName);
1078             isr = new InputStreamReader(fis, "UTF-8");
1079             br= new BufferedReader(isr, 32*1024);
1080         } catch (Exception e) {
1081             System.err.println("Error: File access exception: " + e.getMessage() + "!");
1082             System.exit(2);
1083         }
1084 
1085         int counter = 0;
1086 
1087         list = new ArrayList();
1088         while (true) {
1089             String line = null;
1090             try {
1091                 line = readDataLine(br);
1092             } catch (Exception e) {
1093                 System.err.println("Read File Error" + e.getMessage() + "!");
1094                 System.exit(1);
1095             }
1096 
1097             if (line == null) break;
1098             if (line.length() == 0) continue;
1099             dot(counter++);
1100             list.add(line);
1101         }
1102         if (!opt_terse) {
1103             System.out.println("Read " + counter + " lines in file");
1104         }
1105 
1106         int size = list.size();
1107         tests = new String [size];
1108 
1109         for (int i = 0; i < size; ++i) {
1110             tests[i] = (String) list.get(i);
1111         }
1112     }
1113 
1114     /**
1115      * Get the Collator Rules
1116      * The Rule File format:
1117      * 1. leading and trailing whitespaces will be omitted
1118      * 2. lines with the leading character '#' will be treated as comments
1119      * 3. File encoding is ISO-8859-1
1120      */
getCollationRules(String ruleFileName)1121     String getCollationRules(String ruleFileName) {
1122         FileInputStream fis = null;
1123         InputStreamReader isr = null;
1124         BufferedReader br = null;
1125         try {
1126             fis = new FileInputStream(opt_rules);
1127             isr = new InputStreamReader(fis,"ISO-8859-1");
1128             br= new BufferedReader(isr);
1129         } catch (Exception e) {
1130             System.err.println("Error: File access exception: " + e.getMessage() + "!");
1131             System.exit(2);
1132         }
1133         String rules = "";
1134         String line = "";
1135         while (true) {
1136             try {
1137                 line = br.readLine();
1138             } catch (IOException e) {
1139                 System.err.println("Read File Error" + e.getMessage() + "!");
1140                 System.exit(1);
1141             }
1142             if (line == null) {
1143                 break;
1144             }
1145             int commentPos = line.indexOf('#');
1146             if (commentPos >= 0) line = line.substring(0, commentPos);
1147             line = line.trim();
1148             rules = rules + line;
1149         }
1150         return rules;
1151     }
1152 
1153     //Implementing qsort
qSortImpl_java_usekeys(String src[], int fromIndex, int toIndex, java.text.Collator c)1154     void qSortImpl_java_usekeys(String src[], int fromIndex, int toIndex, java.text.Collator c) {
1155         int low = fromIndex;
1156         int high = toIndex;
1157         String middle = "";
1158         if (high > low) {
1159             middle = src[ (low + high) / 2 ];
1160             while(low <= high) {
1161                 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) {
1162                     ++low;
1163                 }
1164                 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) {
1165                     --high;
1166                 }
1167                 if(low <= high) {
1168                     String swap = src[low];
1169                     src[low] = src[high];
1170                     src[high] = swap;
1171                     ++low;
1172                     --high;
1173                 }
1174             }
1175             if(fromIndex < high) {
1176                 qSortImpl_java_usekeys(src, fromIndex, high, c);
1177             }
1178 
1179             if(low < toIndex) {
1180                 qSortImpl_java_usekeys(src, low, toIndex, c);
1181             }
1182         }
1183     }
1184 
qSortImpl_icu_usekeys(String src[], int fromIndex, int toIndex, com.ibm.icu.text.Collator c)1185     void qSortImpl_icu_usekeys(String src[], int fromIndex, int toIndex, com.ibm.icu.text.Collator c) {
1186         int low = fromIndex;
1187         int high = toIndex;
1188         String middle = "";
1189         if (high > low) {
1190             middle = src[ (low + high) / 2 ];
1191             while(low <= high) {
1192                 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) {
1193                     ++low;
1194                 }
1195                 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) {
1196                     --high;
1197                 }
1198                 if(low <= high) {
1199                     String swap = src[low];
1200                     src[low] = src[high];
1201                     src[high] = swap;
1202                     ++low;
1203                     --high;
1204                 }
1205             }
1206             if(fromIndex < high) {
1207                 qSortImpl_icu_usekeys(src, fromIndex, high, c);
1208             }
1209 
1210             if(low < toIndex) {
1211                 qSortImpl_icu_usekeys(src, low, toIndex, c);
1212             }
1213         }
1214     }
1215 
qSortImpl_nokeys(String src[], int fromIndex, int toIndex, Comparator c)1216     void qSortImpl_nokeys(String src[], int fromIndex, int toIndex, Comparator c) {
1217         int low = fromIndex;
1218         int high = toIndex;
1219         String middle = "";
1220         if (high > low) {
1221             middle = src[ (low + high) / 2 ];
1222             while(low <= high) {
1223                 while((low < toIndex) && (compare(src[low], middle, c) < 0)) {
1224                     ++low;
1225                 }
1226                 while((high > fromIndex) && (compare(src[high], middle, c) > 0)) {
1227                     --high;
1228                 }
1229                 if(low <= high) {
1230                     String swap = src[low];
1231                     src[low] = src[high];
1232                     src[high] = swap;
1233                     ++low;
1234                     --high;
1235                 }
1236             }
1237             if(fromIndex < high) {
1238                 qSortImpl_nokeys(src, fromIndex, high, c);
1239             }
1240 
1241             if(low < toIndex) {
1242                 qSortImpl_nokeys(src, low, toIndex, c);
1243             }
1244         }
1245     }
1246 
compare(String source, String target, Comparator c)1247     int compare(String source, String target, Comparator c) {
1248         globalCount++;
1249         return c.compare(source, target);
1250     }
1251 
compare(java.text.CollationKey source, java.text.CollationKey target)1252     int compare(java.text.CollationKey source, java.text.CollationKey target) {
1253         globalCount++;
1254         return source.compareTo(target);
1255     }
1256 
compare(com.ibm.icu.text.CollationKey source, com.ibm.icu.text.CollationKey target)1257     int compare(com.ibm.icu.text.CollationKey source, com.ibm.icu.text.CollationKey target) {
1258         globalCount++;
1259         return source.compareTo(target);
1260     }
1261 
1262     //Class for command line option
1263     static class OptionSpec {
1264         String name;
1265         int type;
1266         StringBuffer value;
OptionSpec(String name, int type, StringBuffer value)1267         public OptionSpec(String name, int type, StringBuffer value) {
1268             this.name = name;
1269             this.type = type;
1270             this.value = value;
1271         }
1272     }
1273 }
1274