1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2002-2007, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.dev.test.perf; 11 12 import java.io.BufferedReader; 13 import java.io.FileInputStream; 14 import java.io.IOException; 15 import java.io.InputStreamReader; 16 import java.util.ArrayList; 17 import java.util.Comparator; 18 19 import com.ibm.icu.impl.LocaleUtility; 20 import com.ibm.icu.text.CollationElementIterator; 21 import com.ibm.icu.text.CollationKey; 22 import com.ibm.icu.text.Normalizer; 23 import com.ibm.icu.text.NumberFormat; 24 import com.ibm.icu.text.RuleBasedCollator; 25 26 public class CollationPerformanceTest { 27 static final String usageString = 28 "usage: collperf options...\n" 29 + "-help Display this message.\n" 30 + "-file file_name utf-16 format file of names.\n" 31 + "-locale name ICU locale to use. Default is en_US\n" 32 + "-rules file_name Collation rules file (overrides locale)\n" 33 //+ "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n" 34 //+ " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n" 35 //+ "-win Run test using Windows native services. (ICU is default)\n" 36 //+ "-unix Run test using Unix strxfrm, strcoll services.\n" 37 //+ "-uselen Use API with string lengths. Default is null-terminated strings\n" 38 + "-usekeys Run tests using sortkeys rather than strcoll\n" 39 + "-strcmp Run tests using u_strcmp rather than strcoll\n" 40 + "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n" 41 + "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n" 42 + "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n" 43 + " under test at each call point. For measuring test overhead.\n" 44 + "-terse Terse numbers-only output. Intended for use by scripts.\n" 45 + "-french French accent ordering\n" 46 + "-frenchoff No French accent ordering (for use with French locales.)\n" 47 + "-norm Normalizing mode on\n" 48 + "-shifted Shifted mode\n" 49 + "-lower Lower case first\n" 50 + "-upper Upper case first\n" 51 + "-case Enable separate case level\n" 52 + "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n" 53 + "-keyhist Produce a table sort key size vs. string length\n" 54 + "-binsearch Binary Search timing test\n" 55 + "-keygen Sort Key Generation timing test\n" 56 + "-qsort Quicksort timing test\n" 57 + "-iter Iteration Performance Test\n" 58 + "-dump Display strings, sort keys and CEs.\n" 59 + "-java Run test using java.text.Collator.\n"; 60 61 //enum {FLAG, NUM, STRING} type; 62 static StringBuffer temp_opt_fName = new StringBuffer(""); 63 static StringBuffer temp_opt_locale = new StringBuffer("en_US"); 64 //static StringBuffer temp_opt_langid = new StringBuffer("0"); // Defaults to value corresponding to opt_locale. 65 static StringBuffer temp_opt_rules = new StringBuffer(""); 66 static StringBuffer temp_opt_help = new StringBuffer(""); 67 static StringBuffer temp_opt_loopCount = new StringBuffer("1"); 68 static StringBuffer temp_opt_iLoopCount = new StringBuffer("1"); 69 static StringBuffer temp_opt_terse = new StringBuffer("false"); 70 static StringBuffer temp_opt_qsort = new StringBuffer(""); 71 static StringBuffer temp_opt_binsearch = new StringBuffer(""); 72 static StringBuffer temp_opt_icu = new StringBuffer("true"); 73 //static StringBuffer opt_win = new StringBuffer(""); // Run with Windows native functions. 74 //static StringBuffer opt_unix = new StringBuffer(""); // Run with UNIX strcoll, strxfrm functions. 75 //static StringBuffer opt_uselen = new StringBuffer(""); 76 static StringBuffer temp_opt_usekeys = new StringBuffer(""); 77 static StringBuffer temp_opt_strcmp = new StringBuffer(""); 78 static StringBuffer temp_opt_strcmpCPO = new StringBuffer(""); 79 static StringBuffer temp_opt_norm = new StringBuffer(""); 80 static StringBuffer temp_opt_keygen = new StringBuffer(""); 81 static StringBuffer temp_opt_french = new StringBuffer(""); 82 static StringBuffer temp_opt_frenchoff = new StringBuffer(""); 83 static StringBuffer temp_opt_shifted = new StringBuffer(""); 84 static StringBuffer temp_opt_lower = new StringBuffer(""); 85 static StringBuffer temp_opt_upper = new StringBuffer(""); 86 static StringBuffer temp_opt_case = new StringBuffer(""); 87 static StringBuffer temp_opt_level = new StringBuffer("0"); 88 static StringBuffer temp_opt_keyhist = new StringBuffer(""); 89 static StringBuffer temp_opt_itertest = new StringBuffer(""); 90 static StringBuffer temp_opt_dump = new StringBuffer(""); 91 static StringBuffer temp_opt_java = new StringBuffer(""); 92 93 94 static String opt_fName = ""; 95 static String opt_locale = "en_US"; 96 //static int opt_langid = 0; // Defaults to value corresponding to opt_locale. 97 static String opt_rules = ""; 98 static boolean opt_help = false; 99 static int opt_loopCount = 1; 100 static int opt_iLoopCount = 1; 101 static boolean opt_terse = false; 102 static boolean opt_qsort = false; 103 static boolean opt_binsearch = false; 104 static boolean opt_icu = true; 105 //static boolean opt_win = false; // Run with Windows native functions. 106 //static boolean opt_unix = false; // Run with UNIX strcoll, strxfrm functions. 107 //static boolean opt_uselen = false; 108 static boolean opt_usekeys = false; 109 static boolean opt_strcmp = false; 110 static boolean opt_strcmpCPO = false; 111 static boolean opt_norm = false; 112 static boolean opt_keygen = false; 113 static boolean opt_french = false; 114 static boolean opt_frenchoff = false; 115 static boolean opt_shifted = false; 116 static boolean opt_lower = false; 117 static boolean opt_upper = false; 118 static boolean opt_case = false; 119 static int opt_level = 0; 120 static boolean opt_keyhist = false; 121 static boolean opt_itertest = false; 122 static boolean opt_dump = false; 123 static boolean opt_java = false; 124 125 static OptionSpec[] options = { 126 new OptionSpec("-file", 2, temp_opt_fName), 127 new OptionSpec("-locale", 2, temp_opt_locale), 128 //new OptionSpec("-langid", 1, temp_opt_langid), 129 new OptionSpec("-rules", 2, temp_opt_rules), 130 new OptionSpec("-qsort", 0, temp_opt_qsort), 131 new OptionSpec("-binsearch", 0, temp_opt_binsearch), 132 new OptionSpec("-iter", 0, temp_opt_itertest), 133 //new OptionSpec("-win", 0, temp_opt_win), 134 //new OptionSpec("-unix", 0, temp_opt_unix), 135 //new OptionSpec("-uselen", 0, temp_opt_uselen), 136 new OptionSpec("-usekeys", 0, temp_opt_usekeys), 137 new OptionSpec("-strcmp", 0, temp_opt_strcmp), 138 new OptionSpec("-strcmpCPO", 0, temp_opt_strcmpCPO), 139 new OptionSpec("-norm", 0, temp_opt_norm), 140 new OptionSpec("-french", 0, temp_opt_french), 141 new OptionSpec("-frenchoff", 0, temp_opt_frenchoff), 142 new OptionSpec("-shifted", 0, temp_opt_shifted), 143 new OptionSpec("-lower", 0, temp_opt_lower), 144 new OptionSpec("-upper", 0, temp_opt_upper), 145 new OptionSpec("-case", 0, temp_opt_case), 146 new OptionSpec("-level", 1, temp_opt_level), 147 new OptionSpec("-keyhist", 0, temp_opt_keyhist), 148 new OptionSpec("-keygen", 0, temp_opt_keygen), 149 new OptionSpec("-loop", 1, temp_opt_loopCount), 150 new OptionSpec("-iloop", 1, temp_opt_iLoopCount), 151 new OptionSpec("-terse", 0, temp_opt_terse), 152 new OptionSpec("-dump", 0, temp_opt_dump), 153 new OptionSpec("-help", 0, temp_opt_help), 154 new OptionSpec("-?", 0, temp_opt_help), 155 new OptionSpec("-java", 0, temp_opt_java), 156 }; 157 158 static java.text.Collator javaCol = null; 159 static com.ibm.icu.text.Collator icuCol = null; 160 static NumberFormat nf = null; 161 static NumberFormat percent = null; 162 ArrayList list = null; 163 String[] tests = null; 164 int globalCount = 0; 165 main(String[] args)166 public static void main(String[] args) { 167 CollationPerformanceTest collPerf = new CollationPerformanceTest(); 168 if ( !CollationPerformanceTest.processOptions(args) || opt_help || opt_fName.length()==0) { 169 System.out.println(usageString); 170 System.exit(1); 171 } 172 173 nf = NumberFormat.getInstance(); 174 nf.setMaximumFractionDigits(2); 175 percent = NumberFormat.getPercentInstance(); 176 177 collPerf.setOptions(); 178 collPerf.readDataLines(); 179 180 if (opt_dump) { 181 collPerf.doDump(); 182 } 183 184 if (opt_qsort) { 185 collPerf.doQSort(); 186 } 187 188 if (opt_binsearch) { 189 collPerf.doBinarySearch(); 190 } 191 192 if (opt_keygen) { 193 collPerf.doKeyGen(); 194 } 195 196 if (opt_keyhist) { 197 collPerf.doKeyHist(); 198 } 199 200 if (opt_itertest) { 201 collPerf.doIterTest(); 202 } 203 204 } 205 206 //Dump file lines, CEs, Sort Keys if requested doDump()207 void doDump() { 208 for(int i = 0; i < list.size(); i++) { 209 //print the line 210 String line = com.ibm.icu.impl.Utility.escape((String)list.get(i)); 211 System.out.println(line); 212 213 System.out.print(" CEs: "); 214 CollationElementIterator CEiter = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationElementIterator(line); 215 int ce; 216 int j = 0; 217 for(;;) { 218 ce = CEiter.next(); 219 if (ce == CollationElementIterator.NULLORDER) { 220 break; 221 } 222 //System.out.print(); 223 String outStr = Integer.toHexString(ce); 224 for (int len = 0; len < 8 - outStr.length(); len++) { 225 outStr ='0' + outStr; 226 } 227 System.out.print(outStr + " "); 228 if(++j >8) { 229 System.out.print("\n "); 230 j = 0; 231 } 232 } 233 234 System.out.print("\n ICU Sort Key: "); 235 CollationKey ck = ((com.ibm.icu.text.RuleBasedCollator)icuCol).getCollationKey(line); 236 byte[] cks = ck.toByteArray(); 237 j = 0; 238 for(int k = 0; k < cks.length; k++) { 239 String outStr = Integer.toHexString(cks[k]); 240 switch (outStr.length()) { 241 case 1: outStr = '0' + outStr; 242 break; 243 case 8: outStr = outStr.substring(6); 244 break; 245 } 246 System.out.print(outStr); 247 System.out.print(" "); 248 j++; 249 if(j > 0 && j % 20 == 0) { 250 System.out.print("\n "); 251 } 252 } 253 System.out.println("\n"); 254 } 255 } 256 257 /**--------------------------------------------------------------------------------------- 258 * 259 * doQSort() The quick sort timing test. 260 * 261 *--------------------------------------------------------------------------------------- 262 */ doQSort()263 void doQSort() { 264 callGC(); 265 //String[] sortTests = (String[]) tests.clone(); 266 //Adjust loop count to compensate for file size. QSort should be nlog(n) 267 double dLoopCount = opt_loopCount * 3000 / ((Math.log(tests.length) / Math.log(10)* tests.length)); 268 269 if(opt_usekeys) { 270 dLoopCount *= 5; 271 } 272 273 int adj_loopCount = (int)dLoopCount; 274 if(adj_loopCount < 1) { 275 adj_loopCount = 1; 276 } 277 278 globalCount = 0; 279 long startTime = 0; 280 long endTime = 0; 281 if (opt_icu && opt_usekeys) { 282 startTime = System.currentTimeMillis(); 283 qSortImpl_icu_usekeys(tests, 0, tests.length -1, icuCol); 284 endTime = System.currentTimeMillis(); 285 } 286 if (opt_icu && !opt_usekeys){ 287 startTime = System.currentTimeMillis(); 288 qSortImpl_nokeys(tests, 0, tests.length -1, icuCol); 289 endTime = System.currentTimeMillis(); 290 } 291 if (opt_java && opt_usekeys) { 292 startTime = System.currentTimeMillis(); 293 qSortImpl_java_usekeys(tests, 0, tests.length -1, javaCol); 294 endTime = System.currentTimeMillis(); 295 } 296 if (opt_java && !opt_usekeys){ 297 startTime = System.currentTimeMillis(); 298 qSortImpl_nokeys(tests, 0, tests.length -1, javaCol); 299 endTime = System.currentTimeMillis(); 300 } 301 long elapsedTime = endTime - startTime; 302 int ns = (int)(1000000 * elapsedTime / (globalCount + 0.0)); 303 if (!opt_terse) { 304 System.out.println("qsort: total # of string compares = " + globalCount); 305 System.out.println("qsort: time per compare = " + ns); 306 } else { 307 System.out.println(ns); 308 } 309 } 310 311 /**--------------------------------------------------------------------------------------- 312 * 313 * doBinarySearch() Binary Search timing test. Each name from the list 314 * is looked up in the full sorted list of names. 315 * 316 *--------------------------------------------------------------------------------------- 317 */ doBinarySearch()318 void doBinarySearch() { 319 callGC(); 320 int gCount = 0; 321 int loops = 0; 322 double dLoopCount = opt_loopCount * 3000 / (Math.log(tests.length) / Math.log(10)* tests.length); 323 long startTime = 0; 324 long elapsedTime = 0; 325 326 if(opt_usekeys) { 327 dLoopCount *= 5; 328 } 329 int adj_loopCount = (int)dLoopCount; 330 if(adj_loopCount < 1) { 331 adj_loopCount = 1; 332 } 333 334 //int opt2 = 0; 335 336 for(;;) { //not really a loop, just allows "break" to work, to simplify 337 //inadvertently running more than one test through here 338 if(opt_strcmp) { 339 int r = 0; 340 startTime = System.currentTimeMillis(); 341 for(loops = 0; loops < adj_loopCount; loops++) { 342 for (int j = 0; j < tests.length; j++) { 343 int hi = tests.length-1; 344 int lo = 0; 345 int guess = -1; 346 for(;;) { 347 int newGuess = (hi + lo) / 2; 348 if(newGuess == guess){ 349 break; 350 } 351 guess = newGuess; 352 r = tests[j].compareTo(tests[guess]); 353 gCount++; 354 if(r == 0) { 355 break; 356 } 357 if (r < 0) { 358 hi = guess; 359 } else { 360 lo = guess; 361 } 362 } 363 } 364 } 365 elapsedTime = System.currentTimeMillis() - startTime; 366 break; 367 } 368 369 if (opt_strcmpCPO) { 370 int r = 0; 371 startTime = System.currentTimeMillis(); 372 for(loops = 0; loops < adj_loopCount; loops++) { 373 for (int j = 0; j < tests.length; j++) { 374 int hi = tests.length-1; 375 int lo = 0; 376 int guess = -1; 377 for(;;) { 378 int newGuess = (hi + lo) / 2; 379 if(newGuess == guess){ 380 break; 381 } 382 guess = newGuess; 383 r = com.ibm.icu.text.Normalizer.compare(tests[j], tests[guess], Normalizer.COMPARE_CODE_POINT_ORDER); 384 gCount++; 385 if(r == 0) { 386 break; 387 } 388 if (r < 0) { 389 hi = guess; 390 } else { 391 lo = guess; 392 } 393 } 394 } 395 } 396 elapsedTime = System.currentTimeMillis() - startTime; 397 break; 398 } 399 400 if (opt_icu) { 401 402 int r = 0; 403 startTime = System.currentTimeMillis(); 404 for (loops = 0; loops < adj_loopCount; loops++) { 405 for (int j = 0; j < tests.length; j++) { 406 int hi = tests.length - 1; 407 int lo = 0; 408 int guess = -1; 409 for (;;) { 410 int newGuess = (hi + lo) / 2; 411 if (newGuess == guess) { 412 break; 413 } 414 guess = newGuess; 415 if (opt_usekeys) { 416 com.ibm.icu.text.CollationKey sortKey1 = icuCol.getCollationKey(tests[j]); 417 com.ibm.icu.text.CollationKey sortKey2 = icuCol.getCollationKey(tests[guess]); 418 r = sortKey1.compareTo(sortKey2); 419 gCount ++; 420 } else { 421 r = icuCol.compare(tests[j], tests[guess]); 422 gCount++; 423 } 424 if (r == 0) { 425 break; 426 } 427 if (r < 0) { 428 hi = guess; 429 } else { 430 lo = guess; 431 } 432 } 433 } 434 } 435 elapsedTime = System.currentTimeMillis() - startTime; 436 break; 437 } 438 if (opt_java) { 439 440 int r = 0; 441 startTime = System.currentTimeMillis(); 442 for (loops = 0; loops < adj_loopCount; loops++) { 443 for (int j = 0; j < tests.length; j++) { 444 int hi = tests.length - 1; 445 int lo = 0; 446 int guess = -1; 447 for (;;) { 448 int newGuess = (hi + lo) / 2; 449 if (newGuess == guess) { 450 break; 451 } 452 guess = newGuess; 453 if (opt_usekeys) { 454 java.text.CollationKey sortKey1 = javaCol.getCollationKey(tests[j]); 455 java.text.CollationKey sortKey2 = javaCol.getCollationKey(tests[guess]); 456 r = sortKey1.compareTo(sortKey2); 457 gCount ++; 458 } else { 459 r = javaCol.compare(tests[j], tests[guess]); 460 gCount++; 461 } 462 if (r == 0) { 463 break; 464 } 465 if (r < 0) { 466 hi = guess; 467 } else { 468 lo = guess; 469 } 470 } 471 } 472 } 473 elapsedTime = System.currentTimeMillis() - startTime; 474 break; 475 } 476 break; 477 } 478 int ns = (int)((float)(1000000) * (float)elapsedTime / (float)gCount); 479 if (!opt_terse) { 480 System.out.println("binary search: total # of string compares = " + gCount); 481 System.out.println("binary search: compares per loop = " + gCount / loops); 482 System.out.println("binary search: time per compare = " + ns); 483 } else { 484 System.out.println(ns); 485 } 486 } 487 488 /**--------------------------------------------------------------------------------------- 489 * 490 * doKeyGen() Key Generation Timing Test 491 * 492 *--------------------------------------------------------------------------------------- 493 */ doKeyGen()494 void doKeyGen() { 495 callGC(); 496 497 // Adjust loop count to compensate for file size. Should be order n 498 double dLoopCount = opt_loopCount * (1000.0 / (double)list.size()); 499 int adj_loopCount = (int)dLoopCount; 500 if (adj_loopCount < 1) adj_loopCount = 1; 501 502 long startTime = 0; 503 long totalKeyLen = 0; 504 long totalChars = 0; 505 if (opt_java) { 506 startTime = System.currentTimeMillis(); 507 for (int loops=0; loops<adj_loopCount; loops++) { 508 for (int line=0; line < tests.length; line++) { 509 for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 510 totalChars += tests[line].length(); 511 byte[] sortKey = javaCol.getCollationKey(tests[line]).toByteArray(); 512 totalKeyLen += sortKey.length; 513 } 514 } 515 } 516 } else { 517 startTime = System.currentTimeMillis(); 518 for (int loops=0; loops<adj_loopCount; loops++) { 519 for (int line=0; line < tests.length; line++) { 520 for (int iLoop=0; iLoop < opt_iLoopCount; iLoop++) { 521 totalChars += tests[line].length(); 522 byte[] sortKey = icuCol.getCollationKey(tests[line]).toByteArray(); 523 totalKeyLen += sortKey.length; 524 } 525 } 526 } 527 } 528 529 long elapsedTime = System.currentTimeMillis() - startTime; 530 long ns = (long)(1000000 * elapsedTime / (adj_loopCount * tests.length + 0.0)); 531 if (!opt_terse) { 532 System.out.println("Sort Key Generation: total # of keys =" + adj_loopCount * tests.length); 533 System.out.println("Sort Key Generation: time per key = " + ns + " ns"); 534 System.out.println("Key Length / character = " + nf.format(totalKeyLen / (totalChars + 0.0))); 535 } 536 else { 537 System.out.print(ns + ", "); 538 System.out.println(nf.format(totalKeyLen / (totalChars + 0.0)) + ", "); 539 } 540 } 541 542 /**--------------------------------------------------------------------------------------- 543 * 544 * doKeyHist() Output a table of data for average sort key size vs. string length. 545 * 546 *--------------------------------------------------------------------------------------- 547 */ doKeyHist()548 void doKeyHist() { 549 callGC(); 550 int maxLen = 0; 551 552 // Find the maximum string length 553 for (int i = 0; i < tests.length; i++) { 554 if (tests[i].length() > maxLen) maxLen = tests[i].length(); 555 } 556 557 int[] accumulatedLen = new int[maxLen + 1]; 558 int[] numKeysOfSize = new int[maxLen + 1]; 559 560 // Fill the arrays... 561 for (int i = 0; i < tests.length; i++) { 562 int len = tests[i].length(); 563 accumulatedLen[len] += icuCol.getCollationKey(tests[i]).toByteArray().length; 564 numKeysOfSize[len] += 1; 565 } 566 567 // And write out averages 568 System.out.println("String Length, Avg Key Length, Avg Key Len per char"); 569 for (int i = 1; i <= maxLen; i++) { 570 if (numKeysOfSize[i] > 0) { 571 System.out.println(i + ", " + nf.format(accumulatedLen[i] / (numKeysOfSize[i]+ 0.0)) + ", " 572 + nf.format(accumulatedLen[i] / (numKeysOfSize[i] * i + 0.0))); 573 } 574 } 575 576 } 577 doForwardIterTest()578 void doForwardIterTest() { 579 callGC(); 580 System.out.print("\n\nPerforming forward iteration performance test with "); 581 System.out.println("performance test on strings from file -----------"); 582 583 CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(""); 584 585 int gCount = 0; 586 int count = 0; 587 long startTime = System.currentTimeMillis(); 588 while (count < opt_loopCount) { 589 int linecount = 0; 590 while (linecount < tests.length) { 591 String str = tests[linecount]; 592 iter.setText(str); 593 while (iter.next() != CollationElementIterator.NULLORDER) { 594 gCount++; 595 } 596 linecount ++; 597 } 598 count ++; 599 } 600 601 long elapsedTime = System.currentTimeMillis() - startTime; 602 System.out.println("elapsedTime " + elapsedTime + " ms"); 603 604 // empty loop recalculation 605 count = 0; 606 startTime = System.currentTimeMillis(); 607 while (count < opt_loopCount) { 608 int linecount = 0; 609 while (linecount < tests.length) { 610 String str = tests[linecount]; 611 iter.setText(str); 612 linecount ++; 613 } 614 count ++; 615 } 616 elapsedTime -= (System.currentTimeMillis() - startTime); 617 System.out.println("elapsedTime " + elapsedTime + " ms"); 618 619 int ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 620 System.out.println("Total number of strings compared " + tests.length 621 + "in " + opt_loopCount + " loops"); 622 System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns); 623 System.out.println("performance test on skipped-5 concatenated strings from file -----------"); 624 625 String totalStr = ""; 626 int strlen = 0; 627 // appending all the strings 628 int linecount = 0; 629 while (linecount < tests.length) { 630 totalStr += tests[linecount]; 631 strlen += tests[linecount].length(); 632 linecount ++; 633 } 634 System.out.println("Total size of strings " + strlen); 635 636 gCount = 0; 637 count = 0; 638 iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr); 639 strlen -= 5; // any left over characters are not iterated, 640 // this is to ensure the backwards and forwards iterators 641 // gets the same position 642 int strindex = 0; 643 startTime = System.currentTimeMillis(); 644 while (count < opt_loopCount) { 645 int count5 = 5; 646 strindex = 0; 647 iter.setOffset(strindex); 648 while (true) { 649 if (iter.next() == CollationElementIterator.NULLORDER) { 650 break; 651 } 652 gCount++; 653 count5 --; 654 if (count5 == 0) { 655 strindex += 10; 656 if (strindex > strlen) { 657 break; 658 } 659 iter.setOffset(strindex); 660 count5 = 5; 661 } 662 } 663 count ++; 664 } 665 666 elapsedTime = System.currentTimeMillis() - startTime; 667 System.out.println("elapsedTime " + elapsedTime); 668 669 // empty loop recalculation 670 int tempgCount = 0; 671 count = 0; 672 startTime = System.currentTimeMillis(); 673 while (count < opt_loopCount) { 674 int count5 = 5; 675 strindex = 0; 676 iter.setOffset(strindex); 677 while (true) { 678 tempgCount ++; 679 count5 --; 680 if (count5 == 0) { 681 strindex += 10; 682 if (strindex > strlen) { 683 break; 684 } 685 iter.setOffset(strindex); 686 count5 = 5; 687 } 688 } 689 count ++; 690 } 691 elapsedTime -= (System.currentTimeMillis() - startTime); 692 System.out.println("elapsedTime " + elapsedTime); 693 694 System.out.println("gCount " + gCount); 695 ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 696 System.out.println("Average time per CollationElementIterator.next() nano seconds " + ns); 697 } 698 doBackwardIterTest()699 void doBackwardIterTest() { 700 System.out.print("\n\nPerforming backward iteration performance test with "); 701 System.out.println("performance test on strings from file -----------\n"); 702 703 CollationElementIterator iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(""); 704 705 int gCount = 0; 706 int count = 0; 707 long startTime = System.currentTimeMillis(); 708 while (count < opt_loopCount) { 709 int linecount = 0; 710 while (linecount < tests.length) { 711 String str = tests[linecount]; 712 iter.setText(str); 713 while (iter.previous() != CollationElementIterator.NULLORDER) { 714 gCount++; 715 } 716 linecount ++; 717 } 718 count ++; 719 } 720 long elapsedTime = System.currentTimeMillis() - startTime; 721 System.out.println("elapsedTime " + elapsedTime + " ms"); 722 723 // empty loop recalculation 724 count = 0; 725 startTime = System.currentTimeMillis(); 726 while (count < opt_loopCount) { 727 int linecount = 0; 728 while (linecount < tests.length) { 729 String str = tests[linecount]; 730 iter.setText(str); 731 linecount ++; 732 } 733 count ++; 734 } 735 elapsedTime -= (System.currentTimeMillis() - startTime); 736 System.out.println("elapsedTime " + elapsedTime + " ms"); 737 738 int ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 739 System.out.println("Total number of strings compared " + tests.length 740 + "in " + opt_loopCount + " loops"); 741 System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns); 742 System.out.println("performance test on skipped-5 concatenated strings from file -----------"); 743 744 String totalStr = ""; 745 int strlen = 0; 746 // appending all the strings 747 int linecount = 0; 748 while (linecount < tests.length) { 749 totalStr += tests[linecount]; 750 strlen += tests[linecount].length(); 751 linecount ++; 752 } 753 System.out.println("Total size of strings " + strlen); 754 755 gCount = 0; 756 count = 0; 757 758 iter = ((RuleBasedCollator)icuCol).getCollationElementIterator(totalStr); 759 int strindex = 0; 760 startTime = System.currentTimeMillis(); 761 while (count < opt_loopCount) { 762 int count5 = 5; 763 strindex = 5; 764 iter.setOffset(strindex); 765 while (true) { 766 if (iter.previous() == CollationElementIterator.NULLORDER) { 767 break; 768 } 769 gCount ++; 770 count5 --; 771 if (count5 == 0) { 772 strindex += 10; 773 if (strindex > strlen) { 774 break; 775 } 776 iter.setOffset(strindex); 777 count5 = 5; 778 } 779 } 780 count ++; 781 } 782 783 elapsedTime = System.currentTimeMillis() - startTime; 784 System.out.println("elapsedTime " + elapsedTime); 785 786 // empty loop recalculation 787 count = 0; 788 int tempgCount = 0; 789 startTime = System.currentTimeMillis(); 790 while (count < opt_loopCount) { 791 int count5 = 5; 792 strindex = 5; 793 iter.setOffset(strindex); 794 while (true) { 795 tempgCount ++; 796 count5 --; 797 if (count5 == 0) { 798 strindex += 10; 799 if (strindex > strlen) { 800 break; 801 } 802 iter.setOffset(strindex); 803 count5 = 5; 804 } 805 } 806 count ++; 807 } 808 elapsedTime -= (System.currentTimeMillis() - startTime); 809 System.out.println("elapsedTime " + elapsedTime); 810 811 System.out.println("gCount " + gCount); 812 ns = (int)(1000000 * elapsedTime / (gCount + 0.0)); 813 System.out.println("Average time per CollationElementIterator.previous() nano seconds " + ns); 814 } 815 816 817 /**--------------------------------------------------------------------------------------- 818 * 819 * doIterTest() Iteration test 820 * 821 *--------------------------------------------------------------------------------------- 822 */ doIterTest()823 void doIterTest() { 824 doForwardIterTest(); 825 doBackwardIterTest(); 826 } 827 setOptions()828 void setOptions() { 829 830 if (opt_java) { 831 opt_icu = false; 832 } 833 834 if (opt_rules.length() != 0) { 835 try { 836 icuCol = new com.ibm.icu.text.RuleBasedCollator(getCollationRules(opt_rules)); 837 } catch (Exception e) { 838 System.out.println("Cannot open rules:" + e.getMessage()); 839 System.exit(1); 840 } 841 } else { 842 icuCol = com.ibm.icu.text.Collator.getInstance( 843 LocaleUtility.getLocaleFromName(opt_locale)); 844 } 845 846 javaCol = java.text.Collator.getInstance( 847 LocaleUtility.getLocaleFromName(opt_locale)); 848 849 if (opt_norm) { 850 javaCol.setDecomposition(java.text.Collator.CANONICAL_DECOMPOSITION); 851 icuCol.setDecomposition(com.ibm.icu.text.Collator.CANONICAL_DECOMPOSITION); 852 } 853 854 if (opt_french && opt_frenchoff) { 855 System.err.println("Error: specified both -french and -frenchoff options."); 856 } 857 858 if (opt_french) { 859 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(true); 860 } 861 if (opt_frenchoff) { 862 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setFrenchCollation(false); 863 } 864 865 if (opt_lower) { 866 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setLowerCaseFirst(true); 867 } 868 869 if (opt_upper) { 870 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setUpperCaseFirst(true); 871 } 872 873 if (opt_shifted) { 874 ((com.ibm.icu.text.RuleBasedCollator)icuCol).setAlternateHandlingShifted(true); 875 } 876 877 if (opt_level != 0) { 878 switch (opt_level) { 879 case 1 : 880 javaCol.setStrength(java.text.Collator.PRIMARY); 881 icuCol.setStrength(com.ibm.icu.text.Collator.PRIMARY); 882 break; 883 case 2 : 884 javaCol.setStrength(java.text.Collator.SECONDARY); 885 icuCol.setStrength(com.ibm.icu.text.Collator.SECONDARY); 886 break; 887 case 3 : 888 javaCol.setStrength(java.text.Collator.TERTIARY); 889 icuCol.setStrength(com.ibm.icu.text.Collator.TERTIARY); 890 break; 891 case 4 : 892 icuCol.setStrength(com.ibm.icu.text.Collator.QUATERNARY); 893 break; 894 case 5 : 895 javaCol.setStrength(java.text.Collator.IDENTICAL); 896 icuCol.setStrength(com.ibm.icu.text.Collator.IDENTICAL); 897 break; 898 default: 899 System.err.println("-level param must be between 1 and 5\n"); 900 System.exit(1); 901 } 902 } 903 // load classes at least once before starting 904 javaCol.compare("a", "b"); 905 icuCol.compare("a", "b"); 906 } 907 processOptions(String[] args)908 static boolean processOptions(String[] args) { 909 int argNum; 910 for (argNum =0; argNum < args.length; argNum++) { 911 for (int i = 0; i < options.length; i++) { 912 if (args[argNum].equalsIgnoreCase(options[i].name)) { 913 switch (options[i].type) { 914 case 0: 915 options[i].value.delete(0, options[i].value.capacity()).append("true"); 916 break; 917 case 1: 918 argNum++; 919 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) { 920 System.err.println("value expected for"+ options[i].name +"option.\n"); 921 return false; 922 } 923 try { 924 /* int value =*/ Integer.parseInt(args[argNum]); 925 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]); 926 } catch (NumberFormatException e) { 927 System.err.println("Expected: a number value"); 928 return false; 929 } 930 break; 931 case 2: 932 argNum++; 933 if ((argNum >= args.length) || (args[argNum].charAt(0)=='-')) { 934 System.err.println("value expected for"+ options[i].name +"option.\n"); 935 return false; 936 } 937 options[i].value.delete(0, options[i].value.capacity()).append(args[argNum]); 938 break; 939 default: 940 System.err.println("Option type error: {FLAG=0, NUM=1, STRING=2}"); 941 return false; 942 } 943 } 944 } 945 } 946 947 opt_fName = temp_opt_fName.toString(); 948 opt_locale = temp_opt_locale.toString(); 949 opt_rules = temp_opt_rules.toString(); 950 if (temp_opt_help.toString().equalsIgnoreCase("true")) { 951 opt_help = true; 952 } 953 opt_loopCount = Integer.parseInt(temp_opt_loopCount.toString()); 954 opt_iLoopCount = Integer.parseInt(temp_opt_iLoopCount.toString()); 955 if (temp_opt_terse.toString().equalsIgnoreCase("true")) { 956 opt_terse = true; 957 } 958 if (temp_opt_qsort.toString().equalsIgnoreCase("true")) { 959 opt_qsort = true; 960 } 961 if (temp_opt_binsearch.toString().equalsIgnoreCase("true")) { 962 opt_binsearch = true; 963 } 964 if (temp_opt_icu.toString().equalsIgnoreCase("true")) { 965 opt_icu = true; 966 } 967 if (temp_opt_usekeys.toString().equalsIgnoreCase("true")) { 968 opt_usekeys = true; 969 } 970 if (temp_opt_strcmp.toString().equalsIgnoreCase("true")) { 971 opt_strcmp = true; 972 } 973 if (temp_opt_strcmpCPO.toString().equalsIgnoreCase("true")) { 974 opt_strcmpCPO = true; 975 } 976 if (temp_opt_keygen.toString().equalsIgnoreCase("true")) { 977 opt_keygen = true; 978 } 979 if (temp_opt_norm.toString().equalsIgnoreCase("true")) { 980 opt_norm = true; 981 } 982 if (temp_opt_french.toString().equalsIgnoreCase("true")) { 983 opt_french = true; 984 } 985 if (temp_opt_frenchoff.toString().equalsIgnoreCase("true")) { 986 opt_frenchoff = true; 987 } 988 if (temp_opt_shifted.toString().equalsIgnoreCase("true")) { 989 opt_shifted = true; 990 } 991 if (temp_opt_lower.toString().equalsIgnoreCase("true")) { 992 opt_lower = true; 993 } 994 if (temp_opt_upper.toString().equalsIgnoreCase("true")) { 995 opt_upper = true; 996 } 997 if (temp_opt_case.toString().equalsIgnoreCase("true")) { 998 opt_case = true; 999 } 1000 opt_level = Integer.parseInt(temp_opt_level.toString()); 1001 if (temp_opt_keyhist.toString().equalsIgnoreCase("true")) { 1002 opt_keyhist = true; 1003 } 1004 if (temp_opt_itertest.toString().equalsIgnoreCase("true")) { 1005 opt_itertest = true; 1006 } 1007 if (temp_opt_dump.toString().equalsIgnoreCase("true")) { 1008 opt_dump = true; 1009 } 1010 if (temp_opt_java.toString().equalsIgnoreCase("true")) { 1011 opt_java = true; 1012 } 1013 1014 return true; 1015 } 1016 1017 /** 1018 * Invoke the runtime's garbage collection procedure repeatedly 1019 * until the amount of free memory stabilizes to within 10%. 1020 */ callGC()1021 private void callGC() { 1022 // From "Java Platform Performance". This is the procedure 1023 // recommended by Javasoft. 1024 try { 1025 System.gc(); 1026 Thread.sleep(100); 1027 System.runFinalization(); 1028 Thread.sleep(100); 1029 1030 System.gc(); 1031 Thread.sleep(100); 1032 System.runFinalization(); 1033 Thread.sleep(100); 1034 } catch (InterruptedException e) {} 1035 } 1036 1037 //private boolean needCRLF = false; 1038 1039 public int DOTMASK = 0x7FF; 1040 dot(int i)1041 void dot(int i) { 1042 if ((i % DOTMASK) == 0) { 1043 //needCRLF = true; 1044 // I do not know why print the dot here 1045 //System.out.print('.'); 1046 } 1047 } 1048 readDataLine(BufferedReader br)1049 String readDataLine(BufferedReader br) throws Exception { 1050 String originalLine = ""; 1051 String line = ""; 1052 1053 try { 1054 line = originalLine = br.readLine(); 1055 if (line == null) return null; 1056 if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1); 1057 int commentPos = line.indexOf('#'); 1058 if (commentPos >= 0) line = line.substring(0, commentPos); 1059 line = line.trim(); 1060 } catch (Exception e) { 1061 throw new Exception("Line \"{0}\", \"{1}\"" + originalLine + " " 1062 + line + " " + e.toString()); 1063 } 1064 return line; 1065 } 1066 readDataLines()1067 void readDataLines() { 1068 // Read in the input file. 1069 // File assumed to be utf-16. 1070 // Lines go onto heap buffers. Global index array to line starts is created. 1071 // Lines themselves are null terminated. 1072 // 1073 FileInputStream fis = null; 1074 InputStreamReader isr = null; 1075 BufferedReader br = null; 1076 try { 1077 fis = new FileInputStream(opt_fName); 1078 isr = new InputStreamReader(fis, "UTF-8"); 1079 br= new BufferedReader(isr, 32*1024); 1080 } catch (Exception e) { 1081 System.err.println("Error: File access exception: " + e.getMessage() + "!"); 1082 System.exit(2); 1083 } 1084 1085 int counter = 0; 1086 1087 list = new ArrayList(); 1088 while (true) { 1089 String line = null; 1090 try { 1091 line = readDataLine(br); 1092 } catch (Exception e) { 1093 System.err.println("Read File Error" + e.getMessage() + "!"); 1094 System.exit(1); 1095 } 1096 1097 if (line == null) break; 1098 if (line.length() == 0) continue; 1099 dot(counter++); 1100 list.add(line); 1101 } 1102 if (!opt_terse) { 1103 System.out.println("Read " + counter + " lines in file"); 1104 } 1105 1106 int size = list.size(); 1107 tests = new String [size]; 1108 1109 for (int i = 0; i < size; ++i) { 1110 tests[i] = (String) list.get(i); 1111 } 1112 } 1113 1114 /** 1115 * Get the Collator Rules 1116 * The Rule File format: 1117 * 1. leading and trailing whitespaces will be omitted 1118 * 2. lines with the leading character '#' will be treated as comments 1119 * 3. File encoding is ISO-8859-1 1120 */ getCollationRules(String ruleFileName)1121 String getCollationRules(String ruleFileName) { 1122 FileInputStream fis = null; 1123 InputStreamReader isr = null; 1124 BufferedReader br = null; 1125 try { 1126 fis = new FileInputStream(opt_rules); 1127 isr = new InputStreamReader(fis,"ISO-8859-1"); 1128 br= new BufferedReader(isr); 1129 } catch (Exception e) { 1130 System.err.println("Error: File access exception: " + e.getMessage() + "!"); 1131 System.exit(2); 1132 } 1133 String rules = ""; 1134 String line = ""; 1135 while (true) { 1136 try { 1137 line = br.readLine(); 1138 } catch (IOException e) { 1139 System.err.println("Read File Error" + e.getMessage() + "!"); 1140 System.exit(1); 1141 } 1142 if (line == null) { 1143 break; 1144 } 1145 int commentPos = line.indexOf('#'); 1146 if (commentPos >= 0) line = line.substring(0, commentPos); 1147 line = line.trim(); 1148 rules = rules + line; 1149 } 1150 return rules; 1151 } 1152 1153 //Implementing qsort qSortImpl_java_usekeys(String src[], int fromIndex, int toIndex, java.text.Collator c)1154 void qSortImpl_java_usekeys(String src[], int fromIndex, int toIndex, java.text.Collator c) { 1155 int low = fromIndex; 1156 int high = toIndex; 1157 String middle = ""; 1158 if (high > low) { 1159 middle = src[ (low + high) / 2 ]; 1160 while(low <= high) { 1161 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) { 1162 ++low; 1163 } 1164 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) { 1165 --high; 1166 } 1167 if(low <= high) { 1168 String swap = src[low]; 1169 src[low] = src[high]; 1170 src[high] = swap; 1171 ++low; 1172 --high; 1173 } 1174 } 1175 if(fromIndex < high) { 1176 qSortImpl_java_usekeys(src, fromIndex, high, c); 1177 } 1178 1179 if(low < toIndex) { 1180 qSortImpl_java_usekeys(src, low, toIndex, c); 1181 } 1182 } 1183 } 1184 qSortImpl_icu_usekeys(String src[], int fromIndex, int toIndex, com.ibm.icu.text.Collator c)1185 void qSortImpl_icu_usekeys(String src[], int fromIndex, int toIndex, com.ibm.icu.text.Collator c) { 1186 int low = fromIndex; 1187 int high = toIndex; 1188 String middle = ""; 1189 if (high > low) { 1190 middle = src[ (low + high) / 2 ]; 1191 while(low <= high) { 1192 while((low < toIndex) && (compare(c.getCollationKey(src[low]), c.getCollationKey(middle)) < 0)) { 1193 ++low; 1194 } 1195 while((high > fromIndex) && (compare(c.getCollationKey(src[high]), c.getCollationKey(middle)) > 0)) { 1196 --high; 1197 } 1198 if(low <= high) { 1199 String swap = src[low]; 1200 src[low] = src[high]; 1201 src[high] = swap; 1202 ++low; 1203 --high; 1204 } 1205 } 1206 if(fromIndex < high) { 1207 qSortImpl_icu_usekeys(src, fromIndex, high, c); 1208 } 1209 1210 if(low < toIndex) { 1211 qSortImpl_icu_usekeys(src, low, toIndex, c); 1212 } 1213 } 1214 } 1215 qSortImpl_nokeys(String src[], int fromIndex, int toIndex, Comparator c)1216 void qSortImpl_nokeys(String src[], int fromIndex, int toIndex, Comparator c) { 1217 int low = fromIndex; 1218 int high = toIndex; 1219 String middle = ""; 1220 if (high > low) { 1221 middle = src[ (low + high) / 2 ]; 1222 while(low <= high) { 1223 while((low < toIndex) && (compare(src[low], middle, c) < 0)) { 1224 ++low; 1225 } 1226 while((high > fromIndex) && (compare(src[high], middle, c) > 0)) { 1227 --high; 1228 } 1229 if(low <= high) { 1230 String swap = src[low]; 1231 src[low] = src[high]; 1232 src[high] = swap; 1233 ++low; 1234 --high; 1235 } 1236 } 1237 if(fromIndex < high) { 1238 qSortImpl_nokeys(src, fromIndex, high, c); 1239 } 1240 1241 if(low < toIndex) { 1242 qSortImpl_nokeys(src, low, toIndex, c); 1243 } 1244 } 1245 } 1246 compare(String source, String target, Comparator c)1247 int compare(String source, String target, Comparator c) { 1248 globalCount++; 1249 return c.compare(source, target); 1250 } 1251 compare(java.text.CollationKey source, java.text.CollationKey target)1252 int compare(java.text.CollationKey source, java.text.CollationKey target) { 1253 globalCount++; 1254 return source.compareTo(target); 1255 } 1256 compare(com.ibm.icu.text.CollationKey source, com.ibm.icu.text.CollationKey target)1257 int compare(com.ibm.icu.text.CollationKey source, com.ibm.icu.text.CollationKey target) { 1258 globalCount++; 1259 return source.compareTo(target); 1260 } 1261 1262 //Class for command line option 1263 static class OptionSpec { 1264 String name; 1265 int type; 1266 StringBuffer value; OptionSpec(String name, int type, StringBuffer value)1267 public OptionSpec(String name, int type, StringBuffer value) { 1268 this.name = name; 1269 this.type = type; 1270 this.value = value; 1271 } 1272 } 1273 } 1274