1#!/usr/bin/env perl 2# * © 2016 and later: Unicode, Inc. and others. 3# * License & terms of use: http://www.unicode.org/copyright.html 4# ******************************************************************************* 5# * Copyright (C) 2002-2007 International Business Machines Corporation and * 6# * others. All Rights Reserved. * 7# ******************************************************************************* 8 9use strict; 10 11# Assume we are running within the icu4j/perf-tests root directory 12use lib 'src/com/ibm/icu/dev/test/perf'; 13use Dataset; 14 15#--------------------------------------------------------------------- 16# Test class 17my $TESTCLASS = 'com.ibm.icu.dev.test.perf.ConverterPerformanceTest'; 18 19my $OS=$^O; 20my $CLASSPATH; 21if ($^O eq "MSWin32") { 22 $CLASSPATH = './target/*;./target/dependency/*'; 23} else { 24 $CLASSPATH = './target/*:./target/dependency/*'; 25} 26 27# Methods to be tested. Each pair represents a test method and 28# a baseline method which is used for comparison. 29# Some tests do not compile at this time. 30my @METHODS = ( 31## ['TestByteToCharConverter', 'TestByteToCharConverterICU'], 32## ['TestCharToByteConverter', 'TestCharToByteConverterICU'], 33 ['TestCharsetDecoder', 'TestCharsetDecoderICU'], 34 ['TestCharsetEncoder', 'TestCharsetEncoderICU'] 35 ); 36 37# Patterns which define the set of characters used for testing. 38 39my $SOURCEDIR ="./data/conversion/"; 40 41# Note that some tests are unavailable 42my @OPTIONS = ( 43# src text src encoding test encoding 44 [ "arabic.txt", "UTF-8", "csisolatinarabic"], 45 [ "french.txt", "UTF-8", "csisolatin1"], 46 [ "greek.txt", "UTF-8", "csisolatingreek"], 47 [ "hebrew.txt", "UTF-8", "csisolatinhebrew"], 48# [ "hindi.txt" , "UTF-8", "iscii"], 49 [ "japanese.txt", "UTF-8", "EUC-JP"], 50 [ "japanese.txt", "UTF-8", "csiso2022jp"], 51# [ "japanese.txt", "UTF-8", "shift_jis"], 52 [ "korean.txt", "UTF-8", "csiso2022kr"], 53# [ "korean.txt", "UTF-8", "EUC-KR"], 54 [ "s-chinese.txt", "UTF-8", "EUC_CN"], 55 [ "arabic.txt", "UTF-8", "UTF-8"], 56 [ "french.txt", "UTF-8", "UTF-8"], 57 [ "greek.txt", "UTF-8", "UTF-8"], 58 [ "hebrew.txt", "UTF-8", "UTF-8"], 59 [ "hindi.txt" , "UTF-8", "UTF-8"], 60 [ "japanese.txt", "UTF-8", "UTF-8"], 61 [ "korean.txt", "UTF-8", "UTF-8"], 62 [ "s-chinese.txt", "UTF-8", "UTF-8"], 63 [ "french.txt", "UTF-8", "UTF-16BE"], 64 [ "french.txt", "UTF-8", "UTF-16LE"], 65 [ "english.txt", "UTF-8", "US-ASCII"], 66 ); 67 68my $CALIBRATE = 2; # duration in seconds for initial calibration 69my $DURATION = 10; # duration in seconds for each pass 70my $NUMPASSES = 4; # number of passes. If > 1 then the first pass 71 # is discarded as a JIT warm-up pass. 72 73my $TABLEATTR = 'BORDER="1" CELLPADDING="4" CELLSPACING="0"'; 74 75my $PLUS_MINUS = "±"; 76 77if ($NUMPASSES < 3) { 78 die "Need at least 3 passes. One is discarded (JIT warmup) and need two to have 1 degree of freedom (t distribution)."; 79} 80 81my $OUT; # see out() 82 83main(); 84 85#--------------------------------------------------------------------- 86# ... 87sub main { 88 my $date = localtime; 89 my $title = "ICU4J Performance Test $date"; 90 91 my $html = $date; 92 $html =~ s/://g; # ':' illegal 93 $html =~ s/\s*\d+$//; # delete year 94 $html =~ s/^\w+\s*//; # delete dow 95 $html = "perf $html.html"; 96 97 open(HTML,">$html") or die "Can't write to $html: $!"; 98 99 print HTML <<EOF; 100<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 101 "http://www.w3.org/TR/html4/strict.dtd"> 102<HTML> 103 <HEAD> 104 <TITLE>$title</TITLE> 105 </HEAD> 106 <BODY> 107EOF 108 print HTML "<H1>$title</H1>\n"; 109 110 print HTML "<H2>$TESTCLASS</H2>\n"; 111 112 my $raw = ""; 113 114 for my $methodPair (@METHODS) { 115 116 my $testMethod = $methodPair->[0]; 117 my $baselineMethod = $methodPair->[1]; 118 119 print HTML "<P><TABLE $TABLEATTR><TR><TD>\n"; 120 print HTML "<P><B>$testMethod vs. $baselineMethod</B></P>\n"; 121 122 print HTML "<P><TABLE $TABLEATTR BGCOLOR=\"#CCFFFF\">\n"; 123 print HTML "<TR><TD>Options</TD><TD>$testMethod</TD>"; 124 print HTML "<TD>$baselineMethod</TD><TD>Ratio</TD></TR>\n"; 125 126 $OUT = ''; 127 128 for my $pat (@OPTIONS) { 129 print HTML "<TR><TD>@$pat[0], @$pat[2]</TD>\n"; 130 131 out("<P><TABLE $TABLEATTR WIDTH=\"100%\">"); 132 133 # measure the test method 134 out("<TR><TD>"); 135 print "\n$testMethod [@$pat]\n"; 136 my $t = measure2($testMethod, $pat, -$DURATION); 137 out("</TD></TR>"); 138 print HTML "<TD>", formatSeconds(4, $t->getMean(), $t->getError); 139 print HTML "/event</TD>\n"; 140 141 # measure baseline method 142 out("<TR><TD>"); 143 print "\n$baselineMethod [@$pat]\n"; 144 my $b = measure2($baselineMethod, $pat, -$DURATION); 145 out("</TD></TR>"); 146 print HTML "<TD>", formatSeconds(4, $b->getMean(), $t->getError); 147 print HTML "/event</TD>\n"; 148 149 out("</TABLE></P>"); 150 151 # output ratio 152 my $r = $t->divide($b); 153 my $mean = $r->getMean() - 1; 154 my $color = $mean < 0 ? "RED" : "BLACK"; 155 print HTML "<TD><B><FONT COLOR=\"$color\">", formatPercent(3, $mean, $r->getError); 156 print HTML "</FONT></B></TD></TR>\n"; 157 } 158 159 print HTML "</TABLE></P>\n"; 160 161 print HTML "<P>Raw data:</P>\n"; 162 print HTML $OUT; 163 print HTML "</TABLE></P>\n"; 164 } 165 166 print HTML <<EOF; 167 </BODY> 168</HTML> 169EOF 170 close(HTML) or die "Can't close $html: $!"; 171} 172 173#--------------------------------------------------------------------- 174# Append text to the global variable $OUT 175sub out { 176 $OUT .= join('', @_); 177} 178 179#--------------------------------------------------------------------- 180# Append text to the global variable $OUT 181sub outln { 182 $OUT .= join('', @_) . "\n"; 183} 184 185#--------------------------------------------------------------------- 186# Measure a given test method with a give test pattern using the 187# global run parameters. 188# 189# @param the method to run 190# @param the pattern defining characters to test 191# @param if >0 then the number of iterations per pass. If <0 then 192# (negative of) the number of seconds per pass. 193# 194# @return a Dataset object, scaled by iterations per pass and 195# events per iteration, to give time per event 196# 197sub measure2 { 198 my @data = measure1(@_); 199 my $iterPerPass = shift(@data); 200 my $eventPerIter = shift(@data); 201 202 shift(@data) if (@data > 1); # discard first run 203 204 my $ds = Dataset->new(@data); 205 $ds->setScale(1.0e-3 / ($iterPerPass * $eventPerIter)); 206 $ds; 207} 208 209#--------------------------------------------------------------------- 210# Measure a given test method with a give test pattern using the 211# global run parameters. 212# 213# @param the method to run 214# @param the pattern defining characters to test 215# @param if >0 then the number of iterations per pass. If <0 then 216# (negative of) the number of seconds per pass. 217# 218# @return array of: 219# [0] iterations per pass 220# [1] events per iteration 221# [2..] ms reported for each pass, in order 222# 223sub measure1 { 224 my $method = shift; 225 my $pat = shift; 226 my $param3 = shift; # Either -seconds/pass or iteration count 227 228 my $iterCount = 0; # Set later based on param3. 229 230 out("<P>Measuring $method for input file @$pat[0] for encoding @$pat[2] , "); 231 if ($param3 > 0) { 232 $iterCount = $param3; 233 out("$iterCount iterations/pass, $NUMPASSES passes</P>\n"); 234 } else { 235 my $timePerPass = -$param3; 236 out(-$timePerPass, " seconds/pass, $NUMPASSES passes</P>\n"); 237 238 # Value given was -seconds/pass 239 240 # calibrate: estimate ms/iteration 241 print "Calibrating..."; 242 my @t = callJava($method, $pat, -$CALIBRATE, 1); 243 print "done.\n"; 244 245 my @data = split(/\s+/, $t[0]->[2]); 246 $data[0] *= 1.0e+3; 247 248 my $timePerIter = 1.0e-3 * $data[0] / $data[1]; 249 250 # determine iterations/pass from timePerPass and timePerIteration 251 $iterCount = int($timePerPass / $timePerIter + 0.5); 252 253 out("<P>Calibration pass ($CALIBRATE sec): "); 254 out("$data[0] ms, "); 255 out("$data[1] iterations = "); 256 out(formatSeconds(4, $timePerIter), "/iteration<BR>\n"); 257 } 258 259 # run passes 260 print "Measuring $iterCount iterations x $NUMPASSES passes..."; 261 my @t = callJava($method, $pat, $iterCount, $NUMPASSES); 262 print "done.\n"; 263 my @ms = (); 264 my @b; # scratch 265 for my $a (@t) { 266 # $a->[0]: method name, corresponds to $method 267 # $a->[1]: 'begin' data, == $iterCount 268 # $a->[2]: 'end' data, of the form <ms> <loops> <eventsPerIter> 269 # $a->[3...]: gc messages from JVM during pass 270 @b = split(/\s+/, $a->[2]); 271 push(@ms, $b[0] * 1.0e+3); 272 } 273 my $eventsPerIter = $b[2]; 274 275 out("Iterations per pass: $iterCount<BR>\n"); 276 out("Events per iteration: $eventsPerIter<BR>\n"); 277 278 my @ms_str = @ms; 279 $ms_str[0] .= " (discarded)" if (@ms_str > 1); 280 out("Raw times (ms/pass): ", join(", ", @ms_str), "<BR>\n"); 281 282 ($iterCount, $eventsPerIter, @ms); 283} 284 285#--------------------------------------------------------------------- 286# Invoke java to run $TESTCLASS, passing it the given parameters. 287# 288# @param the method to run 289# @param the number of iterations, or if negative, the duration 290# in seconds. If more than on pass is desired, pass in 291# a string, e.g., "100 100 100". 292# @param the pattern defining characters to test 293# 294# @return an array of results. Each result is an array REF 295# describing one pass. The array REF contains: 296# ->[0]: The method name as reported 297# ->[1]: The params on the '= <meth> begin ...' line 298# ->[2]: The params on the '= <meth> end ...' line 299# ->[3..]: GC messages from the JVM, if any 300# 301sub callJava { 302 my $method = shift; 303 my $pat = shift; 304 my $n = shift; 305 my $passes = shift; 306 307 my $fileName = $SOURCEDIR.@$pat[0] ; 308 my $n = ($n < 0) ? "-t ".(-$n) : "-i ".$n; 309 310 my $cmd = "java -classpath $CLASSPATH $TESTCLASS $method $n -p $passes -f $fileName -e @$pat[1] -T @$pat[2]"; 311 print "[$cmd]\n"; # for debugging 312 open(PIPE, "$cmd|") or die "Can't run \"$cmd\""; 313 my @out; 314 while (<PIPE>) { 315 push(@out, $_); 316 } 317 close(PIPE) or die "Java failed: \"$cmd\""; 318 319 @out = grep(!/^\#/, @out); # filter out comments 320 321 #print "[", join("\n", @out), "]\n"; 322 323 my @results; 324 my $method = ''; 325 my $data = []; 326 foreach (@out) { 327 next unless (/\S/); 328 329 if (/^=\s*(\w+)\s*(\w+)\s*(.*)/) { 330 my ($m, $state, $d) = ($1, $2, $3); 331 #print "$_ => [[$m $state $data]]\n"; 332 if ($state eq 'begin') { 333 die "$method was begun but not finished" if ($method); 334 $method = $m; 335 push(@$data, $d); 336 push(@$data, ''); # placeholder for end data 337 } elsif ($state eq 'end') { 338 if ($m ne $method) { 339 die "$method end does not match: $_"; 340 } 341 $data->[1] = $d; # insert end data at [1] 342 #print "#$method:", join(";",@$data), "\n"; 343 unshift(@$data, $method); # add method to start 344 345 push(@results, $data); 346 $method = ''; 347 $data = []; 348 } else { 349 die "Can't parse: $_"; 350 } 351 } 352 353 elsif (/^\[/) { 354 if ($method) { 355 push(@$data, $_); 356 } else { 357 # ignore extraneous GC notices 358 } 359 } 360 361 else { 362 die "Can't parse: $_"; 363 } 364 } 365 366 die "$method was begun but not finished" if ($method); 367 368 @results; 369} 370 371#|#--------------------------------------------------------------------- 372#|# Format a confidence interval, as given by a Dataset. Output is as 373#|# as follows: 374#|# 241.23 - 241.98 => 241.5 +/- 0.3 375#|# 241.2 - 243.8 => 242 +/- 1 376#|# 211.0 - 241.0 => 226 +/- 15 or? 230 +/- 20 377#|# 220.3 - 234.3 => 227 +/- 7 378#|# 220.3 - 300.3 => 260 +/- 40 379#|# 220.3 - 1000 => 610 +/- 390 or? 600 +/- 400 380#|# 0.022 - 0.024 => 0.023 +/- 0.001 381#|# 0.022 - 0.032 => 0.027 +/- 0.005 382#|# 0.022 - 1.000 => 0.5 +/- 0.5 383#|# In other words, take one significant digit of the error value and 384#|# display the mean to the same precision. 385#|sub formatDataset { 386#| my $ds = shift; 387#| my $lower = $ds->getMean() - $ds->getError(); 388#| my $upper = $ds->getMean() + $ds->getError(); 389#| my $scale = 0; 390#| # Find how many initial digits are the same 391#| while ($lower < 1 || 392#| int($lower) == int($upper)) { 393#| $lower *= 10; 394#| $upper *= 10; 395#| $scale++; 396#| } 397#| while ($lower >= 10 && 398#| int($lower) == int($upper)) { 399#| $lower /= 10; 400#| $upper /= 10; 401#| $scale--; 402#| } 403#|} 404 405#--------------------------------------------------------------------- 406# Format a number, optionally with a +/- delta, to n significant 407# digits. 408# 409# @param significant digit, a value >= 1 410# @param multiplier 411# @param time in seconds to be formatted 412# @optional delta in seconds 413# 414# @return string of the form "23" or "23 +/- 10". 415# 416sub formatNumber { 417 my $sigdig = shift; 418 my $mult = shift; 419 my $a = shift; 420 my $delta = shift; # may be undef 421 422 my $result = formatSigDig($sigdig, $a*$mult); 423 if (defined($delta)) { 424 my $d = formatSigDig($sigdig, $delta*$mult); 425 # restrict PRECISION of delta to that of main number 426 if ($result =~ /\.(\d+)/) { 427 # TODO make this work for values with all significant 428 # digits to the left of the decimal, e.g., 1234000. 429 430 # TODO the other thing wrong with this is that it 431 # isn't rounding the $delta properly. Have to put 432 # this logic into formatSigDig(). 433 my $x = length($1); 434 $d =~ s/\.(\d{$x})\d+/.$1/; 435 } 436 $result .= " $PLUS_MINUS " . $d; 437 } 438 $result; 439} 440 441#--------------------------------------------------------------------- 442# Format a time, optionally with a +/- delta, to n significant 443# digits. 444# 445# @param significant digit, a value >= 1 446# @param time in seconds to be formatted 447# @optional delta in seconds 448# 449# @return string of the form "23 ms" or "23 +/- 10 ms". 450# 451sub formatSeconds { 452 my $sigdig = shift; 453 my $a = shift; 454 my $delta = shift; # may be undef 455 456 my @MULT = (1 , 1e3, 1e6, 1e9, 1e12); 457 my @SUFF = ('s' , 'ms', 'us', 'ns', 'ps'); 458 459 # Determine our scale 460 my $i = 0; 461 ++$i while ($a*$MULT[$i] < 1 && $i < @MULT); 462 463 formatNumber($sigdig, $MULT[$i], $a, $delta) . ' ' . $SUFF[$i]; 464} 465 466#--------------------------------------------------------------------- 467# Format a percentage, optionally with a +/- delta, to n significant 468# digits. 469# 470# @param significant digit, a value >= 1 471# @param value to be formatted, as a fraction, e.g. 0.5 for 50% 472# @optional delta, as a fraction 473# 474# @return string of the form "23 %" or "23 +/- 10 %". 475# 476sub formatPercent { 477 my $sigdig = shift; 478 my $a = shift; 479 my $delta = shift; # may be undef 480 481 formatNumber($sigdig, 100, $a, $delta) . ' %'; 482} 483 484#--------------------------------------------------------------------- 485# Format a number to n significant digits without using exponential 486# notation. 487# 488# @param significant digit, a value >= 1 489# @param number to be formatted 490# 491# @return string of the form "1234" "12.34" or "0.001234". If 492# number was negative, prefixed by '-'. 493# 494sub formatSigDig { 495 my $n = shift() - 1; 496 my $a = shift; 497 498 local $_ = sprintf("%.${n}e", $a); 499 my $sign = (s/^-//) ? '-' : ''; 500 501 my $a_e; 502 my $result; 503 if (/^(\d)\.(\d+)e([-+]\d+)$/) { 504 my ($d, $dn, $e) = ($1, $2, $3); 505 $a_e = $e; 506 $d .= $dn; 507 $e++; 508 $d .= '0' while ($e > length($d)); 509 while ($e < 1) { 510 $e++; 511 $d = '0' . $d; 512 } 513 if ($e == length($d)) { 514 $result = $sign . $d; 515 } else { 516 $result = $sign . substr($d, 0, $e) . '.' . substr($d, $e); 517 } 518 } else { 519 die "Can't parse $_"; 520 } 521 $result; 522} 523 524#eof 525