#!/usr/local/bin/perl
# * © 2016 and later: Unicode, Inc. and others.
# * License & terms of use: http://www.unicode.org/copyright.html
# *******************************************************************************
# * Copyright (C) 2002-2007 International Business Machines Corporation and *
# * others. All Rights Reserved. *
# *******************************************************************************
use strict;
# Assume we are running within the icu4j/perf-tests root directory
use lib 'src/com/ibm/icu/dev/test/perf';
use Dataset;
#---------------------------------------------------------------------
# Configuration
#
# File-scoped settings for this performance run, followed by a sanity
# check on the pass count and the call into main().
#---------------------------------------------------------------------

# Test class
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.ConverterPerformanceTest';

# Colon-separated class directories and jars, relative to the
# icu4j/perf-tests root (presumably handed to java as its class path;
# confirm against the code that launches java).
my $CLASSES = './out/bin:../tools/misc/out/bin/:../icu4j.jar:../icu4j-charset.jar';

# Methods to be tested. Each pair represents a test method and
# a baseline method which is used for comparison.
# Entries prefixed with '##' are disabled: those tests do not compile
# at this time.
my @METHODS = (
##               ['TestByteToCharConverter', 'TestByteToCharConverterICU'],
##               ['TestCharToByteConverter', 'TestCharToByteConverterICU'],
    ['TestCharsetDecoder', 'TestCharsetDecoderICU'],
    ['TestCharsetEncoder', 'TestCharsetEncoderICU']
);

# Directory of conversion test data (presumably where the source text
# files named in @OPTIONS live; verify against the code that uses it).
my $SOURCEDIR ="./data/conversion/";

# One row per measurement: source text file, encoding of that file, and
# the encoding under test. Rows that are commented out are tests which
# are currently unavailable.
my @OPTIONS = (
#    src text          src encoding  test encoding
    [ "arabic.txt",     "UTF-8",      "csisolatinarabic"],
    [ "french.txt",     "UTF-8",      "csisolatin1"],
    [ "greek.txt",      "UTF-8",      "csisolatingreek"],
    [ "hebrew.txt",     "UTF-8",      "csisolatinhebrew"],
#   [ "hindi.txt" ,     "UTF-8",      "iscii"],
    [ "japanese.txt",   "UTF-8",      "EUC-JP"],
    [ "japanese.txt",   "UTF-8",      "csiso2022jp"],
#   [ "japanese.txt",   "UTF-8",      "shift_jis"],
    [ "korean.txt",     "UTF-8",      "csiso2022kr"],
#   [ "korean.txt",     "UTF-8",      "EUC-KR"],
    [ "s-chinese.txt",  "UTF-8",      "EUC_CN"],
    [ "arabic.txt",     "UTF-8",      "UTF-8"],
    [ "french.txt",     "UTF-8",      "UTF-8"],
    [ "greek.txt",      "UTF-8",      "UTF-8"],
    [ "hebrew.txt",     "UTF-8",      "UTF-8"],
    [ "hindi.txt" ,     "UTF-8",      "UTF-8"],
    [ "japanese.txt",   "UTF-8",      "UTF-8"],
    [ "korean.txt",     "UTF-8",      "UTF-8"],
    [ "s-chinese.txt",  "UTF-8",      "UTF-8"],
    [ "french.txt",     "UTF-8",      "UTF-16BE"],
    [ "french.txt",     "UTF-8",      "UTF-16LE"],
    [ "english.txt",    "UTF-8",      "US-ASCII"],
);

my $CALIBRATE = 2;  # duration in seconds for initial calibration
my $DURATION  = 10; # duration in seconds for each pass
my $NUMPASSES = 4;  # number of passes. If > 1 then the first pass
                    # is discarded as a JIT warm-up pass.

# Attributes applied to the HTML tables of the report.
my $TABLEATTR = 'BORDER="1" CELLPADDING="4" CELLSPACING="0"';

# Plus/minus sign for "mean +/- error" figures. Spelled as an HTML
# character entity rather than a raw non-ASCII character: this file has
# no 'use utf8' and no output encoding layer is set up here, so a
# literal sign would reach the report as whatever bytes the source file
# happens to contain and could render as mojibake. The entity is plain
# ASCII and renders correctly under any page encoding.
# NOTE(review): assumes this value is only ever written into the HTML
# report - confirm against the formatting subs that use it.
my $PLUS_MINUS = "&plusmn;";

# The statistics need at least two retained passes, and one pass is
# always thrown away as warm-up, so refuse to start with fewer than 3.
if ($NUMPASSES < 3) {
    die "Need at least 3 passes. One is discarded (JIT warmup) and need two to have 1 degree of freedom (t distribution).";
}

my $OUT; # see out()

main();
#---------------------------------------------------------------------
# ...
# Entry point of the script.
#
# Derives a report file name of the form "perf <date>.html" from the
# current local time, opens it for writing on the bareword handle HTML,
# and then walks @METHODS / @OPTIONS, printing report fragments and
# calling measure2(), formatSeconds(), formatPercent() and out().
#
# NOTE(review): from the 'print HTML <' statement onward this region is
# not valid Perl as it stands. That statement is never terminated, the
# braces do not balance, $t and $b are used before their 'my'
# declarations, several out(...) calls lack a closing parenthesis, and
# variables such as $method, $param3, $iterCount and $eventsPerIter are
# used without a visible declaration. It looks as if the HTML markup and
# heredoc bodies were stripped and the bodies of more than one sub were
# merged together - restore from a pristine copy before relying on it.
#
# NOTE(review): the baseline column is formatted from $b->getMean()
# together with $t->getError; that looks like it should be $b->getError
# - confirm when the region is restored.
sub main {
# In scalar context localtime yields a human-readable date string.
my $date = localtime;
my $title = "ICU4J Performance Test $date";
# Turn the date string into something usable as a file name.
my $html = $date;
$html =~ s/://g; # ':' illegal
$html =~ s/\s*\d+$//; # delete year
$html =~ s/^\w+\s*//; # delete dow
$html = "perf $html.html";
# Create (or truncate) the report file; abort the run if that fails.
open(HTML,">$html") or die "Can't write to $html: $!";
print HTML < $testMethod vs. $baselineMethod Raw data:$title
\n";
print HTML "$TESTCLASS
\n";
my $raw = "";
for my $methodPair (@METHODS) {
my $testMethod = $methodPair->[0];
my $baselineMethod = $methodPair->[1];
print HTML "\n";
print HTML " \n";
print HTML "
\n";
print HTML " \n";
$OUT = '';
for my $pat (@OPTIONS) {
print HTML "Options $testMethod ";
print HTML "$baselineMethod Ratio \n";
}
print HTML "@$pat[0], @$pat[2] \n";
out("");
# measure the test method
out("
");
# output ratio
my $r = $t->divide($b);
my $mean = $r->getMean() - 1;
my $color = $mean < 0 ? "RED" : "BLACK";
print HTML " ");
print HTML "");
print "\n$testMethod [@$pat]\n";
my $t = measure2($testMethod, $pat, -$DURATION);
out(" ", formatSeconds(4, $t->getMean(), $t->getError);
print HTML "/event \n";
# measure baseline method
out(" ");
print HTML "");
print "\n$baselineMethod [@$pat]\n";
my $b = measure2($baselineMethod, $pat, -$DURATION);
out(" ", formatSeconds(4, $b->getMean(), $t->getError);
print HTML "/event \n";
out("", formatPercent(3, $mean, $r->getError);
print HTML "
Measuring $method for input file @$pat[0] for encoding @$pat[2] , "); if ($param3 > 0) { $iterCount = $param3; out("$iterCount iterations/pass, $NUMPASSES passes
\n"); } else { my $timePerPass = -$param3; out(-$timePerPass, " seconds/pass, $NUMPASSES passes\n"); # Value given was -seconds/pass # calibrate: estimate ms/iteration print "Calibrating..."; my @t = callJava($method, $pat, -$CALIBRATE, 1); print "done.\n"; my @data = split(/\s+/, $t[0]->[2]); $data[0] *= 1.0e+3; my $timePerIter = 1.0e-3 * $data[0] / $data[1]; # determine iterations/pass from timePerPass and timePerIteration $iterCount = int($timePerPass / $timePerIter + 0.5); out("Calibration pass ($CALIBRATE sec): ");
out("$data[0] ms, ");
out("$data[1] iterations = ");
out(formatSeconds(4, $timePerIter), "/iteration
\n");
}
# run passes
print "Measuring $iterCount iterations x $NUMPASSES passes...";
my @t = callJava($method, $pat, $iterCount, $NUMPASSES);
print "done.\n";
my @ms = ();
my @b; # scratch
for my $a (@t) {
# $a->[0]: method name, corresponds to $method
# $a->[1]: 'begin' data, == $iterCount
# $a->[2]: 'end' data, of the form
\n");
out("Events per iteration: $eventsPerIter
\n");
my @ms_str = @ms;
$ms_str[0] .= " (discarded)" if (@ms_str > 1);
out("Raw times (ms/pass): ", join(", ", @ms_str), "
\n");
($iterCount, $eventsPerIter, @ms);
}
#---------------------------------------------------------------------
# Invoke java to run $TESTCLASS, passing it the given parameters.
#
# @param the method to run
# @param the number of iterations, or if negative, the duration
# in seconds. If more than one pass is desired, pass in
# a string, e.g., "100 100 100".
# @param the pattern defining characters to test
#
# @return an array of results. Each result is an array REF
# describing one pass. The array REF contains:
# ->[0]: The method name as reported
# ->[1]: The params on the '=