#!/usr/local/bin/perl
# * © 2016 and later: Unicode, Inc. and others.
# * License & terms of use: http://www.unicode.org/copyright.html#License
# *******************************************************************************
# * Copyright (C) 2002-2007 International Business Machines Corporation and *
# * others. All Rights Reserved. *
# *******************************************************************************
use strict;
use warnings;
# Assume we are running within the icu4j root directory
use lib 'src/com/ibm/icu/dev/test/perf';
use Dataset;
#---------------------------------------------------------------------
# Configuration for the normalizer performance comparison.
# Everything in this section is read by main() and its helpers, which are
# defined further down in this file.

# Test class
my $TESTCLASS = 'com.ibm.icu.dev.test.perf.NormalizerPerformanceTest';

# Methods to be tested. Each pair represents a test method and
# a baseline method which is used for comparison.
# Every pair is listed exactly once: main() measures each pair against every
# entry of @OPTIONS, so a repeated pair only repeats an identical measurement
# and adds a redundant table to the report.
my @METHODS = (
    ['TestJDK_NFC_NFC_Text',  'TestICU_NFC_NFC_Text'],
#   ['TestJDK_NFC_NFD_Text',  'TestICU_NFC_NFD_Text'],
    ['TestJDK_NFC_Orig_Text', 'TestICU_NFC_Orig_Text'],
    ['TestJDK_NFD_NFC_Text',  'TestICU_NFD_NFC_Text'],
    ['TestJDK_NFD_NFD_Text',  'TestICU_NFD_NFD_Text'],
    ['TestJDK_NFD_Orig_Text', 'TestICU_NFD_Orig_Text'],
);

# Directory holding the input data files named in @OPTIONS.
# NOTE(review): presumably prepended to each file name by the code that builds
# the java command line -- that code is not in this section; confirm.
my $SOURCEDIR ="src/com/ibm/icu/dev/test/perf/data/collation/";

# Input data sets. Each entry is [ source text file, source encoding, mode ].
# NOTE(review): the mode letters ("b", "l") are passed through to the test
# class; their meaning (bulk vs. line-by-line?) is not defined here -- confirm.
my @OPTIONS = (
#     src text                     src encoding  mode
    [ "TestNames_SerbianSH.txt",   "UTF-8", "b"],
#   [ "arabic.txt",                "UTF-8", "b"],
#   [ "french.txt",                "UTF-8", "b"],
#   [ "greek.txt",                 "UTF-8", "b"],
#   [ "hebrew.txt",                "UTF-8", "b"],
#   [ "hindi.txt" ,                "UTF-8", "b"],
#   [ "japanese.txt",              "UTF-8", "b"],
#   [ "korean.txt",                "UTF-8", "b"],
#   [ "s-chinese.txt",             "UTF-8", "b"],
#   [ "arabic.html",               "UTF-8", "b"],
#   [ "czech.html",                "UTF-8", "b"],
#   [ "danish.html",               "UTF-8", "b"],
#   [ "english.html",              "UTF-8", "b"],
#   [ "esperanto.html",            "UTF-8", "b"],
#   [ "french.html",               "UTF-8", "b"],
#   [ "georgian.html",             "UTF-8", "b"],
#   [ "german.html",               "UTF-8", "b"],
#   [ "greek.html",                "UTF-8", "b"],
#   [ "hebrew.html",               "UTF-8", "b"],
#   [ "hindi.html",                "UTF-8", "b"],
#   [ "icelandic.html",            "UTF-8", "b"],
#   [ "interlingua.html",          "UTF-8", "b"],
#   [ "italian.html",              "UTF-8", "b"],
#   [ "japanese.html",             "UTF-8", "b"],
#   [ "korean.html",               "UTF-8", "b"],
#   [ "lithuanian.html",           "UTF-8", "b"],
#   [ "maltese.html",              "UTF-8", "b"],
#   [ "persian.html",              "UTF-8", "b"],
#   [ "polish.html",               "UTF-8", "b"],
#   [ "portuguese.html",           "UTF-8", "b"],
#   [ "romanian.html",             "UTF-8", "b"],
#   [ "russian.html",              "UTF-8", "b"],
#   [ "s-chinese.html",            "UTF-8", "b"],
#   [ "spanish.html",              "UTF-8", "b"],
#   [ "swedish.html",              "UTF-8", "b"],
#   [ "t-chinese.html",            "UTF-8", "b"],
#   [ "welsh.html",                "UTF-8", "b"],
    [ "TestNames_Asian.txt",       "UTF-8", "l"],
    [ "TestNames_Chinese.txt",     "UTF-8", "l"],
    [ "TestNames_Japanese.txt",    "UTF-8", "l"],
    [ "TestNames_Japanese_h.txt",  "UTF-8", "l"],
    [ "TestNames_Japanese_k.txt",  "UTF-8", "l"],
    [ "TestNames_Korean.txt",      "UTF-8", "l"],
    [ "TestNames_Latin.txt",       "UTF-8", "l"],
    [ "TestNames_SerbianSH.txt",   "UTF-8", "l"],
    [ "TestNames_SerbianSR.txt",   "UTF-8", "l"],
    [ "TestNames_Thai.txt",        "UTF-8", "l"],
    # NOTE(review): lower-case "n" unlike the other TestNames_* entries; left
    # as-is because it must match the file name on disk -- confirm.
    [ "Testnames_Russian.txt",     "UTF-8", "l"],
);

my $CALIBRATE = 2;  # duration in seconds for initial calibration
my $DURATION  = 10; # duration in seconds for each pass
my $NUMPASSES = 4;  # number of passes. If > 1 then the first pass
                    # is discarded as a JIT warm-up pass.

# Attributes applied to every <TABLE> in the HTML report.
my $TABLEATTR = 'BORDER="1" CELLPADDING="4" CELLSPACING="0"';

# Separator placed between a mean and its error in formatted output.
# NOTE(review): this is a raw non-ASCII literal and the file has no
# `use utf8`, so it is emitted as whatever bytes the source file contains.
# If it only ever reaches the HTML report, "&plusmn;" would be independent of
# the file encoding -- confirm against the formatting helpers.
my $PLUS_MINUS = "±";

# Fail fast on a configuration that cannot yield an error estimate.
if ($NUMPASSES < 3) {
    die "Need at least 3 passes. One is discarded (JIT warmup) and need two to have 1 degree of freedom (t distribution).";
}

my $OUT; # see out()

main();
#---------------------------------------------------------------------
# ...
sub main {
my $date = localtime;
my $title = "ICU4J Performance Test $date";
my $html = $date;
$html =~ s/://g; # ':' illegal
$html =~ s/\s*\d+$//; # delete year
$html =~ s/^\w+\s*//; # delete dow
$html = "perf $html.html";
open(HTML,">$html") or die "Can't write to $html: $!";
print HTML < $testMethod vs. $baselineMethod Raw data:$title
\n";
print HTML "$TESTCLASS
\n";
my $raw = "";
for my $methodPair (@METHODS) {
my $testMethod = $methodPair->[0];
my $baselineMethod = $methodPair->[1];
print HTML "\n";
print HTML " \n";
print HTML "
\n";
print HTML " \n";
$OUT = '';
for my $pat (@OPTIONS) {
print HTML "Options $testMethod ";
print HTML "$baselineMethod Ratio \n";
}
print HTML "@$pat[0], @$pat[2] \n";
out("");
# measure the test method
out("
");
# output ratio
my $r = $t->divide($b);
my $mean = $r->getMean() - 1;
my $color = $mean < 0 ? "RED" : "BLACK";
print HTML " ");
print HTML "");
print "\n$testMethod [@$pat]\n";
my $t = measure2($testMethod, $pat, -$DURATION);
out(" ", formatSeconds(4, $t->getMean(), $t->getError);
print HTML "/event \n";
# measure baseline method
out(" ");
print HTML "");
print "\n$baselineMethod [@$pat]\n";
my $b = measure2($baselineMethod, $pat, -$DURATION);
out(" ", formatSeconds(4, $b->getMean(), $t->getError);
print HTML "/event \n";
out("", formatPercent(3, $mean, $r->getError);
print HTML "
Measuring $method for input file @$pat[0] in @$pat[2] , "); if ($iterCount > 0) { out("$iterCount iterations/pass, $NUMPASSES passes
\n"); } else { out(-$iterCount, " seconds/pass, $NUMPASSES passes\n"); } # is $iterCount actually -seconds/pass? if ($iterCount < 0) { # calibrate: estimate ms/iteration print "Calibrating..."; my @t = callJava($method, $pat, -$CALIBRATE, 1); print "done.\n"; my @data = split(/\s+/, $t[0]->[2]); $data[0] *= 1.0e+3; my $timePerIter = 1.0e-3 * $data[0] / $data[1]; # determine iterations/pass $iterCount = int(-$iterCount / $timePerIter + 0.5); out("Calibration pass ($CALIBRATE sec): ");
out("$data[0] ms, ");
out("$data[1] iterations = ");
out(formatSeconds(4, $timePerIter), "/iteration
\n");
}
# run passes
print "Measuring $iterCount iterations x $NUMPASSES passes...";
my @t = callJava($method, $pat, $iterCount, $NUMPASSES);
print "done.\n";
my @ms = ();
my @b; # scratch
for my $a (@t) {
# $a->[0]: method name, corresponds to $method
# $a->[1]: 'begin' data, == $iterCount
# $a->[2]: 'end' data, of the form
\n");
out("Events per iteration: $eventsPerIter
\n");
my @ms_str = @ms;
$ms_str[0] .= " (discarded)" if (@ms_str > 1);
out("Raw times (ms/pass): ", join(", ", @ms_str), "
\n");
($iterCount, $eventsPerIter, @ms);
}
#---------------------------------------------------------------------
# Invoke java to run $TESTCLASS, passing it the given parameters.
#
# @param the method to run
# @param the number of iterations, or if negative, the duration
# in seconds. If more than one pass is desired, pass in
# a string, e.g., "100 100 100".
# @param the pattern defining characters to test
#
# @return an array of results. Each result is an array REF
# describing one pass. The array REF contains:
# ->[0]: The method name as reported
# ->[1]: The params on the '=