#!/usr/bin/perl
# ********************************************************************
# * Copyright (C) 2016 and later: Unicode, Inc. and others.
# * License & terms of use: http://www.unicode.org/copyright.html
# ********************************************************************
# * COPYRIGHT:
# * Copyright (c) 2002-2013, International Business Machines Corporation and
# * others. All Rights Reserved.
# ********************************************************************

# Runs the collation performance tool once per (locale, data file) pair, both
# in its default mode and with the -unix option, and writes the collected
# numbers as an HTML table to a time-stamped file under ../results/.

use strict;
use warnings;

require "../perldriver/Common.pl";

use lib '../perldriver';

# Settings read as package globals.  This script never assigns them, so they
# are presumably set by the Common.pl required above -- confirm there.
our ( $OnWindows, $WindowsPlatform, $ICULatest, $ICUPathLatest,
    $ICULatestVersion, $CollationDataPath );

# Shell command prefix that launches the performance tool.
my $collperf;
if ($OnWindows) {
    $collperf = "cd " . $ICULatest . "/bin && " . $ICUPathLatest
      . "/collationperf/$WindowsPlatform/Release/collationperf.exe";
}
else {
    $collperf = "LD_LIBRARY_PATH=" . $ICULatest . "/source/lib:" . $ICULatest
      . "/source/tools/ctestfw " . $ICUPathLatest . "/collationperf/collperf";
}

my $data_dir    = $CollationDataPath . "/";
my $file_prefix = "TestNames_";

# One entry per test run: [ locale, data file name without the prefix ].
# Keeping each pair together prevents the two lists from drifting apart.
my @test_cases = (
    [ "en_US", "Latin.txt" ],
    [ "da_DK", "Latin.txt" ],
    [ "de_DE", "Latin.txt" ],
    [ "fr_FR", "Latin.txt" ],
    [ "ja_JP", "Latin.txt" ],
    [ "ja_JP", "Japanese_h.txt" ],
    [ "ja_JP", "Japanese_k.txt" ],
    [ "ja_JP", "Asian.txt" ],
    [ "zh_CN", "Latin.txt" ],
    [ "zh_CN", "Chinese.txt" ],
    [ "zh_CN", "Simplified_Chinese.txt" ],
    [ "zh_TW", "Latin.txt" ],
    [ "zh_TW", "Chinese.txt" ],
    [ "ko_KR", "Latin.txt" ],
    [ "ko_KR", "Korean.txt" ],
    [ "ru_RU", "Latin.txt" ],
    [ "ru_RU", "Russian.txt" ],
    [ "th_TH", "Latin.txt" ],
    [ "th_TH", "Thai.txt" ],
);

# Each row is an array ref of eleven cells:
# locale, data file, 3 ICU values, 3 -unix values, 3 percentages.
my @report_rows;

for my $case (@test_cases) {
    my ( $locale, $short_name ) = @{$case};
    my $data_file = $file_prefix . $short_name;
    my $data_path = $data_dir . $data_file;

    # The four runs are issued in this fixed order: quicksort for both
    # implementations first, then key generation for both.
    my @icu_sort = run_collperf( $collperf, $locale, 0, $data_path, "-qsort" );
    my @nix_sort = run_collperf( $collperf, $locale, 1, $data_path, "-qsort" );
    my @icu_keys = run_collperf( $collperf, $locale, 0, $data_path, "-keygen" );
    my @nix_keys = run_collperf( $collperf, $locale, 1, $data_path, "-keygen" );

    # The values are picked by output line index: the third line (index 2)
    # supplies the time of either run, the fourth line (index 3) of a
    # -keygen run supplies the key length.
    # NOTE(review): confirm these indices against the tool's output format.
    my @icu = (
        first_token( value_after_equals( $icu_sort[2] ) ),
        first_token( value_after_equals( $icu_keys[2] ) ),
        chomped( value_after_equals( $icu_keys[3] ) ),
    );
    my @nix = (
        first_token( value_after_equals( $nix_sort[2] ) ),
        first_token( value_after_equals( $nix_keys[2] ) ),
        chomped( value_after_equals( $nix_keys[3] ) ),
    );

    my @percent =
      map { percent_difference( $icu[$_], $nix[$_] ) . "%" } 0 .. 2;

    push @report_rows, [ $locale, $data_file, @icu, @nix, @percent ];
}

# Print the results in a HTML page
printOutput( \@report_rows );

exit(0);

# Runs the performance tool through the shell and returns its output lines.
#   $tool      - shell command prefix that starts the tool
#   $locale    - value for the -locale option
#   $use_unix  - true to add the -unix option
#   $file      - path passed to the -file option
#   $operation - operation switch, e.g. "-qsort" or "-keygen"
# The prefix contains shell syntax ("cd ... &&" or an environment assignment),
# so the command has to go through the shell rather than a list-form exec.
sub run_collperf {
    my ( $tool, $locale, $use_unix, $file, $operation ) = @_;
    my $unix_option = $use_unix ? " -unix" : "";
    return `$tool -locale $locale$unix_option -loop 1000 -file $file $operation`;
}

# Returns the text between the first and second ' = ' of $line, or '' when
# the line is missing or holds no such field.
sub value_after_equals {
    my ($line) = @_;
    return '' unless defined $line;
    my @parts = split( ' = ', $line );
    return defined $parts[1] ? $parts[1] : '';
}

# Returns the first whitespace-delimited token of $text, or '' if there is none.
sub first_token {
    my ($text) = @_;
    my @tokens = split( ' ', $text );
    return defined $tokens[0] ? $tokens[0] : '';
}

# Returns $text without its trailing newline.
sub chomped {
    my ($text) = @_;
    chomp $text;
    return $text;
}

# Returns (($nix - $icu) / $icu) * 100 cut to its first seven characters, or
# 0 when $icu is not greater than zero.  Empty or non-numeric measurements
# count as 0, so a missing $nix value yields -100.
# NOTE(review): the cut is a plain substr on the stringified number, so a
# value Perl prints in exponent notation would lose its exponent.
sub percent_difference {
    my ( $icu, $nix ) = @_;
    no warnings 'numeric';    # blank measurements are deliberately read as 0
    return 0 unless $icu > 0;
    return substr( ( ( $nix - $icu ) / $icu ) * 100, 0, 7 );
}

# This subroutine creates the web page and prints out the results in a table.
#   $rows - array ref of rows, each an array ref of table cells
# Dies if the report file cannot be opened or closed.
sub printOutput {
    my ($rows) = @_;

    # Build the file name from the current local time.
    my $html = localtime;
    $html =~ s/://g;         # ':' illegal
    $html =~ s/\s*\d+$//;    # delete year
    $html =~ s/^\w+\s*//;    # delete dow
    $html = "CollationPerformance $html.html";
    $html = "../results/" . $html;
    $html =~ s/ /_/g;

    open( my $out, '>', $html ) or die "Can't write to $html: $!";

    print {$out} <<'EOF';
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Collation: ICU4C vs. glibc</title>
<link rel="stylesheet" href="../icu.css" type="text/css" />
</head>
<body>
<!--#include virtual="../ssi/header.html" -->
EOF

    print {$out} "<h2>Collation: ICU4C " . $ICULatestVersion . " vs. GLIBC</h2>\n";

    print {$out} <<'EOF';
<p>The performance test takes a locale and creates a RuleBasedCollator with
default options. A large list of names is used as data in each test, where the
names vary according to language. Each Collation operation over the whole list
is repeated 1000 times. The percentage values in the final column are the most
useful. They measure differences, where positive is better for ICU4C, and
negative is better for the compared implementation.</p>
<h3>Key</h3>
<table border="1" cellspacing="0" cellpadding="4">
<tr>
<th align="left">Operation</th>
<th align="left">Units</th>
<th align="left">Description</th>
</tr>
<tr>
<td>strcoll</td>
<td>nanosecs</td>
<td>Timing for string collation, an incremental compare of strings.</td>
</tr>
<tr>
<td>keygen</td>
<td>nanosecs</td>
<td>Timing for generation of sort keys, used to 'precompile' information so
that subsequent operations can use binary comparison.</td>
</tr>
<tr>
<td>keylen</td>
<td>bytes/char</td>
<td>The average length of the generated sort keys, in bytes per character
(Unicode/ISO 10646 code point). Generally this is the important field for sort
key performance, since it directly impacts the time necessary for binary
comparison, and the overhead of memory usage and retrieval time for sort
keys.</td>
</tr>
</table>
EOF

    printData( $out, $rows );

    print {$out} <<'EOF';
<h3><i>Notes</i></h3>
<ol>
<li>As with all performance measurements, the results will vary according to
the hardware and compiler. The strcoll operation is particularly sensitive; we
have found that even slight changes in code alignment can produce 10%
differences.</li>
<li>For more information on incremental vs. sort key comparison, the importance
of multi-level sorting, and other features of collation, see <a href=
"http://www.unicode.org/reports/tr10/">Unicode Collation (UCA)</a>.</li>
<li>For general information on ICU collation see <a href=
"/userguide/Collate_Intro.html">User Guide</a>.</li>
<li>For information on APIs, see <a href="/apiref/icu4c/ucol_8h.html">C</a>,
<a href="/apiref/icu4c/classCollator.html">C++</a>, or <a href=
"/apiref/icu4j/com/ibm/icu/text/Collator.html">Java</a>.</li>
</ol>
<!--#include virtual="../ssi/footer.html" -->
</body>
</html>

EOF

    close($out) or die "Can't close $html: $!";
    return;
}

# This subroutine formats and prints the table.
#   $out  - open output filehandle for the report
#   $rows - array ref of rows, each an array ref of table cells
# Any cell whose text starts with '-' is rendered in red.
sub printData {
    my ( $out, $rows ) = @_;

    print {$out} <<'EOF';
<h3>Data</h3>
<table border="1" cellspacing="0" cellpadding="4">
<tr>
<td align="left"><b>Locale</b></td>
<td align="left"><b>Data file</b></td>
<td align="left"><b>strcoll</b> <i>(ICU)</i></td>
<td align="left"><b>keygen</b> <i>(ICU)</i></td>
<td align="left"><b>keylen</b> <i>(ICU)</i></td>
<td align="left"><b>strcoll</b> <i>(GLIBC)</i></td>
<td align="left"><b>keygen</b> <i>(GLIBC)</i></td>
<td align="left"><b>keylen</b> <i>(GLIBC)</i></td>
<td align="left"><b>strcoll</b> <i>(GLIBC-ICU)/ICU)</i></td>
<td align="left"><b>keygen</b> <i>(GLIBC-ICU)/ICU)</i></td>
<td align="left"><b>keylen</b> <i>(GLIBC-ICU)/ICU)</i></td>
</tr>
EOF

    for my $row ( @{$rows} ) {
        print {$out} "<tr>";
        for my $value ( @{$row} ) {
            print {$out} "<td align=\"center\">";

            if ( $value =~ m/^[-]/ ) {
                print {$out} "<font color=\"red\">$value</font>";
            }
            else {
                print {$out} "$value";
            }

            print {$out} "</td>";
        }
        print {$out} "</tr>\n";
    }

    print {$out} <<'EOF';
</table>
EOF
    return;
}