1 /*
2 **********************************************************************
3 * Copyright (C) 2002-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: utfperf.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2005Nov17
12 * created by: Raymond Yang
13 *
14 * Ported from utfper.c created by Markus W. Scherer
15 * Performance test program for Unicode converters
16 */
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include "unicode/uperf.h"
21 #include "cmemory.h" // for UPRV_LENGTHOF
22 #include "uoptions.h"
23
/* definitions and text buffers */

// Size in bytes of the utf8[] input buffer.
#define INPUT_CAPACITY (1024*1024)
// Number of UChars allocated for pivot[].
#define INTERMEDIATE_CAPACITY 4096
// Not referenced in the visible part of this file.
#define INTERMEDIATE_SMALL_CAPACITY 20
// Largest value accepted for the --pivot option (checked in the
// UtfPerformanceTest constructor).
#define PIVOT_CAPACITY 1024
// Element count of output[] and intermediate[]; also the largest value
// accepted for the --chunk option.
#define OUTPUT_CAPACITY INPUT_CAPACITY

// UTF-8 form of the input text, filled by u_strToUTF8() in the
// UtfPerformanceTest constructor and read by FromUTF8.
static char utf8[INPUT_CAPACITY];
// UTF-16 pivot buffer handed to ucnv_convertEx() in FromUTF8::call();
// only the first testcase.pivotLength UChars are used.
static UChar pivot[INTERMEDIATE_CAPACITY];

// UTF-16 result of the Roundtrip test.
static UChar output[OUTPUT_CAPACITY];
// Charset bytes produced by each test; every conversion call writes from the
// start of this buffer and is limited to testcase.chunkLength bytes.
static char intermediate[OUTPUT_CAPACITY];

// utf8Length: number of bytes stored in utf8[].
// encodedLength: total charset bytes produced by the most recent call().
// outputLength: number of UChars written to output[] by Roundtrip::call().
// countInputCodePoints: code points in the input text; reported as the
//     number of operations per iteration.
static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;

// Number of counted fromUCallback() invocations; each call() resets it to 0
// and main() prints it after the run.
static int32_t fromUCallbackCount;
41
// Command-line options specific to utfperf.
// Options do not have abbreviations: Force readable command lines.
// (Using U+0001 for abbreviation characters.)
//
// The enumerators are used as indexes into options[] below;
// UTFPERF_OPTIONS_COUNT is the number of entries and sizes that array.
enum {
    CHARSET,
    CHUNK_LENGTH,
    PIVOT_LENGTH,
    UTFPERF_OPTIONS_COUNT
};
51
// Option table passed to the UPerfTest constructor.
// Entries must stay in the same order as the CHARSET, CHUNK_LENGTH and
// PIVOT_LENGTH enumerators because the table is indexed with them.
// main() stores the default values into .value before parsing.
static UOption options[UTFPERF_OPTIONS_COUNT]={
    UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG),
    UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG),
    UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG)
};
57
// Usage text for the utfperf-specific options, passed to the UPerfTest
// constructor. The defaults mentioned here are the ones main() installs
// into options[] before the command line is parsed.
static const char *const utfperf_usage =
    "\t--charset Charset for which to test performance, e.g. windows-1251.\n"
    "\t Default: UTF-8\n"
    "\t--chunk Length (in bytes) of charset output chunks. [4096]\n"
    "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
    "\t [1024]\n";
64
65 // Test object.
66 class UtfPerformanceTest : public UPerfTest{
67 public:
UtfPerformanceTest(int32_t argc,const char * argv[],UErrorCode & status)68 UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
69 : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) {
70 if (U_SUCCESS(status)) {
71 charset = options[CHARSET].value;
72
73 chunkLength = atoi(options[CHUNK_LENGTH].value);
74 if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
75 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
76 status = U_ILLEGAL_ARGUMENT_ERROR;
77 }
78
79 pivotLength = atoi(options[PIVOT_LENGTH].value);
80 if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
81 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
82 status = U_ILLEGAL_ARGUMENT_ERROR;
83 }
84
85 int32_t inputLength;
86 UPerfTest::getBuffer(inputLength, status);
87 countInputCodePoints = u_countChar32(buffer, bufferLen);
88 u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
89 }
90 }
91
92 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
93
getBuffer() const94 const UChar *getBuffer() const { return buffer; }
getBufferLen() const95 int32_t getBufferLen() const { return bufferLen; }
96
97 const char *charset;
98 int32_t chunkLength, pivotLength;
99 };
100
101 U_CDECL_BEGIN
102 // Custom callback for counting callback calls.
103 static void U_CALLCONV
fromUCallback(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * pErrorCode)104 fromUCallback(const void *context,
105 UConverterFromUnicodeArgs *fromUArgs,
106 const UChar *codeUnits,
107 int32_t length,
108 UChar32 codePoint,
109 UConverterCallbackReason reason,
110 UErrorCode *pErrorCode) {
111 if (reason <= UCNV_IRREGULAR) {
112 ++fromUCallbackCount;
113 }
114 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
115 }
116 U_CDECL_END
117
118 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
119 class Command : public UPerfFunction {
120 protected:
Command(const UtfPerformanceTest & testcase)121 Command(const UtfPerformanceTest &testcase)
122 : testcase(testcase),
123 input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
124 errorCode(U_ZERO_ERROR) {
125 cnv=ucnv_open(testcase.charset, &errorCode);
126 if (U_FAILURE(errorCode)) {
127 fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
128 }
129 ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
130 }
131 public:
~Command()132 virtual ~Command(){
133 if(U_SUCCESS(errorCode)) {
134 ucnv_close(cnv);
135 }
136 }
137 // virtual void call(UErrorCode* pErrorCode) { ... }
getOperationsPerIteration()138 virtual long getOperationsPerIteration(){
139 return countInputCodePoints;
140 }
141
142 const UtfPerformanceTest &testcase;
143 const UChar *input;
144 int32_t inputLength;
145 UErrorCode errorCode;
146 UConverter *cnv;
147 };
148
149 // Test roundtrip UTF-16->encoding->UTF-16.
150 class Roundtrip : public Command {
151 protected:
Roundtrip(const UtfPerformanceTest & testcase)152 Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
153 public:
get(const UtfPerformanceTest & testcase)154 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
155 Roundtrip * t = new Roundtrip(testcase);
156 if (U_SUCCESS(t->errorCode)){
157 return t;
158 } else {
159 delete t;
160 return NULL;
161 }
162 }
call(UErrorCode * pErrorCode)163 virtual void call(UErrorCode* pErrorCode){
164 const UChar *pIn, *pInLimit;
165 UChar *pOut, *pOutLimit;
166 char *pInter, *pInterLimit;
167 const char *p;
168 UBool flush;
169
170 ucnv_reset(cnv);
171 fromUCallbackCount=0;
172
173 pIn=input;
174 pInLimit=input+inputLength;
175
176 pOut=output;
177 pOutLimit=output+OUTPUT_CAPACITY;
178
179 pInterLimit=intermediate+testcase.chunkLength;
180
181 encodedLength=outputLength=0;
182 flush=FALSE;
183
184 do {
185 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
186 pInter=intermediate;
187 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
188 encodedLength+=(int32_t)(pInter-intermediate);
189
190 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
191 /* make sure that we convert once more to really flush */
192 *pErrorCode=U_ZERO_ERROR;
193 } else if(U_FAILURE(*pErrorCode)) {
194 return;
195 } else if(pIn==pInLimit) {
196 flush=TRUE;
197 }
198
199 /* convert the block [intermediate..pInter[ back to UTF-16 */
200 p=intermediate;
201 ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
202 if(U_FAILURE(*pErrorCode)) {
203 return;
204 }
205 /* intermediate must have been consumed (p==pInter) because of the converter semantics */
206 } while(!flush);
207
208 outputLength=pOut-output;
209 if(inputLength!=outputLength) {
210 fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
211 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
212 }
213 }
214 };
215
216 // Test one-way conversion UTF-16->encoding.
217 class FromUnicode : public Command {
218 protected:
FromUnicode(const UtfPerformanceTest & testcase)219 FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
220 public:
get(const UtfPerformanceTest & testcase)221 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
222 FromUnicode * t = new FromUnicode(testcase);
223 if (U_SUCCESS(t->errorCode)){
224 return t;
225 } else {
226 delete t;
227 return NULL;
228 }
229 }
call(UErrorCode * pErrorCode)230 virtual void call(UErrorCode* pErrorCode){
231 const UChar *pIn, *pInLimit;
232 char *pInter, *pInterLimit;
233
234 ucnv_resetFromUnicode(cnv);
235 fromUCallbackCount=0;
236
237 pIn=input;
238 pInLimit=input+inputLength;
239
240 pInterLimit=intermediate+testcase.chunkLength;
241
242 encodedLength=0;
243
244 for(;;) {
245 pInter=intermediate;
246 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
247 encodedLength+=(int32_t)(pInter-intermediate);
248
249 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
250 /* make sure that we convert once more to really flush */
251 *pErrorCode=U_ZERO_ERROR;
252 } else if(U_FAILURE(*pErrorCode)) {
253 return;
254 } else {
255 break; // all done
256 }
257 }
258 }
259 };
260
261 // Test one-way conversion UTF-8->encoding.
262 class FromUTF8 : public Command {
263 protected:
FromUTF8(const UtfPerformanceTest & testcase)264 FromUTF8(const UtfPerformanceTest &testcase)
265 : Command(testcase),
266 utf8Cnv(NULL),
267 input8(utf8), input8Length(utf8Length) {
268 utf8Cnv=ucnv_open("UTF-8", &errorCode);
269 }
270 public:
get(const UtfPerformanceTest & testcase)271 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
272 FromUTF8 * t = new FromUTF8(testcase);
273 if (U_SUCCESS(t->errorCode)){
274 return t;
275 } else {
276 delete t;
277 return NULL;
278 }
279 }
~FromUTF8()280 ~FromUTF8() {
281 ucnv_close(utf8Cnv);
282 }
call(UErrorCode * pErrorCode)283 virtual void call(UErrorCode* pErrorCode){
284 const char *pIn, *pInLimit;
285 char *pInter, *pInterLimit;
286 UChar *pivotSource, *pivotTarget, *pivotLimit;
287
288 ucnv_resetToUnicode(utf8Cnv);
289 ucnv_resetFromUnicode(cnv);
290 fromUCallbackCount=0;
291
292 pIn=input8;
293 pInLimit=input8+input8Length;
294
295 pInterLimit=intermediate+testcase.chunkLength;
296
297 pivotSource=pivotTarget=pivot;
298 pivotLimit=pivot+testcase.pivotLength;
299
300 encodedLength=0;
301
302 for(;;) {
303 pInter=intermediate;
304 ucnv_convertEx(cnv, utf8Cnv,
305 &pInter, pInterLimit,
306 &pIn, pInLimit,
307 pivot, &pivotSource, &pivotTarget, pivotLimit,
308 FALSE, TRUE, pErrorCode);
309 encodedLength+=(int32_t)(pInter-intermediate);
310
311 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
312 /* make sure that we convert once more to really flush */
313 *pErrorCode=U_ZERO_ERROR;
314 } else if(U_FAILURE(*pErrorCode)) {
315 return;
316 } else {
317 break; // all done
318 }
319 }
320 }
321 protected:
322 UConverter *utf8Cnv;
323 const char *input8;
324 int32_t input8Length;
325 };
326
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)327 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
328 switch (index) {
329 case 0: name = "Roundtrip"; if (exec) return Roundtrip::get(*this); break;
330 case 1: name = "FromUnicode"; if (exec) return FromUnicode::get(*this); break;
331 case 2: name = "FromUTF8"; if (exec) return FromUTF8::get(*this); break;
332 default: name = ""; break;
333 }
334 return NULL;
335 }
336
main(int argc,const char * argv[])337 int main(int argc, const char *argv[])
338 {
339 // Default values for command-line options.
340 options[CHARSET].value = "UTF-8";
341 options[CHUNK_LENGTH].value = "4096";
342 options[PIVOT_LENGTH].value = "1024";
343
344 UErrorCode status = U_ZERO_ERROR;
345 UtfPerformanceTest test(argc, argv, status);
346
347 if (U_FAILURE(status)){
348 printf("The error is %s\n", u_errorName(status));
349 test.usage();
350 return status;
351 }
352
353 if (test.run() == FALSE){
354 fprintf(stderr, "FAILED: Tests could not be run please check the "
355 "arguments.\n");
356 return -1;
357 }
358
359 if (fromUCallbackCount > 0) {
360 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
361 }
362
363 return 0;
364 }
365