• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  *   Copyright (C) 2002-2007, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  *   file name:  utfperf.cpp
7  *   encoding:   US-ASCII
8  *   tab size:   8 (not used)
9  *   indentation:4
10  *
11  *   created on: 2005Nov17
12  *   created by: Raymond Yang
13  *
14  *   Ported from utfper.c created by Markus W. Scherer
15  *   Performance test program for Unicode converters
16  */
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include "unicode/uperf.h"
21 #include "uoptions.h"
22 
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves like a
 * single primary expression wherever it is used (e.g. after sizeof or
 * next to postfix operators).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
24 
25 /* definitions and text buffers */
26 
27 #define INPUT_CAPACITY (1024*1024)
28 #define INTERMEDIATE_CAPACITY 4096
29 #define INTERMEDIATE_SMALL_CAPACITY 20
30 #define PIVOT_CAPACITY 1024
31 #define OUTPUT_CAPACITY INPUT_CAPACITY
32 
33 static char utf8[INPUT_CAPACITY];
34 static UChar pivot[INTERMEDIATE_CAPACITY];
35 
36 static UChar output[OUTPUT_CAPACITY];
37 static char intermediate[OUTPUT_CAPACITY];
38 
39 static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;
40 
41 static int32_t fromUCallbackCount;
42 
43 // Command-line options specific to utfperf.
44 // Options do not have abbreviations: Force readable command lines.
45 // (Using U+0001 for abbreviation characters.)
46 enum {
47     CHARSET,
48     CHUNK_LENGTH,
49     PIVOT_LENGTH,
50     UTFPERF_OPTIONS_COUNT
51 };
52 
53 static UOption options[UTFPERF_OPTIONS_COUNT]={
54     UOPTION_DEF("charset",  '\x01', UOPT_REQUIRES_ARG),
55     UOPTION_DEF("chunk",    '\x01', UOPT_REQUIRES_ARG),
56     UOPTION_DEF("pivot",    '\x01', UOPT_REQUIRES_ARG)
57 };
58 
// Help text for the utfperf-specific options; passed to the UPerfTest base
// class by the UtfPerformanceTest constructor.
static const char *const utfperf_usage =
    /* --charset */
    "\t--charset   Charset for which to test performance, e.g. windows-1251.\n"
    "\t            Default: UTF-8\n"
    /* --chunk */
    "\t--chunk     Length (in bytes) of charset output chunks. [4096]\n"
    /* --pivot */
    "\t--pivot     Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
    "\t            [1024]\n";
65 
66 // Test object.
67 class  UtfPerformanceTest : public UPerfTest{
68 public:
UtfPerformanceTest(int32_t argc,const char * argv[],UErrorCode & status)69     UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
70             : UPerfTest(argc, argv, options, LENGTHOF(options), utfperf_usage, status) {
71         if (U_SUCCESS(status)) {
72             charset = options[CHARSET].value;
73 
74             chunkLength = atoi(options[CHUNK_LENGTH].value);
75             if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
76                 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
77                 status = U_ILLEGAL_ARGUMENT_ERROR;
78             }
79 
80             pivotLength = atoi(options[PIVOT_LENGTH].value);
81             if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
82                 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
83                 status = U_ILLEGAL_ARGUMENT_ERROR;
84             }
85 
86             int32_t inputLength;
87             UPerfTest::getBuffer(inputLength, status);
88             countInputCodePoints = u_countChar32(buffer, bufferLen);
89             u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
90         }
91     }
92 
93     virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
94 
getBuffer() const95     const UChar *getBuffer() const { return buffer; }
getBufferLen() const96     int32_t getBufferLen() const { return bufferLen; }
97 
98     const char *charset;
99     int32_t chunkLength, pivotLength;
100 };
101 
102 U_CDECL_BEGIN
103 // Custom callback for counting callback calls.
104 static void U_CALLCONV
fromUCallback(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * pErrorCode)105 fromUCallback(const void *context,
106               UConverterFromUnicodeArgs *fromUArgs,
107               const UChar *codeUnits,
108               int32_t length,
109               UChar32 codePoint,
110               UConverterCallbackReason reason,
111               UErrorCode *pErrorCode) {
112     if (reason <= UCNV_IRREGULAR) {
113         ++fromUCallbackCount;
114     }
115     UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
116 }
117 U_CDECL_END
118 
119 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
120 class Command : public UPerfFunction {
121 protected:
Command(const UtfPerformanceTest & testcase)122     Command(const UtfPerformanceTest &testcase)
123             : testcase(testcase),
124               input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
125               errorCode(U_ZERO_ERROR) {
126         cnv=ucnv_open(testcase.charset, &errorCode);
127         if (U_FAILURE(errorCode)) {
128             fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
129         }
130         ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
131     }
132 public:
~Command()133     virtual ~Command(){
134         if(U_SUCCESS(errorCode)) {
135             ucnv_close(cnv);
136         }
137     }
138     // virtual void call(UErrorCode* pErrorCode) { ... }
getOperationsPerIteration()139     virtual long getOperationsPerIteration(){
140         return countInputCodePoints;
141     }
142 
143     const UtfPerformanceTest &testcase;
144     const UChar *input;
145     int32_t inputLength;
146     UErrorCode errorCode;
147     UConverter *cnv;
148 };
149 
150 // Test roundtrip UTF-16->encoding->UTF-16.
151 class Roundtrip : public Command {
152 protected:
Roundtrip(const UtfPerformanceTest & testcase)153     Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
154 public:
get(const UtfPerformanceTest & testcase)155     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
156         Roundtrip * t = new Roundtrip(testcase);
157         if (U_SUCCESS(t->errorCode)){
158             return t;
159         } else {
160             delete t;
161             return NULL;
162         }
163     }
call(UErrorCode * pErrorCode)164     virtual void call(UErrorCode* pErrorCode){
165         const UChar *pIn, *pInLimit;
166         UChar *pOut, *pOutLimit;
167         char *pInter, *pInterLimit;
168         const char *p;
169         UBool flush;
170 
171         ucnv_reset(cnv);
172         fromUCallbackCount=0;
173 
174         pIn=input;
175         pInLimit=input+inputLength;
176 
177         pOut=output;
178         pOutLimit=output+OUTPUT_CAPACITY;
179 
180         pInterLimit=intermediate+testcase.chunkLength;
181 
182         encodedLength=outputLength=0;
183         flush=FALSE;
184 
185         do {
186             /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
187             pInter=intermediate;
188             ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
189             encodedLength+=(int32_t)(pInter-intermediate);
190 
191             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
192                 /* make sure that we convert once more to really flush */
193                 *pErrorCode=U_ZERO_ERROR;
194             } else if(U_FAILURE(*pErrorCode)) {
195                 return;
196             } else if(pIn==pInLimit) {
197                 flush=TRUE;
198             }
199 
200             /* convert the block [intermediate..pInter[ back to UTF-16 */
201             p=intermediate;
202             ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
203             if(U_FAILURE(*pErrorCode)) {
204                 return;
205             }
206             /* intermediate must have been consumed (p==pInter) because of the converter semantics */
207         } while(!flush);
208 
209         outputLength=pOut-output;
210         if(inputLength!=outputLength) {
211             fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
212             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
213         }
214     }
215 };
216 
217 // Test one-way conversion UTF-16->encoding.
218 class FromUnicode : public Command {
219 protected:
FromUnicode(const UtfPerformanceTest & testcase)220     FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
221 public:
get(const UtfPerformanceTest & testcase)222     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
223         FromUnicode * t = new FromUnicode(testcase);
224         if (U_SUCCESS(t->errorCode)){
225             return t;
226         } else {
227             delete t;
228             return NULL;
229         }
230     }
call(UErrorCode * pErrorCode)231     virtual void call(UErrorCode* pErrorCode){
232         const UChar *pIn, *pInLimit;
233         char *pInter, *pInterLimit;
234 
235         ucnv_resetFromUnicode(cnv);
236         fromUCallbackCount=0;
237 
238         pIn=input;
239         pInLimit=input+inputLength;
240 
241         pInterLimit=intermediate+testcase.chunkLength;
242 
243         encodedLength=0;
244 
245         for(;;) {
246             pInter=intermediate;
247             ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
248             encodedLength+=(int32_t)(pInter-intermediate);
249 
250             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
251                 /* make sure that we convert once more to really flush */
252                 *pErrorCode=U_ZERO_ERROR;
253             } else if(U_FAILURE(*pErrorCode)) {
254                 return;
255             } else {
256                 break;  // all done
257             }
258         }
259     }
260 };
261 
262 // Test one-way conversion UTF-8->encoding.
263 class FromUTF8 : public Command {
264 protected:
FromUTF8(const UtfPerformanceTest & testcase)265     FromUTF8(const UtfPerformanceTest &testcase)
266             : Command(testcase),
267               utf8Cnv(NULL),
268               input8(utf8), input8Length(utf8Length) {
269         utf8Cnv=ucnv_open("UTF-8", &errorCode);
270     }
271 public:
get(const UtfPerformanceTest & testcase)272     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
273         FromUTF8 * t = new FromUTF8(testcase);
274         if (U_SUCCESS(t->errorCode)){
275             return t;
276         } else {
277             delete t;
278             return NULL;
279         }
280     }
~FromUTF8()281     ~FromUTF8() {
282         ucnv_close(utf8Cnv);
283     }
call(UErrorCode * pErrorCode)284     virtual void call(UErrorCode* pErrorCode){
285         const char *pIn, *pInLimit;
286         char *pInter, *pInterLimit;
287         UChar *pivotSource, *pivotTarget, *pivotLimit;
288 
289         ucnv_resetToUnicode(utf8Cnv);
290         ucnv_resetFromUnicode(cnv);
291         fromUCallbackCount=0;
292 
293         pIn=input8;
294         pInLimit=input8+input8Length;
295 
296         pInterLimit=intermediate+testcase.chunkLength;
297 
298         pivotSource=pivotTarget=pivot;
299         pivotLimit=pivot+testcase.pivotLength;
300 
301         encodedLength=0;
302 
303         for(;;) {
304             pInter=intermediate;
305             ucnv_convertEx(cnv, utf8Cnv,
306                            &pInter, pInterLimit,
307                            &pIn, pInLimit,
308                            pivot, &pivotSource, &pivotTarget, pivotLimit,
309                            FALSE, TRUE, pErrorCode);
310             encodedLength+=(int32_t)(pInter-intermediate);
311 
312             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
313                 /* make sure that we convert once more to really flush */
314                 *pErrorCode=U_ZERO_ERROR;
315             } else if(U_FAILURE(*pErrorCode)) {
316                 return;
317             } else {
318                 break;  // all done
319             }
320         }
321     }
322 protected:
323     UConverter *utf8Cnv;
324     const char *input8;
325     int32_t input8Length;
326 };
327 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)328 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
329     switch (index) {
330         case 0: name = "Roundtrip";     if (exec) return Roundtrip::get(*this); break;
331         case 1: name = "FromUnicode";   if (exec) return FromUnicode::get(*this); break;
332         case 2: name = "FromUTF8";      if (exec) return FromUTF8::get(*this); break;
333         default: name = ""; break;
334     }
335     return NULL;
336 }
337 
main(int argc,const char * argv[])338 int main(int argc, const char *argv[])
339 {
340     // Default values for command-line options.
341     options[CHARSET].value = "UTF-8";
342     options[CHUNK_LENGTH].value = "4096";
343     options[PIVOT_LENGTH].value = "1024";
344 
345     UErrorCode status = U_ZERO_ERROR;
346     UtfPerformanceTest test(argc, argv, status);
347 
348 	if (U_FAILURE(status)){
349         printf("The error is %s\n", u_errorName(status));
350         test.usage();
351         return status;
352     }
353 
354     if (test.run() == FALSE){
355         fprintf(stderr, "FAILED: Tests could not be run please check the "
356 			            "arguments.\n");
357         return -1;
358     }
359 
360     if (fromUCallbackCount > 0) {
361         printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
362     }
363 
364     return 0;
365 }
366