• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 **********************************************************************
 *   Copyright (C) 2002-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  utfperf.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2005Nov17
 *   created by: Raymond Yang
 *
 *   Ported from utfper.c created by Markus W. Scherer
 *   Performance test program for Unicode converters
 */
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include "unicode/uperf.h"
21 #include "cmemory.h" // for UPRV_LENGTHOF
22 #include "uoptions.h"
23 
/* definitions and text buffers */

// Capacities, in array elements, of the static buffers declared below and
// upper bounds for the --chunk and --pivot command-line values.
#define INPUT_CAPACITY (1024*1024)
#define INTERMEDIATE_CAPACITY 4096
#define INTERMEDIATE_SMALL_CAPACITY 20
#define PIVOT_CAPACITY 1024
#define OUTPUT_CAPACITY INPUT_CAPACITY

// UTF-8 copy of the input text, written by u_strToUTF8() in the
// UtfPerformanceTest constructor.
static char utf8[INPUT_CAPACITY];
// UTF-16 pivot buffer passed to ucnv_convertEx() by the FromUTF8 test.
static UChar pivot[INTERMEDIATE_CAPACITY];

// UTF-16 result of converting back from the charset in the Roundtrip test.
static UChar output[OUTPUT_CAPACITY];
// Charset bytes produced by the tests; each conversion call writes from the
// start of this buffer up to the configured chunk length.
static char intermediate[OUTPUT_CAPACITY];

// utf8Length:           number of bytes of utf8[] filled by u_strToUTF8().
// encodedLength:        running total of charset bytes produced by a test call.
// outputLength:         number of UChars written to output[] by Roundtrip.
// countInputCodePoints: code point count of the input, reported as the
//                       number of operations per iteration.
static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;

// Counter incremented by fromUCallback(); each test call resets it to 0.
static int32_t fromUCallbackCount;

// Command-line options specific to utfperf.
// Options do not have abbreviations: Force readable command lines.
// (Using U+0001 for abbreviation characters.)
// The enum constants are the indexes into options[] below.
enum {
    CHARSET,
    CHUNK_LENGTH,
    PIVOT_LENGTH,
    UTFPERF_OPTIONS_COUNT
};

// Option table in the same order as the enum above; main() fills in the
// default values before the options are parsed.
static UOption options[UTFPERF_OPTIONS_COUNT]={
    UOPTION_DEF("charset",  '\x01', UOPT_REQUIRES_ARG),
    UOPTION_DEF("chunk",    '\x01', UOPT_REQUIRES_ARG),
    UOPTION_DEF("pivot",    '\x01', UOPT_REQUIRES_ARG)
};

// Help text for the utfperf-specific options, handed to the UPerfTest
// constructor.
static const char *const utfperf_usage =
    "\t--charset   Charset for which to test performance, e.g. windows-1251.\n"
    "\t            Default: UTF-8\n"
    "\t--chunk     Length (in bytes) of charset output chunks. [4096]\n"
    "\t--pivot     Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
    "\t            [1024]\n";
64 
65 // Test object.
66 class  UtfPerformanceTest : public UPerfTest{
67 public:
UtfPerformanceTest(int32_t argc,const char * argv[],UErrorCode & status)68     UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
69             : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) {
70         if (U_SUCCESS(status)) {
71             charset = options[CHARSET].value;
72 
73             chunkLength = atoi(options[CHUNK_LENGTH].value);
74             if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
75                 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
76                 status = U_ILLEGAL_ARGUMENT_ERROR;
77             }
78 
79             pivotLength = atoi(options[PIVOT_LENGTH].value);
80             if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
81                 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
82                 status = U_ILLEGAL_ARGUMENT_ERROR;
83             }
84 
85             int32_t inputLength;
86             UPerfTest::getBuffer(inputLength, status);
87             countInputCodePoints = u_countChar32(buffer, bufferLen);
88             u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
89         }
90     }
91 
92     virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
93 
getBuffer() const94     const UChar *getBuffer() const { return buffer; }
getBufferLen() const95     int32_t getBufferLen() const { return bufferLen; }
96 
97     const char *charset;
98     int32_t chunkLength, pivotLength;
99 };
100 
101 U_CDECL_BEGIN
102 // Custom callback for counting callback calls.
103 static void U_CALLCONV
fromUCallback(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * pErrorCode)104 fromUCallback(const void *context,
105               UConverterFromUnicodeArgs *fromUArgs,
106               const UChar *codeUnits,
107               int32_t length,
108               UChar32 codePoint,
109               UConverterCallbackReason reason,
110               UErrorCode *pErrorCode) {
111     if (reason <= UCNV_IRREGULAR) {
112         ++fromUCallbackCount;
113     }
114     UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
115 }
116 U_CDECL_END
117 
118 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
119 class Command : public UPerfFunction {
120 protected:
Command(const UtfPerformanceTest & testcase)121     Command(const UtfPerformanceTest &testcase)
122             : testcase(testcase),
123               input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
124               errorCode(U_ZERO_ERROR) {
125         cnv=ucnv_open(testcase.charset, &errorCode);
126         if (U_FAILURE(errorCode)) {
127             fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
128         }
129         ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
130     }
131 public:
~Command()132     virtual ~Command(){
133         if(U_SUCCESS(errorCode)) {
134             ucnv_close(cnv);
135         }
136     }
137     // virtual void call(UErrorCode* pErrorCode) { ... }
getOperationsPerIteration()138     virtual long getOperationsPerIteration(){
139         return countInputCodePoints;
140     }
141 
142     const UtfPerformanceTest &testcase;
143     const UChar *input;
144     int32_t inputLength;
145     UErrorCode errorCode;
146     UConverter *cnv;
147 };
148 
149 // Test roundtrip UTF-16->encoding->UTF-16.
150 class Roundtrip : public Command {
151 protected:
Roundtrip(const UtfPerformanceTest & testcase)152     Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
153 public:
get(const UtfPerformanceTest & testcase)154     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
155         Roundtrip * t = new Roundtrip(testcase);
156         if (U_SUCCESS(t->errorCode)){
157             return t;
158         } else {
159             delete t;
160             return NULL;
161         }
162     }
call(UErrorCode * pErrorCode)163     virtual void call(UErrorCode* pErrorCode){
164         const UChar *pIn, *pInLimit;
165         UChar *pOut, *pOutLimit;
166         char *pInter, *pInterLimit;
167         const char *p;
168         UBool flush;
169 
170         ucnv_reset(cnv);
171         fromUCallbackCount=0;
172 
173         pIn=input;
174         pInLimit=input+inputLength;
175 
176         pOut=output;
177         pOutLimit=output+OUTPUT_CAPACITY;
178 
179         pInterLimit=intermediate+testcase.chunkLength;
180 
181         encodedLength=outputLength=0;
182         flush=FALSE;
183 
184         do {
185             /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
186             pInter=intermediate;
187             ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
188             encodedLength+=(int32_t)(pInter-intermediate);
189 
190             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
191                 /* make sure that we convert once more to really flush */
192                 *pErrorCode=U_ZERO_ERROR;
193             } else if(U_FAILURE(*pErrorCode)) {
194                 return;
195             } else if(pIn==pInLimit) {
196                 flush=TRUE;
197             }
198 
199             /* convert the block [intermediate..pInter[ back to UTF-16 */
200             p=intermediate;
201             ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
202             if(U_FAILURE(*pErrorCode)) {
203                 return;
204             }
205             /* intermediate must have been consumed (p==pInter) because of the converter semantics */
206         } while(!flush);
207 
208         outputLength=pOut-output;
209         if(inputLength!=outputLength) {
210             fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
211             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
212         }
213     }
214 };
215 
216 // Test one-way conversion UTF-16->encoding.
217 class FromUnicode : public Command {
218 protected:
FromUnicode(const UtfPerformanceTest & testcase)219     FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
220 public:
get(const UtfPerformanceTest & testcase)221     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
222         FromUnicode * t = new FromUnicode(testcase);
223         if (U_SUCCESS(t->errorCode)){
224             return t;
225         } else {
226             delete t;
227             return NULL;
228         }
229     }
call(UErrorCode * pErrorCode)230     virtual void call(UErrorCode* pErrorCode){
231         const UChar *pIn, *pInLimit;
232         char *pInter, *pInterLimit;
233 
234         ucnv_resetFromUnicode(cnv);
235         fromUCallbackCount=0;
236 
237         pIn=input;
238         pInLimit=input+inputLength;
239 
240         pInterLimit=intermediate+testcase.chunkLength;
241 
242         encodedLength=0;
243 
244         for(;;) {
245             pInter=intermediate;
246             ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
247             encodedLength+=(int32_t)(pInter-intermediate);
248 
249             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
250                 /* make sure that we convert once more to really flush */
251                 *pErrorCode=U_ZERO_ERROR;
252             } else if(U_FAILURE(*pErrorCode)) {
253                 return;
254             } else {
255                 break;  // all done
256             }
257         }
258     }
259 };
260 
261 // Test one-way conversion UTF-8->encoding.
262 class FromUTF8 : public Command {
263 protected:
FromUTF8(const UtfPerformanceTest & testcase)264     FromUTF8(const UtfPerformanceTest &testcase)
265             : Command(testcase),
266               utf8Cnv(NULL),
267               input8(utf8), input8Length(utf8Length) {
268         utf8Cnv=ucnv_open("UTF-8", &errorCode);
269     }
270 public:
get(const UtfPerformanceTest & testcase)271     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
272         FromUTF8 * t = new FromUTF8(testcase);
273         if (U_SUCCESS(t->errorCode)){
274             return t;
275         } else {
276             delete t;
277             return NULL;
278         }
279     }
~FromUTF8()280     ~FromUTF8() {
281         ucnv_close(utf8Cnv);
282     }
call(UErrorCode * pErrorCode)283     virtual void call(UErrorCode* pErrorCode){
284         const char *pIn, *pInLimit;
285         char *pInter, *pInterLimit;
286         UChar *pivotSource, *pivotTarget, *pivotLimit;
287 
288         ucnv_resetToUnicode(utf8Cnv);
289         ucnv_resetFromUnicode(cnv);
290         fromUCallbackCount=0;
291 
292         pIn=input8;
293         pInLimit=input8+input8Length;
294 
295         pInterLimit=intermediate+testcase.chunkLength;
296 
297         pivotSource=pivotTarget=pivot;
298         pivotLimit=pivot+testcase.pivotLength;
299 
300         encodedLength=0;
301 
302         for(;;) {
303             pInter=intermediate;
304             ucnv_convertEx(cnv, utf8Cnv,
305                            &pInter, pInterLimit,
306                            &pIn, pInLimit,
307                            pivot, &pivotSource, &pivotTarget, pivotLimit,
308                            FALSE, TRUE, pErrorCode);
309             encodedLength+=(int32_t)(pInter-intermediate);
310 
311             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
312                 /* make sure that we convert once more to really flush */
313                 *pErrorCode=U_ZERO_ERROR;
314             } else if(U_FAILURE(*pErrorCode)) {
315                 return;
316             } else {
317                 break;  // all done
318             }
319         }
320     }
321 protected:
322     UConverter *utf8Cnv;
323     const char *input8;
324     int32_t input8Length;
325 };
326 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)327 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
328     switch (index) {
329         case 0: name = "Roundtrip";     if (exec) return Roundtrip::get(*this); break;
330         case 1: name = "FromUnicode";   if (exec) return FromUnicode::get(*this); break;
331         case 2: name = "FromUTF8";      if (exec) return FromUTF8::get(*this); break;
332         default: name = ""; break;
333     }
334     return NULL;
335 }
336 
main(int argc,const char * argv[])337 int main(int argc, const char *argv[])
338 {
339     // Default values for command-line options.
340     options[CHARSET].value = "UTF-8";
341     options[CHUNK_LENGTH].value = "4096";
342     options[PIVOT_LENGTH].value = "1024";
343 
344     UErrorCode status = U_ZERO_ERROR;
345     UtfPerformanceTest test(argc, argv, status);
346 
347 	if (U_FAILURE(status)){
348         printf("The error is %s\n", u_errorName(status));
349         test.usage();
350         return status;
351     }
352 
353     if (test.run() == FALSE){
354         fprintf(stderr, "FAILED: Tests could not be run please check the "
355 			            "arguments.\n");
356         return -1;
357     }
358 
359     if (fromUCallbackCount > 0) {
360         printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
361     }
362 
363     return 0;
364 }
365