1 /*
2 **************************************************************************
3 * © 2016 and later: Unicode, Inc. and others.
4 * License & terms of use: http://www.unicode.org/copyright.html
5 *************************************************************************
6 *************************************************************************
7 * Copyright (C) 2002-2014, International Business Machines
8 * Corporation and others. All Rights Reserved.
9 *************************************************************************
10 * file name: utfperf.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2005Nov17
16 * created by: Raymond Yang
17 *
18 * Ported from utfper.c created by Markus W. Scherer
19 * Performance test program for Unicode converters
20 */
21
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include "unicode/uperf.h"
25 #include "cmemory.h" // for UPRV_LENGTHOF
26 #include "uoptions.h"
27
28 /* definitions and text buffers */
29
30 #define INPUT_CAPACITY (1024*1024)
31 #define INTERMEDIATE_CAPACITY 4096
32 #define INTERMEDIATE_SMALL_CAPACITY 20
33 #define PIVOT_CAPACITY 1024
34 #define OUTPUT_CAPACITY INPUT_CAPACITY
35
36 static char utf8[INPUT_CAPACITY];
37 static UChar pivot[INTERMEDIATE_CAPACITY];
38
39 static UChar output[OUTPUT_CAPACITY];
40 static char intermediate[OUTPUT_CAPACITY];
41
42 static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;
43
44 static int32_t fromUCallbackCount;
45
46 // Command-line options specific to utfperf.
47 // Options do not have abbreviations: Force readable command lines.
48 // (Using U+0001 for abbreviation characters.)
49 enum {
50 CHARSET,
51 CHUNK_LENGTH,
52 PIVOT_LENGTH,
53 UTFPERF_OPTIONS_COUNT
54 };
55
56 static UOption options[UTFPERF_OPTIONS_COUNT]={
57 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG),
58 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG),
59 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG)
60 };
61
62 static const char *const utfperf_usage =
63 "\t--charset Charset for which to test performance, e.g. windows-1251.\n"
64 "\t Default: UTF-8\n"
65 "\t--chunk Length (in bytes) of charset output chunks. [4096]\n"
66 "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
67 "\t [1024]\n";
68
69 // Test object.
70 class UtfPerformanceTest : public UPerfTest{
71 public:
UtfPerformanceTest(int32_t argc,const char * argv[],UErrorCode & status)72 UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
73 : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) {
74 if (U_SUCCESS(status)) {
75 charset = options[CHARSET].value;
76
77 chunkLength = atoi(options[CHUNK_LENGTH].value);
78 if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
79 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
80 status = U_ILLEGAL_ARGUMENT_ERROR;
81 }
82
83 pivotLength = atoi(options[PIVOT_LENGTH].value);
84 if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
85 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
86 status = U_ILLEGAL_ARGUMENT_ERROR;
87 }
88
89 int32_t inputLength;
90 UPerfTest::getBuffer(inputLength, status);
91 countInputCodePoints = u_countChar32(buffer, bufferLen);
92 u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
93 }
94 }
95
96 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
97
getBuffer() const98 const UChar *getBuffer() const { return buffer; }
getBufferLen() const99 int32_t getBufferLen() const { return bufferLen; }
100
101 const char *charset;
102 int32_t chunkLength, pivotLength;
103 };
104
105 U_CDECL_BEGIN
106 // Custom callback for counting callback calls.
107 static void U_CALLCONV
fromUCallback(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * pErrorCode)108 fromUCallback(const void *context,
109 UConverterFromUnicodeArgs *fromUArgs,
110 const UChar *codeUnits,
111 int32_t length,
112 UChar32 codePoint,
113 UConverterCallbackReason reason,
114 UErrorCode *pErrorCode) {
115 if (reason <= UCNV_IRREGULAR) {
116 ++fromUCallbackCount;
117 }
118 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
119 }
120 U_CDECL_END
121
122 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
123 class Command : public UPerfFunction {
124 protected:
Command(const UtfPerformanceTest & testcase)125 Command(const UtfPerformanceTest &testcase)
126 : testcase(testcase),
127 input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
128 errorCode(U_ZERO_ERROR) {
129 cnv=ucnv_open(testcase.charset, &errorCode);
130 if (U_FAILURE(errorCode)) {
131 fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
132 }
133 ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
134 }
135 public:
~Command()136 virtual ~Command(){
137 if(U_SUCCESS(errorCode)) {
138 ucnv_close(cnv);
139 }
140 }
141 // virtual void call(UErrorCode* pErrorCode) { ... }
getOperationsPerIteration()142 virtual long getOperationsPerIteration(){
143 return countInputCodePoints;
144 }
145
146 const UtfPerformanceTest &testcase;
147 const UChar *input;
148 int32_t inputLength;
149 UErrorCode errorCode;
150 UConverter *cnv;
151 };
152
153 // Test roundtrip UTF-16->encoding->UTF-16.
154 class Roundtrip : public Command {
155 protected:
Roundtrip(const UtfPerformanceTest & testcase)156 Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
157 public:
get(const UtfPerformanceTest & testcase)158 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
159 Roundtrip * t = new Roundtrip(testcase);
160 if (U_SUCCESS(t->errorCode)){
161 return t;
162 } else {
163 delete t;
164 return NULL;
165 }
166 }
call(UErrorCode * pErrorCode)167 virtual void call(UErrorCode* pErrorCode){
168 const UChar *pIn, *pInLimit;
169 UChar *pOut, *pOutLimit;
170 char *pInter, *pInterLimit;
171 const char *p;
172 UBool flush;
173
174 ucnv_reset(cnv);
175 fromUCallbackCount=0;
176
177 pIn=input;
178 pInLimit=input+inputLength;
179
180 pOut=output;
181 pOutLimit=output+OUTPUT_CAPACITY;
182
183 pInterLimit=intermediate+testcase.chunkLength;
184
185 encodedLength=outputLength=0;
186 flush=FALSE;
187
188 do {
189 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
190 pInter=intermediate;
191 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
192 encodedLength+=(int32_t)(pInter-intermediate);
193
194 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
195 /* make sure that we convert once more to really flush */
196 *pErrorCode=U_ZERO_ERROR;
197 } else if(U_FAILURE(*pErrorCode)) {
198 return;
199 } else if(pIn==pInLimit) {
200 flush=TRUE;
201 }
202
203 /* convert the block [intermediate..pInter[ back to UTF-16 */
204 p=intermediate;
205 ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
206 if(U_FAILURE(*pErrorCode)) {
207 return;
208 }
209 /* intermediate must have been consumed (p==pInter) because of the converter semantics */
210 } while(!flush);
211
212 outputLength=pOut-output;
213 if(inputLength!=outputLength) {
214 fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
215 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
216 }
217 }
218 };
219
220 // Test one-way conversion UTF-16->encoding.
221 class FromUnicode : public Command {
222 protected:
FromUnicode(const UtfPerformanceTest & testcase)223 FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
224 public:
get(const UtfPerformanceTest & testcase)225 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
226 FromUnicode * t = new FromUnicode(testcase);
227 if (U_SUCCESS(t->errorCode)){
228 return t;
229 } else {
230 delete t;
231 return NULL;
232 }
233 }
call(UErrorCode * pErrorCode)234 virtual void call(UErrorCode* pErrorCode){
235 const UChar *pIn, *pInLimit;
236 char *pInter, *pInterLimit;
237
238 ucnv_resetFromUnicode(cnv);
239 fromUCallbackCount=0;
240
241 pIn=input;
242 pInLimit=input+inputLength;
243
244 pInterLimit=intermediate+testcase.chunkLength;
245
246 encodedLength=0;
247
248 for(;;) {
249 pInter=intermediate;
250 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
251 encodedLength+=(int32_t)(pInter-intermediate);
252
253 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
254 /* make sure that we convert once more to really flush */
255 *pErrorCode=U_ZERO_ERROR;
256 } else if(U_FAILURE(*pErrorCode)) {
257 return;
258 } else {
259 break; // all done
260 }
261 }
262 }
263 };
264
265 // Test one-way conversion UTF-8->encoding.
266 class FromUTF8 : public Command {
267 protected:
FromUTF8(const UtfPerformanceTest & testcase)268 FromUTF8(const UtfPerformanceTest &testcase)
269 : Command(testcase),
270 utf8Cnv(NULL),
271 input8(utf8), input8Length(utf8Length) {
272 utf8Cnv=ucnv_open("UTF-8", &errorCode);
273 }
274 public:
get(const UtfPerformanceTest & testcase)275 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
276 FromUTF8 * t = new FromUTF8(testcase);
277 if (U_SUCCESS(t->errorCode)){
278 return t;
279 } else {
280 delete t;
281 return NULL;
282 }
283 }
~FromUTF8()284 ~FromUTF8() {
285 ucnv_close(utf8Cnv);
286 }
call(UErrorCode * pErrorCode)287 virtual void call(UErrorCode* pErrorCode){
288 const char *pIn, *pInLimit;
289 char *pInter, *pInterLimit;
290 UChar *pivotSource, *pivotTarget, *pivotLimit;
291
292 ucnv_resetToUnicode(utf8Cnv);
293 ucnv_resetFromUnicode(cnv);
294 fromUCallbackCount=0;
295
296 pIn=input8;
297 pInLimit=input8+input8Length;
298
299 pInterLimit=intermediate+testcase.chunkLength;
300
301 pivotSource=pivotTarget=pivot;
302 pivotLimit=pivot+testcase.pivotLength;
303
304 encodedLength=0;
305
306 for(;;) {
307 pInter=intermediate;
308 ucnv_convertEx(cnv, utf8Cnv,
309 &pInter, pInterLimit,
310 &pIn, pInLimit,
311 pivot, &pivotSource, &pivotTarget, pivotLimit,
312 FALSE, TRUE, pErrorCode);
313 encodedLength+=(int32_t)(pInter-intermediate);
314
315 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
316 /* make sure that we convert once more to really flush */
317 *pErrorCode=U_ZERO_ERROR;
318 } else if(U_FAILURE(*pErrorCode)) {
319 return;
320 } else {
321 break; // all done
322 }
323 }
324 }
325 protected:
326 UConverter *utf8Cnv;
327 const char *input8;
328 int32_t input8Length;
329 };
330
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)331 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
332 switch (index) {
333 case 0: name = "Roundtrip"; if (exec) return Roundtrip::get(*this); break;
334 case 1: name = "FromUnicode"; if (exec) return FromUnicode::get(*this); break;
335 case 2: name = "FromUTF8"; if (exec) return FromUTF8::get(*this); break;
336 default: name = ""; break;
337 }
338 return NULL;
339 }
340
main(int argc,const char * argv[])341 int main(int argc, const char *argv[])
342 {
343 // Default values for command-line options.
344 options[CHARSET].value = "UTF-8";
345 options[CHUNK_LENGTH].value = "4096";
346 options[PIVOT_LENGTH].value = "1024";
347
348 UErrorCode status = U_ZERO_ERROR;
349 UtfPerformanceTest test(argc, argv, status);
350
351 if (U_FAILURE(status)){
352 printf("The error is %s\n", u_errorName(status));
353 test.usage();
354 return status;
355 }
356
357 if (test.run() == FALSE){
358 fprintf(stderr, "FAILED: Tests could not be run please check the "
359 "arguments.\n");
360 return -1;
361 }
362
363 if (fromUCallbackCount > 0) {
364 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
365 }
366
367 return 0;
368 }
369