• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 2007, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   file name:  unisetperf.cpp
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2007jan31
12 *   created by: Markus Scherer
13 */
14 
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include "unicode/uperf.h"
19 #include "unicode/uniset.h"
20 #include "unicode/unistr.h"
21 #include "uoptions.h"
22 
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so that the macro behaves like a single
// primary expression wherever it is used (e.g. after sizeof or before a postfix operator).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
24 
25 // Command-line options specific to unisetperf.
26 // Options do not have abbreviations: Force readable command lines.
27 // (Using U+0001 for abbreviation characters.)
28 enum {
29     SET_PATTERN,
30     FAST_TYPE,
31     UNISETPERF_OPTIONS_COUNT
32 };
33 
34 static UOption options[UNISETPERF_OPTIONS_COUNT]={
35     UOPTION_DEF("pattern", '\x01', UOPT_REQUIRES_ARG),
36     UOPTION_DEF("type",    '\x01', UOPT_REQUIRES_ARG)
37 };
38 
// Help text for the unisetperf-specific options; handed to the UPerfTest
// constructor. One string literal per option.
static const char *const unisetperf_usage =
    "\t--pattern   UnicodeSet pattern for instantiation.\n\t            Default: [:ID_Continue:]\n"
    "\t--type      Type of UnicodeSet: slow fast\n\t            Default: slow\n";
44 
45 // Test object with setup data.
46 class UnicodeSetPerformanceTest : public UPerfTest {
47 public:
UnicodeSetPerformanceTest(int32_t argc,const char * argv[],UErrorCode & status)48     UnicodeSetPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
49             : UPerfTest(argc, argv, options, LENGTHOF(options), unisetperf_usage, status),
50               utf8(NULL), utf8Length(0), countInputCodePoints(0), spanCount(0) {
51         if (U_SUCCESS(status)) {
52             UnicodeString pattern=UnicodeString(options[SET_PATTERN].value, -1, US_INV).unescape();
53             set.applyPattern(pattern, status);
54             prefrozen=set;
55             if(0==strcmp(options[FAST_TYPE].value, "fast")) {
56                 set.freeze();
57             }
58 
59             int32_t inputLength;
60             UPerfTest::getBuffer(inputLength, status);
61             if(U_SUCCESS(status) && inputLength>0) {
62                 countInputCodePoints = u_countChar32(buffer, bufferLen);
63 
64                 countSpans();
65 
66                 // Preflight the UTF-8 length and allocate utf8.
67                 u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status);
68                 if(status==U_BUFFER_OVERFLOW_ERROR) {
69                     utf8=(char *)malloc(utf8Length);
70                     if(utf8!=NULL) {
71                         status=U_ZERO_ERROR;
72                         u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status);
73                     } else {
74                         status=U_MEMORY_ALLOCATION_ERROR;
75                     }
76                 }
77 
78                 if(verbose) {
79                     printf("code points:%ld  len16:%ld  len8:%ld  spans:%ld  "
80                            "cp/span:%.3g  UChar/span:%.3g  B/span:%.3g  B/cp:%.3g\n",
81                            (long)countInputCodePoints, (long)bufferLen, (long)utf8Length, (long)spanCount,
82                            (double)countInputCodePoints/spanCount, (double)bufferLen/spanCount, (double)utf8Length/spanCount,
83                            (double)utf8Length/countInputCodePoints);
84                 }
85             }
86         }
87     }
88 
89     virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
90 
91     // Count spans of characters that are in the set,
92     // and spans of characters that are not in the set.
93     // If the very first character is in the set, then one additional
94     // not-span is counted.
countSpans()95     void countSpans() {
96         const UChar *s=getBuffer();
97         int32_t length=getBufferLen();
98         int32_t i=0;
99         UBool tf=FALSE;
100         while(i<length) {
101             i=span(s, length, i, tf);
102             tf=(UBool)(!tf);
103             ++spanCount;
104         }
105     }
span(const UChar * s,int32_t length,int32_t start,UBool tf) const106     int32_t span(const UChar *s, int32_t length, int32_t start, UBool tf) const {
107         UChar32 c;
108         int32_t prev;
109         while((prev=start)<length) {
110             U16_NEXT(s, start, length, c);
111             if(tf!=set.contains(c)) {
112                 break;
113             }
114         }
115         return prev;
116     }
117 
getBuffer() const118     const UChar *getBuffer() const { return buffer; }
getBufferLen() const119     int32_t getBufferLen() const { return bufferLen; }
120 
121     char *utf8;
122     int32_t utf8Length;
123 
124     // Number of code points in the input text.
125     int32_t countInputCodePoints;
126     int32_t spanCount;
127 
128     UnicodeSet set;
129     UnicodeSet prefrozen;
130 };
131 
132 // Performance test function object.
133 class Command : public UPerfFunction {
134 protected:
Command(const UnicodeSetPerformanceTest & testcase)135     Command(const UnicodeSetPerformanceTest &testcase) : testcase(testcase) {}
136 
137 public:
~Command()138     virtual ~Command() {}
139 
140     // virtual void call(UErrorCode* pErrorCode) { ... }
141 
getOperationsPerIteration()142     virtual long getOperationsPerIteration() {
143         // Number of code points tested:
144         // Input code points, plus one for the end of each span except the last span.
145         return testcase.countInputCodePoints+testcase.spanCount-1;
146     }
147 
getEventsPerIteration()148     virtual long getEventsPerIteration() {
149         return testcase.spanCount;
150     }
151 
152     const UnicodeSetPerformanceTest &testcase;
153 };
154 
155 class Contains : public Command {
156 protected:
Contains(const UnicodeSetPerformanceTest & testcase)157     Contains(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
158         // Verify that the frozen set is equal to the unfrozen one.
159         UnicodeSet set;
160         UChar32 c;
161 
162         for(c=0; c<=0x10ffff; ++c) {
163             if(testcase.set.contains(c)) {
164                 set.add(c);
165             }
166         }
167         if(set!=testcase.set) {
168             fprintf(stderr, "error: frozen set != original!\n");
169         }
170     }
171 public:
get(const UnicodeSetPerformanceTest & testcase)172     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
173         return new Contains(testcase);
174     }
call(UErrorCode * pErrorCode)175     virtual void call(UErrorCode* pErrorCode) {
176         const UnicodeSet &set=testcase.set;
177         const UChar *s=testcase.getBuffer();
178         int32_t length=testcase.getBufferLen();
179         int32_t count=0;
180         int32_t i=0;
181         UBool tf=FALSE;
182         while(i<length) {
183             i+=span(set, s+i, length-i, tf);
184             tf=(UBool)(!tf);
185             ++count;
186         }
187         if(count!=testcase.spanCount) {
188             fprintf(stderr, "error: Contains() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
189                     (long)count, (long)testcase.spanCount);
190         }
191     }
span(const UnicodeSet & set,const UChar * s,int32_t length,UBool tf)192     static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
193         UChar32 c;
194         int32_t start=0, prev;
195         while((prev=start)<length) {
196             U16_NEXT(s, start, length, c);
197             if(tf!=set.contains(c)) {
198                 break;
199             }
200         }
201         return prev;
202     }
203 };
204 
205 class SpanUTF16 : public Command {
206 protected:
SpanUTF16(const UnicodeSetPerformanceTest & testcase)207     SpanUTF16(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
208         // Verify that the frozen set is equal to the unfrozen one.
209         UnicodeSet set;
210         UChar utf16[2];
211         UChar32 c, c2;
212 
213         for(c=0; c<=0xffff; ++c) {
214             utf16[0]=(UChar)c;
215             if(testcase.set.span(utf16, 1, USET_SPAN_CONTAINED)>0) {
216                 set.add(c);
217             }
218         }
219         for(c=0xd800; c<=0xdbff; ++c) {
220             utf16[0]=(UChar)c;
221             for(c2=0xdc00; c2<=0xdfff; ++c2) {
222                 utf16[1]=(UChar)c2;
223                 if(testcase.set.span(utf16, 2, USET_SPAN_CONTAINED)>0) {
224                     set.add(U16_GET_SUPPLEMENTARY(c, c2));
225                 }
226             }
227         }
228 
229         if(set!=testcase.set) {
230             fprintf(stderr, "error: frozen set != original!\n");
231         }
232     }
233 public:
get(const UnicodeSetPerformanceTest & testcase)234     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
235         return new SpanUTF16(testcase);
236     }
call(UErrorCode * pErrorCode)237     virtual void call(UErrorCode* pErrorCode) {
238         const UnicodeSet &set=testcase.set;
239         const UChar *s=testcase.getBuffer();
240         int32_t length=testcase.getBufferLen();
241         int32_t count=0;
242         int32_t i=0;
243         UBool tf=FALSE;
244         while(i<length) {
245             i+=set.span(s+i, length-i, (USetSpanCondition)tf);
246             tf=(UBool)(!tf);
247             ++count;
248         }
249         if(count!=testcase.spanCount) {
250             fprintf(stderr, "error: SpanUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
251                     (long)count, (long)testcase.spanCount);
252         }
253     }
254 };
255 
256 class SpanBackUTF16 : public Command {
257 protected:
SpanBackUTF16(const UnicodeSetPerformanceTest & testcase)258     SpanBackUTF16(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
259         // Verify that the frozen set is equal to the unfrozen one.
260         UnicodeSet set;
261         UChar utf16[2];
262         UChar32 c, c2;
263 
264         for(c=0; c<=0xffff; ++c) {
265             utf16[0]=(UChar)c;
266             if(testcase.set.spanBack(utf16, 1, USET_SPAN_CONTAINED)==0) {
267                 set.add(c);
268             }
269         }
270         for(c=0xd800; c<=0xdbff; ++c) {
271             utf16[0]=(UChar)c;
272             for(c2=0xdc00; c2<=0xdfff; ++c2) {
273                 utf16[1]=(UChar)c2;
274                 if(testcase.set.spanBack(utf16, 2, USET_SPAN_CONTAINED)==0) {
275                     set.add(U16_GET_SUPPLEMENTARY(c, c2));
276                 }
277             }
278         }
279 
280         if(set!=testcase.set) {
281             fprintf(stderr, "error: frozen set != original!\n");
282         }
283     }
284 public:
get(const UnicodeSetPerformanceTest & testcase)285     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
286         return new SpanBackUTF16(testcase);
287     }
call(UErrorCode * pErrorCode)288     virtual void call(UErrorCode* pErrorCode) {
289         const UnicodeSet &set=testcase.set;
290         const UChar *s=testcase.getBuffer();
291         int32_t length=testcase.getBufferLen();
292         int32_t count=0;
293         /*
294          * Get the same spans as with span() where we always start with a not-contained span.
295          * If testcase.spanCount is an odd number, then the last span() was not-contained.
296          * The last spanBack() must be not-contained to match the first span().
297          */
298         UBool tf=(UBool)((testcase.spanCount&1)==0);
299         while(length>0 || !tf) {
300             length=set.spanBack(s, length, (USetSpanCondition)tf);
301             tf=(UBool)(!tf);
302             ++count;
303         }
304         if(count!=testcase.spanCount) {
305             fprintf(stderr, "error: SpanBackUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
306                     (long)count, (long)testcase.spanCount);
307         }
308     }
309 };
310 
311 class SpanUTF8 : public Command {
312 protected:
SpanUTF8(const UnicodeSetPerformanceTest & testcase)313     SpanUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
314         // Verify that the frozen set is equal to the unfrozen one.
315         UnicodeSet set;
316         char utf8[4];
317         UChar32 c;
318         int32_t length;
319 
320         for(c=0; c<=0x10ffff; ++c) {
321             if(c==0xd800) {
322                 c=0xe000;
323             }
324             length=0;
325             U8_APPEND_UNSAFE(utf8, length, c);
326             if(testcase.set.spanUTF8(utf8, length, USET_SPAN_CONTAINED)>0) {
327                 set.add(c);
328             }
329         }
330         if(set!=testcase.set) {
331             fprintf(stderr, "error: frozen set != original!\n");
332         }
333     }
334 public:
get(const UnicodeSetPerformanceTest & testcase)335     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
336         return new SpanUTF8(testcase);
337     }
call(UErrorCode * pErrorCode)338     virtual void call(UErrorCode* pErrorCode) {
339         const UnicodeSet &set=testcase.set;
340         const char *s=testcase.utf8;
341         int32_t length=testcase.utf8Length;
342         int32_t count=0;
343         int32_t i=0;
344         UBool tf=FALSE;
345         while(i<length) {
346             i+=set.spanUTF8(s+i, length-i, (USetSpanCondition)tf);
347             tf=(UBool)(!tf);
348             ++count;
349         }
350         if(count!=testcase.spanCount) {
351             fprintf(stderr, "error: SpanUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
352                     (long)count, (long)testcase.spanCount);
353         }
354     }
355 };
356 
357 class SpanBackUTF8 : public Command {
358 protected:
SpanBackUTF8(const UnicodeSetPerformanceTest & testcase)359     SpanBackUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
360         // Verify that the frozen set is equal to the unfrozen one.
361         UnicodeSet set;
362         char utf8[4];
363         UChar32 c;
364         int32_t length;
365 
366         for(c=0; c<=0x10ffff; ++c) {
367             if(c==0xd800) {
368                 c=0xe000;
369             }
370             length=0;
371             U8_APPEND_UNSAFE(utf8, length, c);
372             if(testcase.set.spanBackUTF8(utf8, length, USET_SPAN_CONTAINED)==0) {
373                 set.add(c);
374             }
375         }
376         if(set!=testcase.set) {
377             fprintf(stderr, "error: frozen set != original!\n");
378         }
379     }
380 public:
get(const UnicodeSetPerformanceTest & testcase)381     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
382         return new SpanBackUTF8(testcase);
383     }
call(UErrorCode * pErrorCode)384     virtual void call(UErrorCode* pErrorCode) {
385         const UnicodeSet &set=testcase.set;
386         const char *s=testcase.utf8;
387         int32_t length=testcase.utf8Length;
388         int32_t count=0;
389         /*
390          * Get the same spans as with span() where we always start with a not-contained span.
391          * If testcase.spanCount is an odd number, then the last span() was not-contained.
392          * The last spanBack() must be not-contained to match the first span().
393          */
394         UBool tf=(UBool)((testcase.spanCount&1)==0);
395         while(length>0 || !tf) {
396             length=set.spanBackUTF8(s, length, (USetSpanCondition)tf);
397             tf=(UBool)(!tf);
398             ++count;
399         }
400         if(count!=testcase.spanCount) {
401             fprintf(stderr, "error: SpanBackUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
402                     (long)count, (long)testcase.spanCount);
403         }
404     }
405 };
406 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)407 UPerfFunction* UnicodeSetPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
408     switch (index) {
409         case 0: name = "Contains";     if (exec) return Contains::get(*this); break;
410         case 1: name = "SpanUTF16";    if (exec) return SpanUTF16::get(*this); break;
411         case 2: name = "SpanBackUTF16";if (exec) return SpanBackUTF16::get(*this); break;
412         case 3: name = "SpanUTF8";     if (exec) return SpanUTF8::get(*this); break;
413         case 4: name = "SpanBackUTF8"; if (exec) return SpanBackUTF8::get(*this); break;
414         default: name = ""; break;
415     }
416     return NULL;
417 }
418 
main(int argc,const char * argv[])419 int main(int argc, const char *argv[])
420 {
421     // Default values for command-line options.
422     options[SET_PATTERN].value = "[:ID_Continue:]";
423     options[FAST_TYPE].value = "slow";
424 
425     UErrorCode status = U_ZERO_ERROR;
426     UnicodeSetPerformanceTest test(argc, argv, status);
427 
428 	if (U_FAILURE(status)){
429         printf("The error is %s\n", u_errorName(status));
430         test.usage();
431         return status;
432     }
433 
434     if (test.run() == FALSE){
435         fprintf(stderr, "FAILED: Tests could not be run, please check the "
436 			            "arguments.\n");
437         return 1;
438     }
439 
440     return 0;
441 }
442