• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (C) 2001-2006 IBM, Inc.   All Rights Reserved.
4 *
5 ********************************************************************/
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <locale.h>
10 #include <limits.h>
11 #include <string.h>
12 #include "unicode/uperf.h"
13 #include "uoptions.h"
14 #include "unicode/coll.h"
15 #include <unicode/ucoleitr.h>
16 
17 
18 
/* To store an array of string<UNIT> in contiguous space.
Since string<UNIT> itself is treated as an array of UNIT, this
class will ease our memory management for an array of string<UNIT>.
*/
23 
24 //template<typename UNIT>
/* COMPATCT_ARRAY(Name, UNIT) defines a struct Name that keeps many strings of
 * UNIT back to back in one growable heap buffer.
 *   count  - number of strings appended so far
 *   index  - count + 1 offsets into data; string i occupies
 *            data[index[i]] .. data[index[i + 1] - 1], terminator included
 *   data   - the shared storage; append_one() may move it, so pointers
 *            obtained from last()/dataOf() are only valid until the next append
 * Allocation failure is reported on stderr and ends the process, instead of
 * dereferencing the NULL that realloc returned.
 */
#define COMPATCT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays{\
    /*copying is unsupported: declared here, defined nowhere*/ \
    CompactArrays(const CompactArrays & );\
    CompactArrays & operator=(const CompactArrays & );\
    int32_t   count;/*total number of the strings*/ \
    int32_t * index;/*relative offset in data*/ \
    UNIT    * data; /*the real space to hold strings*/ \
    \
    ~CompactArrays(){free(index);free(data);} \
    /*initializers are listed in declaration order, the order they really run in*/ \
    CompactArrays():count(0), index(NULL), data(NULL){ \
    index = (int32_t *) grow(NULL, sizeof(int32_t)); \
    index[0] = 0; \
    } \
    /*realloc wrapper that never hands back NULL for a non-empty request*/ \
    static void * grow(void * block, size_t bytes){ \
    void * grown = realloc(block, bytes); \
    if (grown == NULL && bytes != 0) { \
    fprintf(stderr, "CompactArrays: out of memory\n"); \
    exit(-1); \
    } \
    return grown; \
    } \
    /*reserve room for one more string; the caller fills it through last()*/ \
    void append_one(int32_t theLen){ /*include terminal NULL*/ \
    count++; \
    index = (int32_t *) grow(index, sizeof(int32_t) * (count + 1)); \
    index[count] = index[count - 1] + theLen; \
    data = (UNIT *) grow(data, sizeof(UNIT) * index[count]); \
    } \
    UNIT * last(){return data + index[count - 1];} \
    UNIT * dataOf(int32_t i){return data + index[i];} \
    int32_t lengthOf(int i){return index[i+1] - index[i] - 1; }	/*exclude terminating NULL*/  \
};
48 
49 //typedef CompactArrays<UChar> CA_uchar;
50 //typedef CompactArrays<char> CA_char;
51 //typedef CompactArrays<uint8_t> CA_uint8;
52 //typedef CompactArrays<WCHAR> CA_win_wchar;
53 
54 COMPATCT_ARRAY(CA_uchar, UChar)
55 COMPATCT_ARRAY(CA_char, char)
56 COMPATCT_ARRAY(CA_uint8, uint8_t)
57 COMPATCT_ARRAY(CA_win_wchar, WCHAR)
58 
59 
60 struct DataIndex {
61     static DWORD        win_langid;     // for qsort callback function
62     static UCollator *  col;            // for qsort callback function
63     uint8_t *   icu_key;
64     UChar *     icu_data;
65     int32_t     icu_data_len;
66     char*       posix_key;
67     char*       posix_data;
68     int32_t     posix_data_len;
69     char*       win_key;
70     WCHAR *     win_data;
71     int32_t     win_data_len;
72 };
73 DWORD DataIndex::win_langid;
74 UCollator * DataIndex::col;
75 
76 
77 
78 class CmdKeyGen : public UPerfFunction {
79     typedef	void (CmdKeyGen::* Func)(int32_t);
80     enum{MAX_KEY_LENGTH = 5000};
81     UCollator * col;
82     DWORD       win_langid;
83     int32_t     count;
84     DataIndex * data;
85     Func 	    fn;
86 
87     union { // to save sapce
88         uint8_t		icu_key[MAX_KEY_LENGTH];
89         char        posix_key[MAX_KEY_LENGTH];
90         WCHAR		win_key[MAX_KEY_LENGTH];
91     };
92 public:
CmdKeyGen(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * data,Func fn,int32_t)93     CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataIndex * data,Func fn,int32_t)
94         :col(col),win_langid(win_langid), count(count), data(data), fn(fn){}
95 
getOperationsPerIteration()96         virtual long getOperationsPerIteration(){return count;}
97 
call(UErrorCode * status)98         virtual void call(UErrorCode* status){
99             for(int32_t i = 0; i< count; i++){
100                 (this->*fn)(i);
101             }
102         }
103 
icu_key_null(int32_t i)104         void icu_key_null(int32_t i){
105             ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH);
106         }
107 
icu_key_len(int32_t i)108         void icu_key_len(int32_t i){
109             ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key, MAX_KEY_LENGTH);
110         }
111 
112         // pre-generated in CollPerfTest::prepareData(), need not to check error here
win_key_null(int32_t i)113         void win_key_null(int32_t i){
114             //LCMAP_SORTsk             0x00000400  // WC sort sk (normalize)
115             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_key, MAX_KEY_LENGTH);
116         }
117 
win_key_len(int32_t i)118         void win_key_len(int32_t i){
119             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].win_data_len, win_key, MAX_KEY_LENGTH);
120         }
121 
posix_key_null(int32_t i)122         void posix_key_null(int32_t i){
123             strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH);
124         }
125 };
126 
127 
128 class CmdIter : public UPerfFunction {
129     typedef	void (CmdIter::* Func)(UErrorCode* , int32_t );
130     int32_t             count;
131     CA_uchar *          data;
132     Func                fn;
133     UCollationElements *iter;
134     int32_t             exec_count;
135 public:
CmdIter(UErrorCode & status,UCollator * col,int32_t count,CA_uchar * data,Func fn,int32_t,int32_t)136     CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data, Func fn, int32_t,int32_t)
137         :count(count), data(data), fn(fn){
138             exec_count = 0;
139             UChar dummytext[] = {0, 0};
140             iter = ucol_openElements(col, NULL, 0, &status);
141             ucol_setText(iter, dummytext, 1, &status);
142         }
~CmdIter()143         ~CmdIter(){
144             ucol_closeElements(iter);
145         }
146 
getOperationsPerIteration()147         virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
148 
call(UErrorCode * status)149         virtual void call(UErrorCode* status){
150             exec_count = 0;
151             for(int32_t i = 0; i< count; i++){
152                 (this->*fn)(status, i);
153             }
154         }
155 
icu_forward_null(UErrorCode * status,int32_t i)156         void icu_forward_null(UErrorCode* status, int32_t i){
157             ucol_setText(iter, data->dataOf(i), -1, status);
158             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
159         }
160 
icu_forward_len(UErrorCode * status,int32_t i)161         void icu_forward_len(UErrorCode* status, int32_t i){
162             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
163             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
164         }
165 
icu_backward_null(UErrorCode * status,int32_t i)166         void icu_backward_null(UErrorCode* status, int32_t i){
167             ucol_setText(iter, data->dataOf(i), -1, status);
168             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
169         }
170 
icu_backward_len(UErrorCode * status,int32_t i)171         void icu_backward_len(UErrorCode* status, int32_t i){
172             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
173             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
174         }
175 };
176 
177 class CmdIterAll : public UPerfFunction {
178     typedef	void (CmdIterAll::* Func)(UErrorCode* status);
179     int32_t     count;
180     UChar *     data;
181     Func        fn;
182     UCollationElements *iter;
183     int32_t     exec_count;
184 
185 public:
186     enum CALL {forward_null, forward_len, backward_null, backward_len};
187 
~CmdIterAll()188     ~CmdIterAll(){
189         ucol_closeElements(iter);
190     }
CmdIterAll(UErrorCode & status,UCollator * col,int32_t count,UChar * data,CALL call,int32_t,int32_t)191     CmdIterAll(UErrorCode & status, UCollator * col, int32_t count,  UChar * data, CALL call,int32_t,int32_t)
192         :count(count),data(data)
193     {
194         exec_count = 0;
195         if (call == forward_null || call == backward_null) {
196             iter = ucol_openElements(col, data, -1, &status);
197         } else {
198             iter = ucol_openElements(col, data, count, &status);
199         }
200 
201         if (call == forward_null || call == forward_len){
202             fn = &CmdIterAll::icu_forward_all;
203         } else {
204             fn = &CmdIterAll::icu_backward_all;
205         }
206     }
getOperationsPerIteration()207     virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
208 
call(UErrorCode * status)209     virtual void call(UErrorCode* status){
210         (this->*fn)(status);
211     }
212 
icu_forward_all(UErrorCode * status)213     void icu_forward_all(UErrorCode* status){
214         int strlen = count - 5;
215         int count5 = 5;
216         int strindex = 0;
217         ucol_setOffset(iter, strindex, status);
218         while (TRUE) {
219             if (ucol_next(iter, status) == UCOL_NULLORDER) {
220                 break;
221             }
222             exec_count++;
223             count5 --;
224             if (count5 == 0) {
225                 strindex += 10;
226                 if (strindex > strlen) {
227                     break;
228                 }
229                 ucol_setOffset(iter, strindex, status);
230                 count5 = 5;
231             }
232         }
233     }
234 
icu_backward_all(UErrorCode * status)235     void icu_backward_all(UErrorCode* status){
236         int strlen = count;
237         int count5 = 5;
238         int strindex = 5;
239         ucol_setOffset(iter, strindex, status);
240         while (TRUE) {
241             if (ucol_previous(iter, status) == UCOL_NULLORDER) {
242                 break;
243             }
244             exec_count++;
245             count5 --;
246             if (count5 == 0) {
247                 strindex += 10;
248                 if (strindex > strlen) {
249                     break;
250                 }
251                 ucol_setOffset(iter, strindex, status);
252                 count5 = 5;
253             }
254         }
255     }
256 
257 };
258 
259 struct CmdQsort : public UPerfFunction{
260 
q_randomCmdQsort261     static int q_random(const void * a, const void * b){
262         uint8_t * key_a = ((DataIndex *)a)->icu_key;
263         uint8_t * key_b = ((DataIndex *)b)->icu_key;
264 
265         int   val_a = 0;
266         int   val_b = 0;
267         while (*key_a != 0) {val_a += val_a*37 + *key_a++;}
268         while (*key_b != 0) {val_b += val_b*37 + *key_b++;}
269         return val_a - val_b;
270     }
271 
272 #define QCAST() \
273     DataIndex * da = (DataIndex *) a; \
274     DataIndex * db = (DataIndex *) b; \
275     ++exec_count
276 
icu_strcoll_nullCmdQsort277     static int icu_strcoll_null(const void *a, const void *b){
278         QCAST();
279         return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_EQUAL;
280     }
281 
icu_strcoll_lenCmdQsort282     static int icu_strcoll_len(const void *a, const void *b){
283         QCAST();
284         return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_data, db->icu_data_len) - UCOL_EQUAL;
285     }
286 
icu_cmpkeyCmdQsort287     static int icu_cmpkey (const void *a, const void *b){
288         QCAST();
289         return strcmp((char *) da->icu_key, (char *) db->icu_key);
290     }
291 
win_cmp_nullCmdQsort292     static int win_cmp_null(const void *a, const void *b) {
293         QCAST();
294         //CSTR_LESS_THAN		1
295         //CSTR_EQUAL			2
296         //CSTR_GREATER_THAN		3
297         int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data, -1);
298         if (t == 0){
299             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
300             exit(-1);
301         } else{
302             return t - CSTR_EQUAL;
303         }
304     }
305 
win_cmp_lenCmdQsort306     static int win_cmp_len(const void *a, const void *b) {
307         QCAST();
308         int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len, db->win_data, db->win_data_len);
309         if (t == 0){
310             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
311             exit(-1);
312         } else{
313             return t - CSTR_EQUAL;
314         }
315     }
316 
317 #define QFUNC(name, func, data) \
318     static int name (const void *a, const void *b){ \
319     QCAST(); \
320     return func(da->data, db->data); \
321     }
322 
323     QFUNC(posix_strcoll_null, strcoll, posix_data)
324         QFUNC(posix_cmpkey, strcmp, posix_key)
325         QFUNC(win_cmpkey, strcmp, win_key)
326         QFUNC(win_wcscmp, wcscmp, win_data)
327         QFUNC(icu_strcmp, u_strcmp, icu_data)
328         QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
329 
330 private:
331     static int32_t exec_count; // potential muilt-thread problem
332 
333     typedef	int (* Func)(const void *, const void *);
334 
335     Func    fn;
336     void *  base;   //Start of target array.
337     int32_t num;    //Array size in elements.
338     int32_t width;  //Element size in bytes.
339 
340     void *  backup; //copy source of base
341 public:
CmdQsortCmdQsort342     CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func fn, int32_t,int32_t)
343         :backup(theBase),num(num),width(width),fn(fn){
344             base = malloc(num * width);
345             time_empty(100, &status); // warm memory/cache
346         }
347 
~CmdQsortCmdQsort348         ~CmdQsort(){
349             free(base);
350         }
351 
empty_callCmdQsort352         void empty_call(){
353             exec_count = 0;
354             memcpy(base, backup, num * width);
355         }
356 
time_emptyCmdQsort357         double time_empty(int32_t n, UErrorCode* status) {
358             UTimer start, stop;
359             utimer_getTime(&start);
360             while (n-- > 0) {
361                 empty_call();
362             }
363             utimer_getTime(&stop);
364             return utimer_getDeltaSeconds(&start,&stop); // ms
365         }
366 
callCmdQsort367         virtual void call(UErrorCode* status){
368             exec_count = 0;
369             memcpy(base, backup, num * width);
370             qsort(base, num, width, fn);
371         }
timeCmdQsort372         virtual double time(int32_t n, UErrorCode* status) {
373             double t1 = time_empty(n,status);
374             double t2 = UPerfFunction::time(n, status);
375             return  t2-t1;// < 0 ? t2 : t2-t1;
376         }
377 
getOperationsPerIterationCmdQsort378         virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
379 };
380 int32_t CmdQsort::exec_count;
381 
382 
383 class CmdBinSearch : public UPerfFunction{
384 public:
385     typedef	int (CmdBinSearch::* Func)(int, int);
386 
387     UCollator * col;
388     DWORD       win_langid;
389     int32_t     count;
390     DataIndex * rnd;
391     DataIndex * ord;
392     Func 	    fn;
393     int32_t     exec_count;
394 
CmdBinSearch(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)395     CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)
396         :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(fn),exec_count(0){}
397 
398 
call(UErrorCode * status)399         virtual void call(UErrorCode* status){
400             exec_count = 0;
401             for(int32_t i = 0; i< count; i++){ // search all data
402                 binary_search(i);
403             }
404         }
getOperationsPerIteration()405         virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
406 
binary_search(int32_t random)407         void binary_search(int32_t random)	{
408             int low   = 0;
409             int high  = count - 1;
410             int guess;
411             int last_guess = -1;
412             int r;
413             while (TRUE) {
414                 guess = (high + low)/2;
415                 if (last_guess == guess) break; // nothing to search
416 
417                 r = (this->*fn)(random, guess);
418                 exec_count++;
419 
420                 if (r == 0)
421                     return;	// found, search end.
422                 if (r < 0) {
423                     high = guess;
424                 } else {
425                     low  = guess;
426                 }
427                 last_guess = guess;
428             }
429         }
430 
icu_strcoll_null(int32_t i,int32_t j)431         int icu_strcoll_null(int32_t i, int32_t j){
432             return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1);
433         }
434 
icu_strcoll_len(int32_t i,int32_t j)435         int icu_strcoll_len(int32_t i, int32_t j){
436             return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j].icu_data, ord[j].icu_data_len);
437         }
438 
icu_cmpkey(int32_t i,int32_t j)439         int icu_cmpkey(int32_t i, int32_t j) {
440             return strcmp( (char *) rnd[i].icu_key, (char *) ord[j].icu_key );
441         }
442 
win_cmp_null(int32_t i,int32_t j)443         int win_cmp_null(int32_t i, int32_t j) {
444             int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].win_data, -1);
445             if (t == 0){
446                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
447                 exit(-1);
448             } else{
449                 return t - CSTR_EQUAL;
450             }
451         }
452 
win_cmp_len(int32_t i,int32_t j)453         int win_cmp_len(int32_t i, int32_t j) {
454             int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_data_len, ord[j].win_data, ord[j].win_data_len);
455             if (t == 0){
456                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
457                 exit(-1);
458             } else{
459                 return t - CSTR_EQUAL;
460             }
461         }
462 
463 #define BFUNC(name, func, data) \
464     int name(int32_t i, int32_t j) { \
465     return func(rnd[i].data, ord[j].data); \
466     }
467 
468         BFUNC(posix_strcoll_null, strcoll, posix_data)
469             BFUNC(posix_cmpkey, strcmp, posix_key)
470             BFUNC(win_cmpkey, strcmp, win_key)
471             BFUNC(win_wcscmp, wcscmp, win_data)
472             BFUNC(icu_strcmp, u_strcmp, icu_data)
473             BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
474 };
475 
476 class CollPerfTest : public UPerfTest {
477 public:
478     UCollator *     col;
479     DWORD           win_langid;
480 
481     UChar * icu_data_all;
482     int32_t icu_data_all_len;
483 
484     int32_t         count;
485     CA_uchar *      icu_data;
486     CA_uint8 *      icu_key;
487     CA_char *       posix_data;
488     CA_char *       posix_key;
489     CA_win_wchar *  win_data;
490     CA_char *       win_key;
491 
492     DataIndex * rnd_index; // random by icu key
493     DataIndex * ord_win_data;
494     DataIndex * ord_win_key;
495     DataIndex * ord_posix_data;
496     DataIndex * ord_posix_key;
497     DataIndex * ord_icu_data;
498     DataIndex * ord_icu_key;
499     DataIndex * ord_win_wcscmp;
500     DataIndex * ord_icu_strcmp;
501     DataIndex * ord_icu_cmpcpo;
502 
~CollPerfTest()503     virtual ~CollPerfTest(){
504         ucol_close(col);
505         delete [] icu_data_all;
506         delete icu_data;
507         delete icu_key;
508         delete posix_data;
509         delete posix_key;
510         delete win_data;
511         delete win_key;
512         delete[] rnd_index;
513         delete[] ord_win_data;
514         delete[] ord_win_key;
515         delete[] ord_posix_data;
516         delete[] ord_posix_key;
517         delete[] ord_icu_data;
518         delete[] ord_icu_key;
519         delete[] ord_win_wcscmp;
520         delete[] ord_icu_strcmp;
521         delete[] ord_icu_cmpcpo;
522     }
523 
CollPerfTest(int32_t argc,const char * argv[],UErrorCode & status)524     CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest(argc, argv, status){
525         col = NULL;
526         icu_data_all = NULL;
527         icu_data = NULL;
528         icu_key = NULL;
529         posix_data = NULL;
530         posix_key = NULL;
531         win_data =NULL;
532         win_key = NULL;
533 
534         rnd_index = NULL;
535         ord_win_data= NULL;
536         ord_win_key= NULL;
537         ord_posix_data= NULL;
538         ord_posix_key= NULL;
539         ord_icu_data= NULL;
540         ord_icu_key= NULL;
541         ord_win_wcscmp = NULL;
542         ord_icu_strcmp = NULL;
543         ord_icu_cmpcpo = NULL;
544 
545         if (U_FAILURE(status)){
546             return;
547         }
548 
549         // Parse additional arguments
550 
551         UOption options[] = {
552             UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG),        // Windows Language ID number.
553                 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG),      // --rulefile <filename>
554                 // Collation related arguments. All are optional.
555                 // To simplify parsing, two choice arguments are disigned as NO_ARG.
556                 // The default value is UPPER word in the comment
557                 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG),          // --french <on | OFF>
558                 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG),       // --alternate <NON_IGNORE | shifted>
559                 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefirst <lower | upper | OFF>
560                 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG),       // --caselevel <on | OFF>
561                 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG),          // --normal <on | OFF>
562                 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG),  // --strength <1-5>
563         };
564         int32_t opt_len = (sizeof(options)/sizeof(options[0]));
565         enum {i, r,f,a,c,l,n,s};   // The buffer between the option items' order and their references
566 
567         _remainingArgc = u_parseArgs(_remainingArgc, (char**)argv, opt_len, options);
568 
569         if (_remainingArgc < 0){
570             status = U_ILLEGAL_ARGUMENT_ERROR;
571             return;
572         }
573 
574         if (locale == NULL){
575             locale = "en_US";   // set default locale
576         }
577 
578         //#ifdef U_WINDOWS
579         if (options[i].doesOccur) {
580             char *endp;
581             int tmp = strtol(options[i].value, &endp, 0);
582             if (endp == options[i].value) {
583                 status = U_ILLEGAL_ARGUMENT_ERROR;
584                 return;
585             }
586             win_langid = MAKELCID(tmp, SORT_DEFAULT);
587         } else {
588             win_langid = uloc_getLCID(locale);
589         }
590         //#endif
591 
592         //  Set up an ICU collator
593         if (options[r].doesOccur) {
594             // TODO: implement it
595         } else {
596             col = ucol_open(locale, &status);
597             if (U_FAILURE(status)) {
598                 return;
599             }
600         }
601 
602         if (options[f].doesOccur) {
603             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
604         } else {
605             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
606         }
607 
608         if (options[a].doesOccur) {
609             ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
610         }
611 
612         if (options[c].doesOccur) { // strcmp() has i18n encoding problem
613             if (strcmp("lower", options[c].value) == 0){
614                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
615             } else if (strcmp("upper", options[c].value) == 0) {
616                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
617             } else {
618                 status = U_ILLEGAL_ARGUMENT_ERROR;
619                 return;
620             }
621         }
622 
623         if (options[l].doesOccur){
624             ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status);
625         }
626 
627         if (options[n].doesOccur){
628             ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
629         }
630 
631         if (options[s].doesOccur) {
632             char *endp;
633             int tmp = strtol(options[l].value, &endp, 0);
634             if (endp == options[l].value) {
635                 status = U_ILLEGAL_ARGUMENT_ERROR;
636                 return;
637             }
638             switch (tmp) {
639             case 1:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &status);		break;
640             case 2:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &status);		break;
641             case 3:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status);		break;
642             case 4:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status);	break;
643             case 5:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status);		break;
644             default: status = U_ILLEGAL_ARGUMENT_ERROR;					return;
645             }
646         }
647         prepareData(status);
648     }
649 
650     //to avoid use the annoying 'id' in TESTCASE(id,test) macro or the like
651 #define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
652     if(temp == index) {\
653     name = #testname;\
654     if (exec) {\
655     UErrorCode status = U_ZERO_ERROR;\
656     UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\
657     if (U_FAILURE(status)) {\
658     delete t;\
659     return NULL;\
660     } else {\
661     return t;\
662     }\
663     } else {\
664     return NULL;\
665     }\
666     }\
667     temp++\
668 
669 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par=NULL)670     virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
671         int temp = 0;
672 
673 #define TEST_KEYGEN(testname, func)\
674     TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
675         TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null);
676         TEST_KEYGEN(TestIcu_KeyGen_len,  icu_key_len);
677         TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null);
678         TEST_KEYGEN(TestWin_KeyGen_null, win_key_null);
679         TEST_KEYGEN(TestWin_KeyGen_len, win_key_len);
680 
681 #define TEST_ITER(testname, func)\
682     TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
683         TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null);
684         TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len);
685         TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null);
686         TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len);
687 
688 #define TEST_ITER_ALL(testname, func)\
689     TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
690         TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null);
691         TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len);
692         TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null);
693         TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len);
694 
695 #define TEST_QSORT(testname, func)\
696     TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
697         TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null);
698         TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len);
699         TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey);
700         TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null);
701         TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey);
702         TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null);
703         TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len);
704         TEST_QSORT(TestWin_qsort_usekey, win_cmpkey);
705 
706 #define TEST_BIN(testname, func)\
707     TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
708         TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null);
709         TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len);
710         TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey);
711         TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp);
712         TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo);
713         TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null);
714         TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey);
715         TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null);
716         TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len);
717         TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey);
718         TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp);
719 
720         name="";
721         return NULL;
722     }
723 
724 
725 
prepareData(UErrorCode & status)726     void prepareData(UErrorCode& status){
727         if(U_FAILURE(status)) return;
728         if (icu_data) return; // prepared
729 
730         icu_data = new CA_uchar();
731 
732         // Following code is borrowed from UPerfTest::getLines();
733         const UChar*    line=NULL;
734         int32_t         len =0;
735         for (;;) {
736             line = ucbuf_readline(ucharBuf,&len,&status);
737             if(line == NULL || U_FAILURE(status)){break;}
738 
739             // Refer to the source code of ucbuf_readline()
740             // 1. 'len' includs the line terminal symbols
741             // 2. The length of the line terminal symbols is only one character
742             // 3. The Windows CR LF line terminal symbols will be converted to CR
743 
744             if (len == 1) {
745                 continue; //skip empty line
746             } else {
747                 icu_data->append_one(len);
748                 memcpy(icu_data->last(), line, len * sizeof(UChar));
749                 icu_data->last()[len -1] = NULL;
750             }
751         }
752         if(U_FAILURE(status)) return;
753 
754         // UTF-16 -> UTF-8 conversion.
755         UConverter   *conv = ucnv_open("utf-8", &status); // just UTF-8 for now.
756         if (U_FAILURE(status)) return;
757 
758         count = icu_data->count;
759 
760         icu_data_all_len =  icu_data->index[count]; // includes all NULLs
761         icu_data_all_len -= count;  // excludes all NULLs
762         icu_data_all_len += 1;      // the terminal NULL
763         icu_data_all = new UChar[icu_data_all_len];
764         icu_data_all[icu_data_all_len - 1] = 0; //the terminal NULL
765 
766         icu_key  = new CA_uint8;
767         win_data = new CA_win_wchar;
768         win_key  = new CA_char;
769         posix_data = new CA_char;
770         posix_key = new CA_char;
771         rnd_index = new DataIndex[count];
772         DataIndex::win_langid = win_langid;
773         DataIndex::col        = col;
774 
775 
776         UChar * p = icu_data_all;
777         int32_t s;
778         int32_t t;
779         for (int i=0; i < count; i++) {
780             // ICU all data
781             s = sizeof(UChar) * icu_data->lengthOf(i);
782             memcpy(p, icu_data->dataOf(i), s);
783             p += icu_data->lengthOf(i);
784 
785             // ICU data
786 
787             // ICU key
788             s = ucol_getSortKey(col, icu_data->dataOf(i), -1,NULL, 0);
789             icu_key->append_one(s);
790             t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s);
791             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
792 
793             // POSIX data
794             s = ucnv_fromUChars(conv,NULL, 0, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
795             if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
796                 status = U_ZERO_ERROR;
797             } else {
798                 return;
799             }
800             posix_data->append_one(s + 1); // plus terminal NULL
801             t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
802             if (U_FAILURE(status)) return;
803             if ( t != s){status = U_INVALID_FORMAT_ERROR;return;}
804             posix_data->last()[s] = 0;
805 
806             // POSIX key
807             s = strxfrm(NULL, posix_data->dataOf(i), 0);
808             if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;}
809             posix_key->append_one(s);
810             t = strxfrm(posix_key->last(), posix_data->dataOf(i), s);
811             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
812 
813             // Win data
814             s = icu_data->lengthOf(i) + 1; // plus terminal NULL
815             win_data->append_one(s);
816             memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s);
817 
818             // Win key
819             s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), NULL,0);
820             if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;}
821             win_key->append_one(s);
822             t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), (WCHAR *)(win_key->last()),s);
823             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
824 
825         };
826 
827         // append_one() will make points shifting, should not merge following code into previous iteration
828         for (int i=0; i < count; i++) {
829             rnd_index[i].icu_key = icu_key->dataOf(i);
830             rnd_index[i].icu_data = icu_data->dataOf(i);
831             rnd_index[i].icu_data_len = icu_data->lengthOf(i);
832             rnd_index[i].posix_key = posix_key->last();
833             rnd_index[i].posix_data = posix_data->dataOf(i);
834             rnd_index[i].posix_data_len = posix_data->lengthOf(i);
835             rnd_index[i].win_key = win_key->dataOf(i);
836             rnd_index[i].win_data = win_data->dataOf(i);
837             rnd_index[i].win_data_len = win_data->lengthOf(i);
838         };
839 
840         ucnv_close(conv);
841         qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random);
842 
843 #define SORT(data, func) \
844     data = new DataIndex[count];\
845     memcpy(data, rnd_index, count * sizeof(DataIndex));\
846     qsort(data, count, sizeof(DataIndex), CmdQsort::func)
847 
848         SORT(ord_icu_data, icu_strcoll_len);
849         SORT(ord_icu_key, icu_cmpkey);
850         SORT(ord_posix_data, posix_strcoll_null);
851         SORT(ord_posix_key, posix_cmpkey);
852         SORT(ord_win_data, win_cmp_len);
853         SORT(ord_win_key, win_cmpkey);
854         SORT(ord_win_wcscmp, win_wcscmp);
855         SORT(ord_icu_strcmp, icu_strcmp);
856         SORT(ord_icu_cmpcpo, icu_cmpcpo);
857     }
858 };
859 
860 
main(int argc,const char * argv[])861 int main(int argc, const char *argv[])
862 {
863 
864     UErrorCode status = U_ZERO_ERROR;
865     CollPerfTest test(argc, argv, status);
866 
867     if (U_FAILURE(status)){
868         printf("The error is %s\n", u_errorName(status));
869         //TODO: print usage here
870         return status;
871     }
872 
873     if (test.run() == FALSE){
874         fprintf(stderr, "FAILED: Tests could not be run please check the "
875             "arguments.\n");
876         return -1;
877     }
878     return 0;
879 }
880 
881