• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /***********************************************************************
2 * © 2016 and later: Unicode, Inc. and others.
3 * License & terms of use: http://www.unicode.org/copyright.html
4 *
5 ***********************************************************************
6 ***********************************************************************
7 * COPYRIGHT:
8 * Copyright (C) 2001-2016 IBM, Inc.   All Rights Reserved.
9 *
10 ***********************************************************************/
11 
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <locale.h>
15 #include <limits.h>
16 #include <string.h>
17 #include "cmemory.h"
18 #include "unicode/uperf.h"
19 #include "uoptions.h"
20 #include "unicode/coll.h"
21 #include <unicode/ucoleitr.h>
22 
23 #if !U_PLATFORM_HAS_WIN32_API
24 #define DWORD uint32_t
25 #define WCHAR wchar_t
26 #endif
27 
/* Stores an array of string<UNIT> in one contiguous block of memory.
Since a string<UNIT> is itself treated as an array of UNIT, this
class eases memory management for an array of string<UNIT>.
*/
32 
33 //template<typename UNIT>
/* COMPATCT_ARRAY(CompactArrays, UNIT) defines a struct named CompactArrays that
 * packs NUL-terminated strings of UNIT back to back in a single heap buffer.
 *
 *   count      number of strings appended so far
 *   index      count + 1 offsets (in UNITs) into data; string i occupies
 *              [index[i], index[i + 1]) including its terminating NUL
 *   data       the shared storage for all strings
 *
 * Usage: call append_one(len) to reserve room for a string of len UNITs
 * (terminator included), then fill the reserved space through last().
 *
 * The struct owns both buffers, so copying is disabled. An allocation failure
 * prints a message and aborts instead of dereferencing a null pointer.
 */
#define COMPATCT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays{\
    CompactArrays(const CompactArrays & ) = delete;\
    CompactArrays & operator=(const CompactArrays & ) = delete;\
    int32_t   count;/*total number of the strings*/ \
    int32_t * index;/*relative offset in data*/ \
    UNIT    * data; /*the real space to hold strings*/ \
    \
    ~CompactArrays(){free(index);free(data);} \
    CompactArrays():count(0), index(nullptr), data(nullptr){ \
    index = static_cast<int32_t *>(grow(nullptr, sizeof(int32_t))); \
    index[0] = 0; \
    } \
    /* realloc() wrapper: never returns nullptr for a non-zero request */ \
    static void * grow(void * block, size_t bytes){ \
    void * resized = realloc(block, bytes); \
    if (resized == nullptr && bytes != 0) { \
    fprintf(stderr, "CompactArrays: out of memory\n"); \
    abort(); \
    } \
    return resized; \
    } \
    void append_one(int32_t theLen){ /*include terminal NUL*/ \
    count++; \
    index = static_cast<int32_t *>(grow(index, sizeof(int32_t) * (count + 1))); \
    index[count] = index[count - 1] + theLen; \
    data = static_cast<UNIT *>(grow(data, sizeof(UNIT) * index[count])); \
    } \
    UNIT * last(){return data + index[count - 1];} \
    UNIT * dataOf(int32_t i){return data + index[i];} \
    int32_t lengthOf(int i){return index[i+1] - index[i] - 1; }	/*exclude terminating NUL*/  \
};
57 
58 //typedef CompactArrays<char16_t> CA_uchar;
59 //typedef CompactArrays<char> CA_char;
60 //typedef CompactArrays<uint8_t> CA_uint8;
61 //typedef CompactArrays<WCHAR> CA_win_wchar;
62 
63 COMPATCT_ARRAY(CA_uchar, char16_t)
64 COMPATCT_ARRAY(CA_char, char)
65 COMPATCT_ARRAY(CA_uint8, uint8_t)
66 COMPATCT_ARRAY(CA_win_wchar, WCHAR)
67 
68 
69 struct DataIndex {
70     static DWORD        win_langid;     // for qsort callback function
71     static UCollator *  col;            // for qsort callback function
72     uint8_t *   icu_key;
73     char16_t *     icu_data;
74     int32_t     icu_data_len;
75     char*       posix_key;
76     char*       posix_data;
77     int32_t     posix_data_len;
78     char*       win_key;
79     WCHAR *     win_data;
80     int32_t     win_data_len;
81 };
82 DWORD DataIndex::win_langid;
83 UCollator * DataIndex::col;
84 
85 
86 
87 class CmdKeyGen : public UPerfFunction {
88     typedef	void (CmdKeyGen::* Func)(int32_t);
89     enum{MAX_KEY_LENGTH = 5000};
90     UCollator * col;
91     DWORD       win_langid;
92     int32_t     count;
93     DataIndex * data;
94     Func 	    fn;
95 
96     union { // to save sapce
97         uint8_t		icu_key[MAX_KEY_LENGTH];
98         char        posix_key[MAX_KEY_LENGTH];
99         WCHAR		win_key[MAX_KEY_LENGTH];
100     };
101 public:
CmdKeyGen(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * data,Func fn,int32_t)102     CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataIndex * data,Func fn,int32_t)
103         :col(col),win_langid(win_langid), count(count), data(data), fn(fn){}
104 
getOperationsPerIteration()105         long getOperationsPerIteration() override { return count; }
106 
call(UErrorCode * status)107         void call(UErrorCode* status) override {
108             for(int32_t i = 0; i< count; i++){
109                 (this->*fn)(i);
110             }
111         }
112 
icu_key_null(int32_t i)113         void icu_key_null(int32_t i){
114             ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH);
115         }
116 
icu_key_len(int32_t i)117         void icu_key_len(int32_t i){
118             ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key, MAX_KEY_LENGTH);
119         }
120 
121 #if U_PLATFORM_HAS_WIN32_API
122         // pre-generated in CollPerfTest::prepareData(), need not to check error here
win_key_null(int32_t i)123         void win_key_null(int32_t i){
124             //LCMAP_SORTsk             0x00000400  // WC sort sk (normalize)
125             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_key, MAX_KEY_LENGTH);
126         }
127 
win_key_len(int32_t i)128         void win_key_len(int32_t i){
129             LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].win_data_len, win_key, MAX_KEY_LENGTH);
130         }
131 #endif
132 
posix_key_null(int32_t i)133         void posix_key_null(int32_t i){
134             strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH);
135         }
136 };
137 
138 
139 class CmdIter : public UPerfFunction {
140     typedef	void (CmdIter::* Func)(UErrorCode* , int32_t );
141     int32_t             count;
142     CA_uchar *          data;
143     Func                fn;
144     UCollationElements *iter;
145     int32_t             exec_count;
146 public:
CmdIter(UErrorCode & status,UCollator * col,int32_t count,CA_uchar * data,Func fn,int32_t,int32_t)147     CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data, Func fn, int32_t,int32_t)
148         :count(count), data(data), fn(fn){
149             exec_count = 0;
150             char16_t dummytext[] = {0, 0};
151             iter = ucol_openElements(col, nullptr, 0, &status);
152             ucol_setText(iter, dummytext, 1, &status);
153         }
~CmdIter()154         ~CmdIter(){
155             ucol_closeElements(iter);
156         }
157 
getOperationsPerIteration()158         long getOperationsPerIteration() override { return exec_count ? exec_count : 1; }
159 
call(UErrorCode * status)160         void call(UErrorCode* status) override {
161             exec_count = 0;
162             for(int32_t i = 0; i< count; i++){
163                 (this->*fn)(status, i);
164             }
165         }
166 
icu_forward_null(UErrorCode * status,int32_t i)167         void icu_forward_null(UErrorCode* status, int32_t i){
168             ucol_setText(iter, data->dataOf(i), -1, status);
169             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
170         }
171 
icu_forward_len(UErrorCode * status,int32_t i)172         void icu_forward_len(UErrorCode* status, int32_t i){
173             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
174             while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
175         }
176 
icu_backward_null(UErrorCode * status,int32_t i)177         void icu_backward_null(UErrorCode* status, int32_t i){
178             ucol_setText(iter, data->dataOf(i), -1, status);
179             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
180         }
181 
icu_backward_len(UErrorCode * status,int32_t i)182         void icu_backward_len(UErrorCode* status, int32_t i){
183             ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
184             while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
185         }
186 };
187 
188 class CmdIterAll : public UPerfFunction {
189     typedef	void (CmdIterAll::* Func)(UErrorCode* status);
190     int32_t     count;
191     Func        fn;
192     UCollationElements *iter;
193     int32_t     exec_count;
194 
195 public:
196     enum CALL {forward_null, forward_len, backward_null, backward_len};
197 
~CmdIterAll()198     ~CmdIterAll(){
199         ucol_closeElements(iter);
200     }
CmdIterAll(UErrorCode & status,UCollator * col,int32_t count,char16_t * data,CALL call,int32_t,int32_t)201     CmdIterAll(UErrorCode & status, UCollator * col, int32_t count,  char16_t * data, CALL call,int32_t,int32_t)
202         :count(count)
203     {
204         exec_count = 0;
205         if (call == forward_null || call == backward_null) {
206             iter = ucol_openElements(col, data, -1, &status);
207         } else {
208             iter = ucol_openElements(col, data, count, &status);
209         }
210 
211         if (call == forward_null || call == forward_len){
212             fn = &CmdIterAll::icu_forward_all;
213         } else {
214             fn = &CmdIterAll::icu_backward_all;
215         }
216     }
getOperationsPerIteration()217     long getOperationsPerIteration() override { return exec_count ? exec_count : 1; }
218 
call(UErrorCode * status)219     void call(UErrorCode* status) override {
220         (this->*fn)(status);
221     }
222 
icu_forward_all(UErrorCode * status)223     void icu_forward_all(UErrorCode* status){
224         int strlen = count - 5;
225         int count5 = 5;
226         int strindex = 0;
227         ucol_setOffset(iter, strindex, status);
228         while (true) {
229             if (ucol_next(iter, status) == UCOL_NULLORDER) {
230                 break;
231             }
232             exec_count++;
233             count5 --;
234             if (count5 == 0) {
235                 strindex += 10;
236                 if (strindex > strlen) {
237                     break;
238                 }
239                 ucol_setOffset(iter, strindex, status);
240                 count5 = 5;
241             }
242         }
243     }
244 
icu_backward_all(UErrorCode * status)245     void icu_backward_all(UErrorCode* status){
246         int strlen = count;
247         int count5 = 5;
248         int strindex = 5;
249         ucol_setOffset(iter, strindex, status);
250         while (true) {
251             if (ucol_previous(iter, status) == UCOL_NULLORDER) {
252                 break;
253             }
254             exec_count++;
255             count5 --;
256             if (count5 == 0) {
257                 strindex += 10;
258                 if (strindex > strlen) {
259                     break;
260                 }
261                 ucol_setOffset(iter, strindex, status);
262                 count5 = 5;
263             }
264         }
265     }
266 
267 };
268 
269 struct CmdQsort : public UPerfFunction{
270 
q_randomCmdQsort271     static int q_random(const void * a, const void * b){
272         uint8_t * key_a = ((DataIndex *)a)->icu_key;
273         uint8_t * key_b = ((DataIndex *)b)->icu_key;
274 
275         int   val_a = 0;
276         int   val_b = 0;
277         while (*key_a != 0) {val_a += val_a*37 + *key_a++;}
278         while (*key_b != 0) {val_b += val_b*37 + *key_b++;}
279         return val_a - val_b;
280     }
281 
282 #define QCAST() \
283     DataIndex * da = (DataIndex *) a; \
284     DataIndex * db = (DataIndex *) b; \
285     ++exec_count
286 
icu_strcoll_nullCmdQsort287     static int icu_strcoll_null(const void *a, const void *b){
288         QCAST();
289         return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_EQUAL;
290     }
291 
icu_strcoll_lenCmdQsort292     static int icu_strcoll_len(const void *a, const void *b){
293         QCAST();
294         return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_data, db->icu_data_len) - UCOL_EQUAL;
295     }
296 
icu_cmpkeyCmdQsort297     static int icu_cmpkey (const void *a, const void *b){
298         QCAST();
299         return strcmp(reinterpret_cast<char*>(da->icu_key), reinterpret_cast<char*>(db->icu_key));
300     }
301 
302 #if U_PLATFORM_HAS_WIN32_API
win_cmp_nullCmdQsort303     static int win_cmp_null(const void *a, const void *b) {
304         QCAST();
305         //CSTR_LESS_THAN		1
306         //CSTR_EQUAL			2
307         //CSTR_GREATER_THAN		3
308         int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data, -1);
309         if (t == 0){
310             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
311             exit(-1);
312         } else{
313             return t - CSTR_EQUAL;
314         }
315     }
316 
win_cmp_lenCmdQsort317     static int win_cmp_len(const void *a, const void *b) {
318         QCAST();
319         int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len, db->win_data, db->win_data_len);
320         if (t == 0){
321             fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
322             exit(-1);
323         } else{
324             return t - CSTR_EQUAL;
325         }
326     }
327 #endif
328 
329 #define QFUNC(name, func, data) \
330     static int name (const void *a, const void *b){ \
331     QCAST(); \
332     return func(da->data, db->data); \
333     }
334 
335     QFUNC(posix_strcoll_null, strcoll, posix_data)
336         QFUNC(posix_cmpkey, strcmp, posix_key)
337 #if U_PLATFORM_HAS_WIN32_API
338         QFUNC(win_cmpkey, strcmp, win_key)
339         QFUNC(win_wcscmp, wcscmp, win_data)
340 #endif
341         QFUNC(icu_strcmp, u_strcmp, icu_data)
342         QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
343 
344 private:
345     static int32_t exec_count; // potential muilt-thread problem
346 
347     typedef	int (* Func)(const void *, const void *);
348 
349     Func    fn;
350     void *  base;   //Start of target array.
351     int32_t num;    //Array size in elements.
352     int32_t width;  //Element size in bytes.
353 
354     void *  backup; //copy source of base
355 public:
CmdQsortCmdQsort356     CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func fn, int32_t,int32_t)
357         :fn(fn),num(num),width(width),backup(theBase){
358             base = malloc(num * width);
359             time_empty(100, &status); // warm memory/cache
360         }
361 
~CmdQsortCmdQsort362         ~CmdQsort(){
363             free(base);
364         }
365 
empty_callCmdQsort366         void empty_call(){
367             exec_count = 0;
368             memcpy(base, backup, num * width);
369         }
370 
time_emptyCmdQsort371         double time_empty(int32_t n, UErrorCode* status) {
372             UTimer start, stop;
373             utimer_getTime(&start);
374             while (n-- > 0) {
375                 empty_call();
376             }
377             utimer_getTime(&stop);
378             return utimer_getDeltaSeconds(&start,&stop); // ms
379         }
380 
callCmdQsort381         void call(UErrorCode* status) override {
382             exec_count = 0;
383             memcpy(base, backup, num * width);
384             qsort(base, num, width, fn);
385         }
timeCmdQsort386         double time(int32_t n, UErrorCode* status) override {
387             double t1 = time_empty(n,status);
388             double t2 = UPerfFunction::time(n, status);
389             return  t2-t1;// < 0 ? t2 : t2-t1;
390         }
391 
getOperationsPerIterationCmdQsort392         long getOperationsPerIteration() override { return exec_count ? exec_count : 1; }
393 };
394 int32_t CmdQsort::exec_count;
395 
396 
397 class CmdBinSearch : public UPerfFunction{
398 public:
399     typedef	int (CmdBinSearch::* Func)(int, int);
400 
401     UCollator * col;
402     DWORD       win_langid;
403     int32_t     count;
404     DataIndex * rnd;
405     DataIndex * ord;
406     Func 	    fn;
407     int32_t     exec_count;
408 
CmdBinSearch(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)409     CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)
410         :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(fn),exec_count(0){}
411 
412 
call(UErrorCode * status)413         void call(UErrorCode* status) override {
414             exec_count = 0;
415             for(int32_t i = 0; i< count; i++){ // search all data
416                 binary_search(i);
417             }
418         }
getOperationsPerIteration()419         long getOperationsPerIteration() override { return exec_count ? exec_count : 1; }
420 
binary_search(int32_t random)421         void binary_search(int32_t random)	{
422             int low   = 0;
423             int high  = count - 1;
424             int guess;
425             int last_guess = -1;
426             int r;
427             while (true) {
428                 guess = (high + low)/2;
429                 if (last_guess == guess) break; // nothing to search
430 
431                 r = (this->*fn)(random, guess);
432                 exec_count++;
433 
434                 if (r == 0)
435                     return;	// found, search end.
436                 if (r < 0) {
437                     high = guess;
438                 } else {
439                     low  = guess;
440                 }
441                 last_guess = guess;
442             }
443         }
444 
icu_strcoll_null(int32_t i,int32_t j)445         int icu_strcoll_null(int32_t i, int32_t j){
446             return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1);
447         }
448 
icu_strcoll_len(int32_t i,int32_t j)449         int icu_strcoll_len(int32_t i, int32_t j){
450             return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j].icu_data, ord[j].icu_data_len);
451         }
452 
icu_cmpkey(int32_t i,int32_t j)453         int icu_cmpkey(int32_t i, int32_t j) {
454             return strcmp(reinterpret_cast<char*>(rnd[i].icu_key),
455                           reinterpret_cast<char*>(ord[j].icu_key));
456         }
457 
458 #if U_PLATFORM_HAS_WIN32_API
win_cmp_null(int32_t i,int32_t j)459         int win_cmp_null(int32_t i, int32_t j) {
460             int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].win_data, -1);
461             if (t == 0){
462                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
463                 exit(-1);
464             } else{
465                 return t - CSTR_EQUAL;
466             }
467         }
468 
win_cmp_len(int32_t i,int32_t j)469         int win_cmp_len(int32_t i, int32_t j) {
470             int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_data_len, ord[j].win_data, ord[j].win_data_len);
471             if (t == 0){
472                 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
473                 exit(-1);
474             } else{
475                 return t - CSTR_EQUAL;
476             }
477         }
478 #endif
479 
480 #define BFUNC(name, func, data) \
481     int name(int32_t i, int32_t j) { \
482     return func(rnd[i].data, ord[j].data); \
483     }
484 
485         BFUNC(posix_strcoll_null, strcoll, posix_data)
486             BFUNC(posix_cmpkey, strcmp, posix_key)
487             BFUNC(win_cmpkey, strcmp, win_key)
488             BFUNC(win_wcscmp, wcscmp, win_data)
489             BFUNC(icu_strcmp, u_strcmp, icu_data)
490             BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
491 };
492 
493 class CollPerfTest : public UPerfTest {
494 public:
495     UCollator *     col;
496     DWORD           win_langid;
497 
498     char16_t * icu_data_all;
499     int32_t icu_data_all_len;
500 
501     int32_t         count;
502     CA_uchar *      icu_data;
503     CA_uint8 *      icu_key;
504     CA_char *       posix_data;
505     CA_char *       posix_key;
506     CA_win_wchar *  win_data;
507     CA_char *       win_key;
508 
509     DataIndex * rnd_index; // random by icu key
510     DataIndex * ord_win_data;
511     DataIndex * ord_win_key;
512     DataIndex * ord_posix_data;
513     DataIndex * ord_posix_key;
514     DataIndex * ord_icu_data;
515     DataIndex * ord_icu_key;
516     DataIndex * ord_win_wcscmp;
517     DataIndex * ord_icu_strcmp;
518     DataIndex * ord_icu_cmpcpo;
519 
// Closes the collator and releases every buffer and index array owned by the test.
virtual ~CollPerfTest(){
    ucol_close(col);

    delete [] icu_data_all;

    // compact string/key arrays
    delete icu_data;
    delete icu_key;
    delete posix_data;
    delete posix_key;
    delete win_data;
    delete win_key;

    // DataIndex arrays (deleting a null pointer is harmless)
    DataIndex * const indexArrays[] = {
        rnd_index,
        ord_win_data, ord_win_key,
        ord_posix_data, ord_posix_key,
        ord_icu_data, ord_icu_key,
        ord_win_wcscmp, ord_icu_strcmp, ord_icu_cmpcpo
    };
    for (DataIndex * arr : indexArrays) {
        delete[] arr;
    }
}
540 
CollPerfTest(int32_t argc,const char * argv[],UErrorCode & status)541     CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest(argc, argv, status){
542         col = nullptr;
543         icu_data_all = nullptr;
544         icu_data = nullptr;
545         icu_key = nullptr;
546         posix_data = nullptr;
547         posix_key = nullptr;
548         win_data =nullptr;
549         win_key = nullptr;
550 
551         rnd_index = nullptr;
552         ord_win_data= nullptr;
553         ord_win_key= nullptr;
554         ord_posix_data= nullptr;
555         ord_posix_key= nullptr;
556         ord_icu_data= nullptr;
557         ord_icu_key= nullptr;
558         ord_win_wcscmp = nullptr;
559         ord_icu_strcmp = nullptr;
560         ord_icu_cmpcpo = nullptr;
561 
562         if (U_FAILURE(status)){
563             return;
564         }
565 
566         // Parse additional arguments
567 
568         UOption options[] = {
569             UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG),        // Windows Language ID number.
570                 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG),      // --rulefile <filename>
571                 // Collation related arguments. All are optional.
572                 // To simplify parsing, two choice arguments are disigned as NO_ARG.
573                 // The default value is UPPER word in the comment
574                 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG),          // --french <on | OFF>
575                 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG),       // --alternate <NON_IGNORE | shifted>
576                 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefirst <lower | upper | OFF>
577                 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG),       // --caselevel <on | OFF>
578                 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG),          // --normal <on | OFF>
579                 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG),  // --strength <1-5>
580         };
581         int32_t opt_len = UPRV_LENGTHOF(options);
582         enum {i, r,f,a,c,l,n,s};   // The buffer between the option items' order and their references
583 
584         _remainingArgc = u_parseArgs(_remainingArgc, const_cast<char**>(argv), opt_len, options);
585 
586         if (_remainingArgc < 0){
587             status = U_ILLEGAL_ARGUMENT_ERROR;
588             return;
589         }
590 
591         if (locale == nullptr){
592             locale = "en_US";   // set default locale
593         }
594 
595 #if U_PLATFORM_HAS_WIN32_API
596         if (options[i].doesOccur) {
597             char *endp;
598             int tmp = strtol(options[i].value, &endp, 0);
599             if (endp == options[i].value) {
600                 status = U_ILLEGAL_ARGUMENT_ERROR;
601                 return;
602             }
603             win_langid = MAKELCID(tmp, SORT_DEFAULT);
604         } else {
605             win_langid = uloc_getLCID(locale);
606         }
607 #endif
608 
609         //  Set up an ICU collator
610         if (options[r].doesOccur) {
611             // TODO: implement it
612         } else {
613             col = ucol_open(locale, &status);
614             if (U_FAILURE(status)) {
615                 return;
616             }
617         }
618 
619         if (options[f].doesOccur) {
620             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
621         } else {
622             ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
623         }
624 
625         if (options[a].doesOccur) {
626             ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
627         }
628 
629         if (options[c].doesOccur) { // strcmp() has i18n encoding problem
630             if (strcmp("lower", options[c].value) == 0){
631                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
632             } else if (strcmp("upper", options[c].value) == 0) {
633                 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
634             } else {
635                 status = U_ILLEGAL_ARGUMENT_ERROR;
636                 return;
637             }
638         }
639 
640         if (options[l].doesOccur){
641             ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status);
642         }
643 
644         if (options[n].doesOccur){
645             ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
646         }
647 
648         if (options[s].doesOccur) {
649             char *endp;
650             int tmp = strtol(options[l].value, &endp, 0);
651             if (endp == options[l].value) {
652                 status = U_ILLEGAL_ARGUMENT_ERROR;
653                 return;
654             }
655             switch (tmp) {
656             case 1:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &status);		break;
657             case 2:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &status);		break;
658             case 3:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status);		break;
659             case 4:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status);	break;
660             case 5:	ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status);		break;
661             default: status = U_ILLEGAL_ARGUMENT_ERROR;					return;
662             }
663         }
664         prepareData(status);
665     }
666 
667     //to avoid use the annoying 'id' in TESTCASE(id,test) macro or the like
/* TEST(testname, classname, arg1..arg6) registers one test inside
 * runIndexedTest(). It relies on the locals `temp` (running test number),
 * `index`, `exec` and `name` of the enclosing function.
 * When `temp` equals `index`, `name` is set to the test name and the macro
 * returns: nullptr if `exec` is false or if constructing the performance
 * function reported a failure, otherwise the new `classname` object.
 * In all other cases `temp` is advanced to the next test number. */
#define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
    if (temp == index) { \
        name = #testname; \
        if (!exec) { \
            return nullptr; \
        } \
        UErrorCode status = U_ZERO_ERROR; \
        UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6); \
        if (!U_FAILURE(status)) { \
            return t; \
        } \
        delete t; \
        return nullptr; \
    } \
    temp++

686 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par=nullptr)687     UPerfFunction* runIndexedTest(/*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char*& name, /*[in]*/char* par = nullptr) override {
688         int temp = 0;
689 
690 #define TEST_KEYGEN(testname, func)\
691     TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
692         TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null);
693         TEST_KEYGEN(TestIcu_KeyGen_len,  icu_key_len);
694         TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null);
695 #if U_PLATFORM_HAS_WIN32_API
696         TEST_KEYGEN(TestWin_KeyGen_null, win_key_null);
697         TEST_KEYGEN(TestWin_KeyGen_len, win_key_len);
698 #endif
699 
700 #define TEST_ITER(testname, func)\
701     TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
702         TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null);
703         TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len);
704         TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null);
705         TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len);
706 
707 #define TEST_ITER_ALL(testname, func)\
708     TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
709         TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null);
710         TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len);
711         TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null);
712         TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len);
713 
714 #define TEST_QSORT(testname, func)\
715     TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
716         TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null);
717         TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len);
718         TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey);
719         TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null);
720         TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey);
721 #if U_PLATFORM_HAS_WIN32_API
722         TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null);
723         TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len);
724         TEST_QSORT(TestWin_qsort_usekey, win_cmpkey);
725 #endif
726 
727 #define TEST_BIN(testname, func)\
728     TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
729         TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null);
730         TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len);
731         TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey);
732         TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp);
733         TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo);
734         TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null);
735         TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey);
736 #if U_PLATFORM_HAS_WIN32_API
737         TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null);
738         TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len);
739 #endif
740         TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey);
741         TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp);
742 
743         name="";
744         return nullptr;
745     }
746 
747 
748 
prepareData(UErrorCode & status)749     void prepareData(UErrorCode& status){
750         if(U_FAILURE(status)) return;
751         if (icu_data) return; // prepared
752 
753         icu_data = new CA_uchar();
754 
755         // Following code is borrowed from UPerfTest::getLines();
756         const char16_t*    line=nullptr;
757         int32_t         len =0;
758         for (;;) {
759             line = ucbuf_readline(ucharBuf,&len,&status);
760             if(line == nullptr || U_FAILURE(status)){break;}
761 
762             // Refer to the source code of ucbuf_readline()
763             // 1. 'len' includs the line terminal symbols
764             // 2. The length of the line terminal symbols is only one character
765             // 3. The Windows CR LF line terminal symbols will be converted to CR
766 
767             if (len == 1) {
768                 continue; //skip empty line
769             } else {
770                 icu_data->append_one(len);
771                 memcpy(icu_data->last(), line, len * sizeof(char16_t));
772                 icu_data->last()[len -1] = 0;
773             }
774         }
775         if(U_FAILURE(status)) return;
776 
777         // UTF-16 -> UTF-8 conversion.
778         UConverter   *conv = ucnv_open("utf-8", &status); // just UTF-8 for now.
779         if (U_FAILURE(status)) return;
780 
781         count = icu_data->count;
782 
783         icu_data_all_len =  icu_data->index[count]; // includes all NULs
784         icu_data_all_len -= count;  // excludes all NULs
785         icu_data_all_len += 1;      // the terminal NUL
786         icu_data_all = new char16_t[icu_data_all_len];
787         icu_data_all[icu_data_all_len - 1] = 0; //the terminal NUL
788 
789         icu_key  = new CA_uint8;
790         win_data = new CA_win_wchar;
791         win_key  = new CA_char;
792         posix_data = new CA_char;
793         posix_key = new CA_char;
794         rnd_index = new DataIndex[count];
795         DataIndex::win_langid = win_langid;
796         DataIndex::col        = col;
797 
798 
799         char16_t * p = icu_data_all;
800         int32_t s;
801         int32_t t;
802         for (int i=0; i < count; i++) {
803             // ICU all data
804             s = sizeof(char16_t) * icu_data->lengthOf(i);
805             memcpy(p, icu_data->dataOf(i), s);
806             p += icu_data->lengthOf(i);
807 
808             // ICU data
809 
810             // ICU key
811             s = ucol_getSortKey(col, icu_data->dataOf(i), -1,nullptr, 0);
812             icu_key->append_one(s);
813             t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s);
814             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
815 
816             // POSIX data
817             s = ucnv_fromUChars(conv,nullptr, 0, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
818             if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
819                 status = U_ZERO_ERROR;
820             } else {
821                 return;
822             }
823             posix_data->append_one(s + 1); // plus terminal NUL
824             t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
825             if (U_FAILURE(status)) return;
826             if ( t != s){status = U_INVALID_FORMAT_ERROR;return;}
827             posix_data->last()[s] = 0;
828 
829             // POSIX key
830             s = strxfrm(nullptr, posix_data->dataOf(i), 0);
831             if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;}
832             posix_key->append_one(s);
833             t = strxfrm(posix_key->last(), posix_data->dataOf(i), s);
834             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
835 
836 #if U_PLATFORM_HAS_WIN32_API
837             // Win data
838             s = icu_data->lengthOf(i) + 1; // plus terminal NUL
839             win_data->append_one(s);
840             memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s);
841 
842             // Win key
843             s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), nullptr,0);
844             if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;}
845             win_key->append_one(s);
846             t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), (WCHAR *)(win_key->last()),s);
847             if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
848 #endif
849         };
850 
851         // append_one() will make points shifting, should not merge following code into previous iteration
852         for (int i=0; i < count; i++) {
853             rnd_index[i].icu_key = icu_key->dataOf(i);
854             rnd_index[i].icu_data = icu_data->dataOf(i);
855             rnd_index[i].icu_data_len = icu_data->lengthOf(i);
856             rnd_index[i].posix_key = posix_key->last();
857             rnd_index[i].posix_data = posix_data->dataOf(i);
858             rnd_index[i].posix_data_len = posix_data->lengthOf(i);
859 #if U_PLATFORM_HAS_WIN32_API
860             rnd_index[i].win_key = win_key->dataOf(i);
861             rnd_index[i].win_data = win_data->dataOf(i);
862             rnd_index[i].win_data_len = win_data->lengthOf(i);
863 #endif
864         };
865 
866         ucnv_close(conv);
867         qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random);
868 
869 #define SORT(data, func) \
870     data = new DataIndex[count];\
871     memcpy(data, rnd_index, count * sizeof(DataIndex));\
872     qsort(data, count, sizeof(DataIndex), CmdQsort::func)
873 
874         SORT(ord_icu_data, icu_strcoll_len);
875         SORT(ord_icu_key, icu_cmpkey);
876         SORT(ord_posix_data, posix_strcoll_null);
877         SORT(ord_posix_key, posix_cmpkey);
878 #if U_PLATFORM_HAS_WIN32_API
879         SORT(ord_win_data, win_cmp_len);
880         SORT(ord_win_key, win_cmpkey);
881         SORT(ord_win_wcscmp, win_wcscmp);
882 #endif
883         SORT(ord_icu_strcmp, icu_strcmp);
884         SORT(ord_icu_cmpcpo, icu_cmpcpo);
885     }
886 };
887 
888 
main(int argc,const char * argv[])889 int main(int argc, const char *argv[])
890 {
891 
892     UErrorCode status = U_ZERO_ERROR;
893     CollPerfTest test(argc, argv, status);
894 
895     if (U_FAILURE(status)){
896         printf("The error is %s\n", u_errorName(status));
897         //TODO: print usage here
898         return status;
899     }
900 
901     if (test.run() == false){
902         fprintf(stderr, "FAILED: Tests could not be run please check the "
903             "arguments.\n");
904         return -1;
905     }
906     return 0;
907 }
908 
909