1 /***********************************************************************
2 * © 2016 and later: Unicode, Inc. and others.
3 * License & terms of use: http://www.unicode.org/copyright.html
4 *
5 ***********************************************************************
6 ***********************************************************************
7 * COPYRIGHT:
8 * Copyright (C) 2001-2016 IBM, Inc. All Rights Reserved.
9 *
10 ***********************************************************************/
11
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <locale.h>
15 #include <limits.h>
16 #include <string.h>
17 #include "cmemory.h"
18 #include "unicode/uperf.h"
19 #include "uoptions.h"
20 #include "unicode/coll.h"
21 #include <unicode/ucoleitr.h>
22
// The declarations below use the Win32 type names DWORD and WCHAR even in
// code that is compiled on every platform. When the Win32 API is not
// available, map those names onto standard types so that code still compiles.
#if !U_PLATFORM_HAS_WIN32_API
#define DWORD uint32_t
#define WCHAR wchar_t
#endif
27
/* Stores an array of string<UNIT> in contiguous space.
   Since a string<UNIT> is itself treated as an array of UNIT, this
   class eases the memory management of an array of string<UNIT>.
*/
32
33 //template<typename UNIT>
/*
 * COMPATCT_ARRAY(CompactArrays, UNIT) defines a struct named CompactArrays
 * that keeps `count` strings of UNIT back to back in one heap buffer.
 *
 *   index[i]       offset (in UNITs) of string i inside `data`;
 *                  index[count] is the total number of UNITs in use.
 *   append_one(n)  reserves room for one more string of n UNITs (n includes
 *                  the terminating NUL); the caller then fills last().
 *                  May move `data`, so pointers obtained earlier from
 *                  last()/dataOf() must not be kept across this call.
 *   lengthOf(i)    length of string i without its terminating NUL.
 *
 * The struct owns `index` and `data`, so copying is disabled. An allocation
 * failure terminates the program, since the tests cannot run without data.
 */
#define COMPATCT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays{\
    CompactArrays(const CompactArrays & ) = delete;\
    CompactArrays & operator=(const CompactArrays & ) = delete;\
    int32_t count;/*total number of the strings*/ \
    int32_t * index;/*relative offset in data*/ \
    UNIT * data; /*the real space to hold strings*/ \
    \
    ~CompactArrays(){free(index);free(data);} \
    CompactArrays():count(0), index(nullptr), data(nullptr){ \
        index = static_cast<int32_t *>(checked_realloc(nullptr, sizeof(int32_t))); \
        index[0] = 0; \
    } \
    /*realloc() wrapper: never called with size 0, never returns nullptr for a real request*/ \
    static void * checked_realloc(void * block, size_t bytes){ \
        if (bytes == 0) { \
            return block; \
        } \
        void * grown = realloc(block, bytes); \
        if (grown == nullptr) { \
            fprintf(stderr, "COMPATCT_ARRAY: out of memory\n"); \
            exit(-1); \
        } \
        return grown; \
    } \
    void append_one(int32_t theLen){ /*include terminal NUL*/ \
        count++; \
        index = static_cast<int32_t *>(checked_realloc(index, sizeof(int32_t) * (static_cast<size_t>(count) + 1))); \
        index[count] = index[count - 1] + theLen; \
        data = static_cast<UNIT *>(checked_realloc(data, sizeof(UNIT) * static_cast<size_t>(index[count]))); \
    } \
    UNIT * last(){return data + index[count - 1];} \
    UNIT * dataOf(int32_t i){return data + index[i];} \
    int32_t lengthOf(int i){return index[i+1] - index[i] - 1; } /*exclude terminating NUL*/ \
};
57
58 //typedef CompactArrays<char16_t> CA_uchar;
59 //typedef CompactArrays<char> CA_char;
60 //typedef CompactArrays<uint8_t> CA_uint8;
61 //typedef CompactArrays<WCHAR> CA_win_wchar;
62
// Concrete compact string arrays, one struct per code unit type used below.
COMPATCT_ARRAY(CA_uchar, char16_t)
COMPATCT_ARRAY(CA_char, char)
COMPATCT_ARRAY(CA_uint8, uint8_t)
COMPATCT_ARRAY(CA_win_wchar, WCHAR)
67
68
69 struct DataIndex {
70 static DWORD win_langid; // for qsort callback function
71 static UCollator * col; // for qsort callback function
72 uint8_t * icu_key;
73 char16_t * icu_data;
74 int32_t icu_data_len;
75 char* posix_key;
76 char* posix_data;
77 int32_t posix_data_len;
78 char* win_key;
79 WCHAR * win_data;
80 int32_t win_data_len;
81 };
82 DWORD DataIndex::win_langid;
83 UCollator * DataIndex::col;
84
85
86
87 class CmdKeyGen : public UPerfFunction {
88 typedef void (CmdKeyGen::* Func)(int32_t);
89 enum{MAX_KEY_LENGTH = 5000};
90 UCollator * col;
91 DWORD win_langid;
92 int32_t count;
93 DataIndex * data;
94 Func fn;
95
96 union { // to save sapce
97 uint8_t icu_key[MAX_KEY_LENGTH];
98 char posix_key[MAX_KEY_LENGTH];
99 WCHAR win_key[MAX_KEY_LENGTH];
100 };
101 public:
CmdKeyGen(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * data,Func fn,int32_t)102 CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataIndex * data,Func fn,int32_t)
103 :col(col),win_langid(win_langid), count(count), data(data), fn(fn){}
104
getOperationsPerIteration()105 long getOperationsPerIteration() override { return count; }
106
call(UErrorCode * status)107 void call(UErrorCode* status) override {
108 for(int32_t i = 0; i< count; i++){
109 (this->*fn)(i);
110 }
111 }
112
icu_key_null(int32_t i)113 void icu_key_null(int32_t i){
114 ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH);
115 }
116
icu_key_len(int32_t i)117 void icu_key_len(int32_t i){
118 ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key, MAX_KEY_LENGTH);
119 }
120
121 #if U_PLATFORM_HAS_WIN32_API
122 // pre-generated in CollPerfTest::prepareData(), need not to check error here
win_key_null(int32_t i)123 void win_key_null(int32_t i){
124 //LCMAP_SORTsk 0x00000400 // WC sort sk (normalize)
125 LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_key, MAX_KEY_LENGTH);
126 }
127
win_key_len(int32_t i)128 void win_key_len(int32_t i){
129 LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].win_data_len, win_key, MAX_KEY_LENGTH);
130 }
131 #endif
132
posix_key_null(int32_t i)133 void posix_key_null(int32_t i){
134 strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH);
135 }
136 };
137
138
139 class CmdIter : public UPerfFunction {
140 typedef void (CmdIter::* Func)(UErrorCode* , int32_t );
141 int32_t count;
142 CA_uchar * data;
143 Func fn;
144 UCollationElements *iter;
145 int32_t exec_count;
146 public:
CmdIter(UErrorCode & status,UCollator * col,int32_t count,CA_uchar * data,Func fn,int32_t,int32_t)147 CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data, Func fn, int32_t,int32_t)
148 :count(count), data(data), fn(fn){
149 exec_count = 0;
150 char16_t dummytext[] = {0, 0};
151 iter = ucol_openElements(col, nullptr, 0, &status);
152 ucol_setText(iter, dummytext, 1, &status);
153 }
~CmdIter()154 ~CmdIter(){
155 ucol_closeElements(iter);
156 }
157
getOperationsPerIteration()158 long getOperationsPerIteration() override { return exec_count ? exec_count : 1; }
159
call(UErrorCode * status)160 void call(UErrorCode* status) override {
161 exec_count = 0;
162 for(int32_t i = 0; i< count; i++){
163 (this->*fn)(status, i);
164 }
165 }
166
icu_forward_null(UErrorCode * status,int32_t i)167 void icu_forward_null(UErrorCode* status, int32_t i){
168 ucol_setText(iter, data->dataOf(i), -1, status);
169 while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
170 }
171
icu_forward_len(UErrorCode * status,int32_t i)172 void icu_forward_len(UErrorCode* status, int32_t i){
173 ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
174 while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
175 }
176
icu_backward_null(UErrorCode * status,int32_t i)177 void icu_backward_null(UErrorCode* status, int32_t i){
178 ucol_setText(iter, data->dataOf(i), -1, status);
179 while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
180 }
181
icu_backward_len(UErrorCode * status,int32_t i)182 void icu_backward_len(UErrorCode* status, int32_t i){
183 ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
184 while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
185 }
186 };
187
188 class CmdIterAll : public UPerfFunction {
189 typedef void (CmdIterAll::* Func)(UErrorCode* status);
190 int32_t count;
191 Func fn;
192 UCollationElements *iter;
193 int32_t exec_count;
194
195 public:
196 enum CALL {forward_null, forward_len, backward_null, backward_len};
197
~CmdIterAll()198 ~CmdIterAll(){
199 ucol_closeElements(iter);
200 }
CmdIterAll(UErrorCode & status,UCollator * col,int32_t count,char16_t * data,CALL call,int32_t,int32_t)201 CmdIterAll(UErrorCode & status, UCollator * col, int32_t count, char16_t * data, CALL call,int32_t,int32_t)
202 :count(count)
203 {
204 exec_count = 0;
205 if (call == forward_null || call == backward_null) {
206 iter = ucol_openElements(col, data, -1, &status);
207 } else {
208 iter = ucol_openElements(col, data, count, &status);
209 }
210
211 if (call == forward_null || call == forward_len){
212 fn = &CmdIterAll::icu_forward_all;
213 } else {
214 fn = &CmdIterAll::icu_backward_all;
215 }
216 }
getOperationsPerIteration()217 long getOperationsPerIteration() override { return exec_count ? exec_count : 1; }
218
call(UErrorCode * status)219 void call(UErrorCode* status) override {
220 (this->*fn)(status);
221 }
222
icu_forward_all(UErrorCode * status)223 void icu_forward_all(UErrorCode* status){
224 int strlen = count - 5;
225 int count5 = 5;
226 int strindex = 0;
227 ucol_setOffset(iter, strindex, status);
228 while (true) {
229 if (ucol_next(iter, status) == UCOL_NULLORDER) {
230 break;
231 }
232 exec_count++;
233 count5 --;
234 if (count5 == 0) {
235 strindex += 10;
236 if (strindex > strlen) {
237 break;
238 }
239 ucol_setOffset(iter, strindex, status);
240 count5 = 5;
241 }
242 }
243 }
244
icu_backward_all(UErrorCode * status)245 void icu_backward_all(UErrorCode* status){
246 int strlen = count;
247 int count5 = 5;
248 int strindex = 5;
249 ucol_setOffset(iter, strindex, status);
250 while (true) {
251 if (ucol_previous(iter, status) == UCOL_NULLORDER) {
252 break;
253 }
254 exec_count++;
255 count5 --;
256 if (count5 == 0) {
257 strindex += 10;
258 if (strindex > strlen) {
259 break;
260 }
261 ucol_setOffset(iter, strindex, status);
262 count5 = 5;
263 }
264 }
265 }
266
267 };
268
269 struct CmdQsort : public UPerfFunction{
270
q_randomCmdQsort271 static int q_random(const void * a, const void * b){
272 uint8_t * key_a = ((DataIndex *)a)->icu_key;
273 uint8_t * key_b = ((DataIndex *)b)->icu_key;
274
275 int val_a = 0;
276 int val_b = 0;
277 while (*key_a != 0) {val_a += val_a*37 + *key_a++;}
278 while (*key_b != 0) {val_b += val_b*37 + *key_b++;}
279 return val_a - val_b;
280 }
281
282 #define QCAST() \
283 DataIndex * da = (DataIndex *) a; \
284 DataIndex * db = (DataIndex *) b; \
285 ++exec_count
286
icu_strcoll_nullCmdQsort287 static int icu_strcoll_null(const void *a, const void *b){
288 QCAST();
289 return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_EQUAL;
290 }
291
icu_strcoll_lenCmdQsort292 static int icu_strcoll_len(const void *a, const void *b){
293 QCAST();
294 return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_data, db->icu_data_len) - UCOL_EQUAL;
295 }
296
icu_cmpkeyCmdQsort297 static int icu_cmpkey (const void *a, const void *b){
298 QCAST();
299 return strcmp(reinterpret_cast<char*>(da->icu_key), reinterpret_cast<char*>(db->icu_key));
300 }
301
302 #if U_PLATFORM_HAS_WIN32_API
win_cmp_nullCmdQsort303 static int win_cmp_null(const void *a, const void *b) {
304 QCAST();
305 //CSTR_LESS_THAN 1
306 //CSTR_EQUAL 2
307 //CSTR_GREATER_THAN 3
308 int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data, -1);
309 if (t == 0){
310 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
311 exit(-1);
312 } else{
313 return t - CSTR_EQUAL;
314 }
315 }
316
win_cmp_lenCmdQsort317 static int win_cmp_len(const void *a, const void *b) {
318 QCAST();
319 int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len, db->win_data, db->win_data_len);
320 if (t == 0){
321 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
322 exit(-1);
323 } else{
324 return t - CSTR_EQUAL;
325 }
326 }
327 #endif
328
329 #define QFUNC(name, func, data) \
330 static int name (const void *a, const void *b){ \
331 QCAST(); \
332 return func(da->data, db->data); \
333 }
334
335 QFUNC(posix_strcoll_null, strcoll, posix_data)
336 QFUNC(posix_cmpkey, strcmp, posix_key)
337 #if U_PLATFORM_HAS_WIN32_API
338 QFUNC(win_cmpkey, strcmp, win_key)
339 QFUNC(win_wcscmp, wcscmp, win_data)
340 #endif
341 QFUNC(icu_strcmp, u_strcmp, icu_data)
342 QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
343
344 private:
345 static int32_t exec_count; // potential muilt-thread problem
346
347 typedef int (* Func)(const void *, const void *);
348
349 Func fn;
350 void * base; //Start of target array.
351 int32_t num; //Array size in elements.
352 int32_t width; //Element size in bytes.
353
354 void * backup; //copy source of base
355 public:
CmdQsortCmdQsort356 CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func fn, int32_t,int32_t)
357 :fn(fn),num(num),width(width),backup(theBase){
358 base = malloc(num * width);
359 time_empty(100, &status); // warm memory/cache
360 }
361
~CmdQsortCmdQsort362 ~CmdQsort(){
363 free(base);
364 }
365
empty_callCmdQsort366 void empty_call(){
367 exec_count = 0;
368 memcpy(base, backup, num * width);
369 }
370
time_emptyCmdQsort371 double time_empty(int32_t n, UErrorCode* status) {
372 UTimer start, stop;
373 utimer_getTime(&start);
374 while (n-- > 0) {
375 empty_call();
376 }
377 utimer_getTime(&stop);
378 return utimer_getDeltaSeconds(&start,&stop); // ms
379 }
380
callCmdQsort381 void call(UErrorCode* status) override {
382 exec_count = 0;
383 memcpy(base, backup, num * width);
384 qsort(base, num, width, fn);
385 }
timeCmdQsort386 double time(int32_t n, UErrorCode* status) override {
387 double t1 = time_empty(n,status);
388 double t2 = UPerfFunction::time(n, status);
389 return t2-t1;// < 0 ? t2 : t2-t1;
390 }
391
getOperationsPerIterationCmdQsort392 long getOperationsPerIteration() override { return exec_count ? exec_count : 1; }
393 };
394 int32_t CmdQsort::exec_count;
395
396
397 class CmdBinSearch : public UPerfFunction{
398 public:
399 typedef int (CmdBinSearch::* Func)(int, int);
400
401 UCollator * col;
402 DWORD win_langid;
403 int32_t count;
404 DataIndex * rnd;
405 DataIndex * ord;
406 Func fn;
407 int32_t exec_count;
408
CmdBinSearch(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)409 CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)
410 :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(fn),exec_count(0){}
411
412
call(UErrorCode * status)413 void call(UErrorCode* status) override {
414 exec_count = 0;
415 for(int32_t i = 0; i< count; i++){ // search all data
416 binary_search(i);
417 }
418 }
getOperationsPerIteration()419 long getOperationsPerIteration() override { return exec_count ? exec_count : 1; }
420
binary_search(int32_t random)421 void binary_search(int32_t random) {
422 int low = 0;
423 int high = count - 1;
424 int guess;
425 int last_guess = -1;
426 int r;
427 while (true) {
428 guess = (high + low)/2;
429 if (last_guess == guess) break; // nothing to search
430
431 r = (this->*fn)(random, guess);
432 exec_count++;
433
434 if (r == 0)
435 return; // found, search end.
436 if (r < 0) {
437 high = guess;
438 } else {
439 low = guess;
440 }
441 last_guess = guess;
442 }
443 }
444
icu_strcoll_null(int32_t i,int32_t j)445 int icu_strcoll_null(int32_t i, int32_t j){
446 return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1);
447 }
448
icu_strcoll_len(int32_t i,int32_t j)449 int icu_strcoll_len(int32_t i, int32_t j){
450 return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j].icu_data, ord[j].icu_data_len);
451 }
452
icu_cmpkey(int32_t i,int32_t j)453 int icu_cmpkey(int32_t i, int32_t j) {
454 return strcmp(reinterpret_cast<char*>(rnd[i].icu_key),
455 reinterpret_cast<char*>(ord[j].icu_key));
456 }
457
458 #if U_PLATFORM_HAS_WIN32_API
win_cmp_null(int32_t i,int32_t j)459 int win_cmp_null(int32_t i, int32_t j) {
460 int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].win_data, -1);
461 if (t == 0){
462 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
463 exit(-1);
464 } else{
465 return t - CSTR_EQUAL;
466 }
467 }
468
win_cmp_len(int32_t i,int32_t j)469 int win_cmp_len(int32_t i, int32_t j) {
470 int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_data_len, ord[j].win_data, ord[j].win_data_len);
471 if (t == 0){
472 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
473 exit(-1);
474 } else{
475 return t - CSTR_EQUAL;
476 }
477 }
478 #endif
479
480 #define BFUNC(name, func, data) \
481 int name(int32_t i, int32_t j) { \
482 return func(rnd[i].data, ord[j].data); \
483 }
484
485 BFUNC(posix_strcoll_null, strcoll, posix_data)
486 BFUNC(posix_cmpkey, strcmp, posix_key)
487 BFUNC(win_cmpkey, strcmp, win_key)
488 BFUNC(win_wcscmp, wcscmp, win_data)
489 BFUNC(icu_strcmp, u_strcmp, icu_data)
490 BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
491 };
492
493 class CollPerfTest : public UPerfTest {
494 public:
495 UCollator * col;
496 DWORD win_langid;
497
498 char16_t * icu_data_all;
499 int32_t icu_data_all_len;
500
501 int32_t count;
502 CA_uchar * icu_data;
503 CA_uint8 * icu_key;
504 CA_char * posix_data;
505 CA_char * posix_key;
506 CA_win_wchar * win_data;
507 CA_char * win_key;
508
509 DataIndex * rnd_index; // random by icu key
510 DataIndex * ord_win_data;
511 DataIndex * ord_win_key;
512 DataIndex * ord_posix_data;
513 DataIndex * ord_posix_key;
514 DataIndex * ord_icu_data;
515 DataIndex * ord_icu_key;
516 DataIndex * ord_win_wcscmp;
517 DataIndex * ord_icu_strcmp;
518 DataIndex * ord_icu_cmpcpo;
519
~CollPerfTest()520 virtual ~CollPerfTest(){
521 ucol_close(col);
522 delete [] icu_data_all;
523 delete icu_data;
524 delete icu_key;
525 delete posix_data;
526 delete posix_key;
527 delete win_data;
528 delete win_key;
529 delete[] rnd_index;
530 delete[] ord_win_data;
531 delete[] ord_win_key;
532 delete[] ord_posix_data;
533 delete[] ord_posix_key;
534 delete[] ord_icu_data;
535 delete[] ord_icu_key;
536 delete[] ord_win_wcscmp;
537 delete[] ord_icu_strcmp;
538 delete[] ord_icu_cmpcpo;
539 }
540
CollPerfTest(int32_t argc,const char * argv[],UErrorCode & status)541 CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest(argc, argv, status){
542 col = nullptr;
543 icu_data_all = nullptr;
544 icu_data = nullptr;
545 icu_key = nullptr;
546 posix_data = nullptr;
547 posix_key = nullptr;
548 win_data =nullptr;
549 win_key = nullptr;
550
551 rnd_index = nullptr;
552 ord_win_data= nullptr;
553 ord_win_key= nullptr;
554 ord_posix_data= nullptr;
555 ord_posix_key= nullptr;
556 ord_icu_data= nullptr;
557 ord_icu_key= nullptr;
558 ord_win_wcscmp = nullptr;
559 ord_icu_strcmp = nullptr;
560 ord_icu_cmpcpo = nullptr;
561
562 if (U_FAILURE(status)){
563 return;
564 }
565
566 // Parse additional arguments
567
568 UOption options[] = {
569 UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG), // Windows Language ID number.
570 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG), // --rulefile <filename>
571 // Collation related arguments. All are optional.
572 // To simplify parsing, two choice arguments are disigned as NO_ARG.
573 // The default value is UPPER word in the comment
574 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG), // --french <on | OFF>
575 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG), // --alternate <NON_IGNORE | shifted>
576 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefirst <lower | upper | OFF>
577 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG), // --caselevel <on | OFF>
578 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG), // --normal <on | OFF>
579 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG), // --strength <1-5>
580 };
581 int32_t opt_len = UPRV_LENGTHOF(options);
582 enum {i, r,f,a,c,l,n,s}; // The buffer between the option items' order and their references
583
584 _remainingArgc = u_parseArgs(_remainingArgc, const_cast<char**>(argv), opt_len, options);
585
586 if (_remainingArgc < 0){
587 status = U_ILLEGAL_ARGUMENT_ERROR;
588 return;
589 }
590
591 if (locale == nullptr){
592 locale = "en_US"; // set default locale
593 }
594
595 #if U_PLATFORM_HAS_WIN32_API
596 if (options[i].doesOccur) {
597 char *endp;
598 int tmp = strtol(options[i].value, &endp, 0);
599 if (endp == options[i].value) {
600 status = U_ILLEGAL_ARGUMENT_ERROR;
601 return;
602 }
603 win_langid = MAKELCID(tmp, SORT_DEFAULT);
604 } else {
605 win_langid = uloc_getLCID(locale);
606 }
607 #endif
608
609 // Set up an ICU collator
610 if (options[r].doesOccur) {
611 // TODO: implement it
612 } else {
613 col = ucol_open(locale, &status);
614 if (U_FAILURE(status)) {
615 return;
616 }
617 }
618
619 if (options[f].doesOccur) {
620 ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
621 } else {
622 ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
623 }
624
625 if (options[a].doesOccur) {
626 ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
627 }
628
629 if (options[c].doesOccur) { // strcmp() has i18n encoding problem
630 if (strcmp("lower", options[c].value) == 0){
631 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
632 } else if (strcmp("upper", options[c].value) == 0) {
633 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
634 } else {
635 status = U_ILLEGAL_ARGUMENT_ERROR;
636 return;
637 }
638 }
639
640 if (options[l].doesOccur){
641 ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status);
642 }
643
644 if (options[n].doesOccur){
645 ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
646 }
647
648 if (options[s].doesOccur) {
649 char *endp;
650 int tmp = strtol(options[l].value, &endp, 0);
651 if (endp == options[l].value) {
652 status = U_ILLEGAL_ARGUMENT_ERROR;
653 return;
654 }
655 switch (tmp) {
656 case 1: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &status); break;
657 case 2: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &status); break;
658 case 3: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status); break;
659 case 4: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status); break;
660 case 5: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status); break;
661 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
662 }
663 }
664 prepareData(status);
665 }
666
667 //to avoid use the annoying 'id' in TESTCASE(id,test) macro or the like
668 #define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
669 if(temp == index) {\
670 name = #testname;\
671 if (exec) {\
672 UErrorCode status = U_ZERO_ERROR;\
673 UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\
674 if (U_FAILURE(status)) {\
675 delete t;\
676 return nullptr;\
677 } else {\
678 return t;\
679 }\
680 } else {\
681 return nullptr;\
682 }\
683 }\
684 temp++\
685
686
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par=nullptr)687 UPerfFunction* runIndexedTest(/*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char*& name, /*[in]*/char* par = nullptr) override {
688 int temp = 0;
689
690 #define TEST_KEYGEN(testname, func)\
691 TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
692 TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null);
693 TEST_KEYGEN(TestIcu_KeyGen_len, icu_key_len);
694 TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null);
695 #if U_PLATFORM_HAS_WIN32_API
696 TEST_KEYGEN(TestWin_KeyGen_null, win_key_null);
697 TEST_KEYGEN(TestWin_KeyGen_len, win_key_len);
698 #endif
699
700 #define TEST_ITER(testname, func)\
701 TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
702 TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null);
703 TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len);
704 TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null);
705 TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len);
706
707 #define TEST_ITER_ALL(testname, func)\
708 TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
709 TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null);
710 TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len);
711 TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null);
712 TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len);
713
714 #define TEST_QSORT(testname, func)\
715 TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
716 TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null);
717 TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len);
718 TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey);
719 TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null);
720 TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey);
721 #if U_PLATFORM_HAS_WIN32_API
722 TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null);
723 TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len);
724 TEST_QSORT(TestWin_qsort_usekey, win_cmpkey);
725 #endif
726
727 #define TEST_BIN(testname, func)\
728 TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
729 TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null);
730 TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len);
731 TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey);
732 TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp);
733 TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo);
734 TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null);
735 TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey);
736 #if U_PLATFORM_HAS_WIN32_API
737 TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null);
738 TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len);
739 #endif
740 TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey);
741 TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp);
742
743 name="";
744 return nullptr;
745 }
746
747
748
prepareData(UErrorCode & status)749 void prepareData(UErrorCode& status){
750 if(U_FAILURE(status)) return;
751 if (icu_data) return; // prepared
752
753 icu_data = new CA_uchar();
754
755 // Following code is borrowed from UPerfTest::getLines();
756 const char16_t* line=nullptr;
757 int32_t len =0;
758 for (;;) {
759 line = ucbuf_readline(ucharBuf,&len,&status);
760 if(line == nullptr || U_FAILURE(status)){break;}
761
762 // Refer to the source code of ucbuf_readline()
763 // 1. 'len' includs the line terminal symbols
764 // 2. The length of the line terminal symbols is only one character
765 // 3. The Windows CR LF line terminal symbols will be converted to CR
766
767 if (len == 1) {
768 continue; //skip empty line
769 } else {
770 icu_data->append_one(len);
771 memcpy(icu_data->last(), line, len * sizeof(char16_t));
772 icu_data->last()[len -1] = 0;
773 }
774 }
775 if(U_FAILURE(status)) return;
776
777 // UTF-16 -> UTF-8 conversion.
778 UConverter *conv = ucnv_open("utf-8", &status); // just UTF-8 for now.
779 if (U_FAILURE(status)) return;
780
781 count = icu_data->count;
782
783 icu_data_all_len = icu_data->index[count]; // includes all NULs
784 icu_data_all_len -= count; // excludes all NULs
785 icu_data_all_len += 1; // the terminal NUL
786 icu_data_all = new char16_t[icu_data_all_len];
787 icu_data_all[icu_data_all_len - 1] = 0; //the terminal NUL
788
789 icu_key = new CA_uint8;
790 win_data = new CA_win_wchar;
791 win_key = new CA_char;
792 posix_data = new CA_char;
793 posix_key = new CA_char;
794 rnd_index = new DataIndex[count];
795 DataIndex::win_langid = win_langid;
796 DataIndex::col = col;
797
798
799 char16_t * p = icu_data_all;
800 int32_t s;
801 int32_t t;
802 for (int i=0; i < count; i++) {
803 // ICU all data
804 s = sizeof(char16_t) * icu_data->lengthOf(i);
805 memcpy(p, icu_data->dataOf(i), s);
806 p += icu_data->lengthOf(i);
807
808 // ICU data
809
810 // ICU key
811 s = ucol_getSortKey(col, icu_data->dataOf(i), -1,nullptr, 0);
812 icu_key->append_one(s);
813 t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s);
814 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
815
816 // POSIX data
817 s = ucnv_fromUChars(conv,nullptr, 0, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
818 if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
819 status = U_ZERO_ERROR;
820 } else {
821 return;
822 }
823 posix_data->append_one(s + 1); // plus terminal NUL
824 t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
825 if (U_FAILURE(status)) return;
826 if ( t != s){status = U_INVALID_FORMAT_ERROR;return;}
827 posix_data->last()[s] = 0;
828
829 // POSIX key
830 s = strxfrm(nullptr, posix_data->dataOf(i), 0);
831 if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;}
832 posix_key->append_one(s);
833 t = strxfrm(posix_key->last(), posix_data->dataOf(i), s);
834 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
835
836 #if U_PLATFORM_HAS_WIN32_API
837 // Win data
838 s = icu_data->lengthOf(i) + 1; // plus terminal NUL
839 win_data->append_one(s);
840 memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s);
841
842 // Win key
843 s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), nullptr,0);
844 if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;}
845 win_key->append_one(s);
846 t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), (WCHAR *)(win_key->last()),s);
847 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
848 #endif
849 };
850
851 // append_one() will make points shifting, should not merge following code into previous iteration
852 for (int i=0; i < count; i++) {
853 rnd_index[i].icu_key = icu_key->dataOf(i);
854 rnd_index[i].icu_data = icu_data->dataOf(i);
855 rnd_index[i].icu_data_len = icu_data->lengthOf(i);
856 rnd_index[i].posix_key = posix_key->last();
857 rnd_index[i].posix_data = posix_data->dataOf(i);
858 rnd_index[i].posix_data_len = posix_data->lengthOf(i);
859 #if U_PLATFORM_HAS_WIN32_API
860 rnd_index[i].win_key = win_key->dataOf(i);
861 rnd_index[i].win_data = win_data->dataOf(i);
862 rnd_index[i].win_data_len = win_data->lengthOf(i);
863 #endif
864 };
865
866 ucnv_close(conv);
867 qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random);
868
869 #define SORT(data, func) \
870 data = new DataIndex[count];\
871 memcpy(data, rnd_index, count * sizeof(DataIndex));\
872 qsort(data, count, sizeof(DataIndex), CmdQsort::func)
873
874 SORT(ord_icu_data, icu_strcoll_len);
875 SORT(ord_icu_key, icu_cmpkey);
876 SORT(ord_posix_data, posix_strcoll_null);
877 SORT(ord_posix_key, posix_cmpkey);
878 #if U_PLATFORM_HAS_WIN32_API
879 SORT(ord_win_data, win_cmp_len);
880 SORT(ord_win_key, win_cmpkey);
881 SORT(ord_win_wcscmp, win_wcscmp);
882 #endif
883 SORT(ord_icu_strcmp, icu_strcmp);
884 SORT(ord_icu_cmpcpo, icu_cmpcpo);
885 }
886 };
887
888
main(int argc,const char * argv[])889 int main(int argc, const char *argv[])
890 {
891
892 UErrorCode status = U_ZERO_ERROR;
893 CollPerfTest test(argc, argv, status);
894
895 if (U_FAILURE(status)){
896 printf("The error is %s\n", u_errorName(status));
897 //TODO: print usage here
898 return status;
899 }
900
901 if (test.run() == false){
902 fprintf(stderr, "FAILED: Tests could not be run please check the "
903 "arguments.\n");
904 return -1;
905 }
906 return 0;
907 }
908
909