1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (C) 2001-2006 IBM, Inc. All Rights Reserved.
4 *
5 ********************************************************************/
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <locale.h>
10 #include <limits.h>
11 #include <string.h>
12 #include "unicode/uperf.h"
13 #include "uoptions.h"
14 #include "unicode/coll.h"
15 #include <unicode/ucoleitr.h>
16
17
18
/* Stores an array of zero-terminated strings of UNIT in one contiguous buffer.
   Since a string of UNIT is itself an array of UNIT, keeping every string in
   a single allocation eases the memory management for the whole array.

   COMPATCT_ARRAY(Name, UNIT) defines a struct called Name with:
     append_one(len)  reserves room for one more string of len units
                      (len includes the terminating zero); the caller then
                      fills the space returned by last().
     last()           start of the most recently appended string.
     dataOf(i)        start of string i.
     lengthOf(i)      length of string i without its terminating zero.

   append_one() reallocates the buffers, so a pointer obtained from last() or
   dataOf() is only valid until the next append_one().
   A failed allocation prints a message to stderr and ends the program.
*/

//template<typename UNIT>
#define COMPATCT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays{\
    CompactArrays(const CompactArrays & );             /* declared only, never defined: no copying */ \
    CompactArrays & operator=(const CompactArrays & ); /* declared only, never defined: no assignment */ \
    int32_t count;   /*total number of the strings*/ \
    int32_t * index; /*relative offset in data; index[count] is the number of units in use*/ \
    UNIT * data;     /*the real space to hold strings*/ \
    \
    ~CompactArrays(){free(index);free(data);} \
    /* The initializers follow the declaration order of the members. */ \
    CompactArrays():count(0), index(NULL), data(NULL){ \
        index = (int32_t *) grow(NULL, sizeof(int32_t)); \
        index[0] = 0; \
    } \
    /* realloc() wrapper: a failed allocation ends the program instead of */ \
    /* being dereferenced by the caller. A NULL result for a zero-byte    */ \
    /* request is not treated as a failure.                               */ \
    static void * grow(void * block, size_t bytes){ \
        void * result = realloc(block, bytes); \
        if (result == NULL && bytes != 0) { \
            fprintf(stderr, "CompactArrays: out of memory\n"); \
            exit(-1); \
        } \
        return result; \
    } \
    void append_one(int32_t theLen){ /*include terminal NULL*/ \
        count++; \
        index = (int32_t *) grow(index, sizeof(int32_t) * (count + 1)); \
        index[count] = index[count - 1] + theLen; \
        data = (UNIT *) grow(data, sizeof(UNIT) * index[count]); \
    } \
    UNIT * last(){return data + index[count - 1];} \
    UNIT * dataOf(int32_t i){return data + index[i];} \
    int32_t lengthOf(int i){return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/ \
};
48
49 //typedef CompactArrays<UChar> CA_uchar;
50 //typedef CompactArrays<char> CA_char;
51 //typedef CompactArrays<uint8_t> CA_uint8;
52 //typedef CompactArrays<WCHAR> CA_win_wchar;
53
// Concrete string containers generated by COMPATCT_ARRAY, one per unit type
// used in this file. The macro body already ends with "};", so no semicolon
// follows the invocations.
COMPATCT_ARRAY(CA_uchar, UChar)
COMPATCT_ARRAY(CA_char, char)
COMPATCT_ARRAY(CA_uint8, uint8_t)
COMPATCT_ARRAY(CA_win_wchar, WCHAR)
58
59
// One row of the test data: pointers to the same input string in each of the
// representations that the tests compare (ICU, POSIX, Windows), plus the sort
// key generated for each of them. The rows are filled in by
// CollPerfTest::prepareData() and do not own the memory they point to.
// Arrays of DataIndex are copied with memcpy() and reordered with qsort().
struct DataIndex {
    static DWORD win_langid; // for qsort callback function
    static UCollator * col; // for qsort callback function
    uint8_t * icu_key;       // sort key from ucol_getSortKey()
    UChar * icu_data;        // the string as UChar units, zero-terminated
    int32_t icu_data_len;    // length of icu_data without the terminator
    char* posix_key;         // key from strxfrm()
    char* posix_data;        // the string converted with the "utf-8" converter, zero-terminated
    int32_t posix_data_len;  // length of posix_data without the terminator
    char* win_key;           // sort key from LCMapStringW(LCMAP_SORTKEY)
    WCHAR * win_data;        // the string as WCHAR units, zero-terminated
    int32_t win_data_len;    // length of win_data without the terminator
};
// Definitions of the static members; prepareData() assigns them before sorting.
DWORD DataIndex::win_langid;
UCollator * DataIndex::col;
75
76
77
78 class CmdKeyGen : public UPerfFunction {
79 typedef void (CmdKeyGen::* Func)(int32_t);
80 enum{MAX_KEY_LENGTH = 5000};
81 UCollator * col;
82 DWORD win_langid;
83 int32_t count;
84 DataIndex * data;
85 Func fn;
86
87 union { // to save sapce
88 uint8_t icu_key[MAX_KEY_LENGTH];
89 char posix_key[MAX_KEY_LENGTH];
90 WCHAR win_key[MAX_KEY_LENGTH];
91 };
92 public:
CmdKeyGen(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * data,Func fn,int32_t)93 CmdKeyGen(UErrorCode, UCollator * col,DWORD win_langid, int32_t count, DataIndex * data,Func fn,int32_t)
94 :col(col),win_langid(win_langid), count(count), data(data), fn(fn){}
95
getOperationsPerIteration()96 virtual long getOperationsPerIteration(){return count;}
97
call(UErrorCode * status)98 virtual void call(UErrorCode* status){
99 for(int32_t i = 0; i< count; i++){
100 (this->*fn)(i);
101 }
102 }
103
icu_key_null(int32_t i)104 void icu_key_null(int32_t i){
105 ucol_getSortKey(col, data[i].icu_data, -1, icu_key, MAX_KEY_LENGTH);
106 }
107
icu_key_len(int32_t i)108 void icu_key_len(int32_t i){
109 ucol_getSortKey(col, data[i].icu_data, data[i].icu_data_len, icu_key, MAX_KEY_LENGTH);
110 }
111
112 // pre-generated in CollPerfTest::prepareData(), need not to check error here
win_key_null(int32_t i)113 void win_key_null(int32_t i){
114 //LCMAP_SORTsk 0x00000400 // WC sort sk (normalize)
115 LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, -1, win_key, MAX_KEY_LENGTH);
116 }
117
win_key_len(int32_t i)118 void win_key_len(int32_t i){
119 LCMapStringW(win_langid, LCMAP_SORTKEY, data[i].win_data, data[i].win_data_len, win_key, MAX_KEY_LENGTH);
120 }
121
posix_key_null(int32_t i)122 void posix_key_null(int32_t i){
123 strxfrm(posix_key, data[i].posix_data, MAX_KEY_LENGTH);
124 }
125 };
126
127
128 class CmdIter : public UPerfFunction {
129 typedef void (CmdIter::* Func)(UErrorCode* , int32_t );
130 int32_t count;
131 CA_uchar * data;
132 Func fn;
133 UCollationElements *iter;
134 int32_t exec_count;
135 public:
CmdIter(UErrorCode & status,UCollator * col,int32_t count,CA_uchar * data,Func fn,int32_t,int32_t)136 CmdIter(UErrorCode & status, UCollator * col, int32_t count, CA_uchar *data, Func fn, int32_t,int32_t)
137 :count(count), data(data), fn(fn){
138 exec_count = 0;
139 UChar dummytext[] = {0, 0};
140 iter = ucol_openElements(col, NULL, 0, &status);
141 ucol_setText(iter, dummytext, 1, &status);
142 }
~CmdIter()143 ~CmdIter(){
144 ucol_closeElements(iter);
145 }
146
getOperationsPerIteration()147 virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
148
call(UErrorCode * status)149 virtual void call(UErrorCode* status){
150 exec_count = 0;
151 for(int32_t i = 0; i< count; i++){
152 (this->*fn)(status, i);
153 }
154 }
155
icu_forward_null(UErrorCode * status,int32_t i)156 void icu_forward_null(UErrorCode* status, int32_t i){
157 ucol_setText(iter, data->dataOf(i), -1, status);
158 while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
159 }
160
icu_forward_len(UErrorCode * status,int32_t i)161 void icu_forward_len(UErrorCode* status, int32_t i){
162 ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
163 while (ucol_next(iter, status) != UCOL_NULLORDER) exec_count++;
164 }
165
icu_backward_null(UErrorCode * status,int32_t i)166 void icu_backward_null(UErrorCode* status, int32_t i){
167 ucol_setText(iter, data->dataOf(i), -1, status);
168 while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
169 }
170
icu_backward_len(UErrorCode * status,int32_t i)171 void icu_backward_len(UErrorCode* status, int32_t i){
172 ucol_setText(iter, data->dataOf(i), data->lengthOf(i) , status);
173 while (ucol_previous(iter, status) != UCOL_NULLORDER) exec_count++;
174 }
175 };
176
177 class CmdIterAll : public UPerfFunction {
178 typedef void (CmdIterAll::* Func)(UErrorCode* status);
179 int32_t count;
180 UChar * data;
181 Func fn;
182 UCollationElements *iter;
183 int32_t exec_count;
184
185 public:
186 enum CALL {forward_null, forward_len, backward_null, backward_len};
187
~CmdIterAll()188 ~CmdIterAll(){
189 ucol_closeElements(iter);
190 }
CmdIterAll(UErrorCode & status,UCollator * col,int32_t count,UChar * data,CALL call,int32_t,int32_t)191 CmdIterAll(UErrorCode & status, UCollator * col, int32_t count, UChar * data, CALL call,int32_t,int32_t)
192 :count(count),data(data)
193 {
194 exec_count = 0;
195 if (call == forward_null || call == backward_null) {
196 iter = ucol_openElements(col, data, -1, &status);
197 } else {
198 iter = ucol_openElements(col, data, count, &status);
199 }
200
201 if (call == forward_null || call == forward_len){
202 fn = &CmdIterAll::icu_forward_all;
203 } else {
204 fn = &CmdIterAll::icu_backward_all;
205 }
206 }
getOperationsPerIteration()207 virtual long getOperationsPerIteration(){return exec_count ? exec_count : 1;}
208
call(UErrorCode * status)209 virtual void call(UErrorCode* status){
210 (this->*fn)(status);
211 }
212
icu_forward_all(UErrorCode * status)213 void icu_forward_all(UErrorCode* status){
214 int strlen = count - 5;
215 int count5 = 5;
216 int strindex = 0;
217 ucol_setOffset(iter, strindex, status);
218 while (TRUE) {
219 if (ucol_next(iter, status) == UCOL_NULLORDER) {
220 break;
221 }
222 exec_count++;
223 count5 --;
224 if (count5 == 0) {
225 strindex += 10;
226 if (strindex > strlen) {
227 break;
228 }
229 ucol_setOffset(iter, strindex, status);
230 count5 = 5;
231 }
232 }
233 }
234
icu_backward_all(UErrorCode * status)235 void icu_backward_all(UErrorCode* status){
236 int strlen = count;
237 int count5 = 5;
238 int strindex = 5;
239 ucol_setOffset(iter, strindex, status);
240 while (TRUE) {
241 if (ucol_previous(iter, status) == UCOL_NULLORDER) {
242 break;
243 }
244 exec_count++;
245 count5 --;
246 if (count5 == 0) {
247 strindex += 10;
248 if (strindex > strlen) {
249 break;
250 }
251 ucol_setOffset(iter, strindex, status);
252 count5 = 5;
253 }
254 }
255 }
256
257 };
258
259 struct CmdQsort : public UPerfFunction{
260
q_randomCmdQsort261 static int q_random(const void * a, const void * b){
262 uint8_t * key_a = ((DataIndex *)a)->icu_key;
263 uint8_t * key_b = ((DataIndex *)b)->icu_key;
264
265 int val_a = 0;
266 int val_b = 0;
267 while (*key_a != 0) {val_a += val_a*37 + *key_a++;}
268 while (*key_b != 0) {val_b += val_b*37 + *key_b++;}
269 return val_a - val_b;
270 }
271
272 #define QCAST() \
273 DataIndex * da = (DataIndex *) a; \
274 DataIndex * db = (DataIndex *) b; \
275 ++exec_count
276
icu_strcoll_nullCmdQsort277 static int icu_strcoll_null(const void *a, const void *b){
278 QCAST();
279 return ucol_strcoll(da->col, da->icu_data, -1, db->icu_data, -1) - UCOL_EQUAL;
280 }
281
icu_strcoll_lenCmdQsort282 static int icu_strcoll_len(const void *a, const void *b){
283 QCAST();
284 return ucol_strcoll(da->col, da->icu_data, da->icu_data_len, db->icu_data, db->icu_data_len) - UCOL_EQUAL;
285 }
286
icu_cmpkeyCmdQsort287 static int icu_cmpkey (const void *a, const void *b){
288 QCAST();
289 return strcmp((char *) da->icu_key, (char *) db->icu_key);
290 }
291
win_cmp_nullCmdQsort292 static int win_cmp_null(const void *a, const void *b) {
293 QCAST();
294 //CSTR_LESS_THAN 1
295 //CSTR_EQUAL 2
296 //CSTR_GREATER_THAN 3
297 int t = CompareStringW(da->win_langid, 0, da->win_data, -1, db->win_data, -1);
298 if (t == 0){
299 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
300 exit(-1);
301 } else{
302 return t - CSTR_EQUAL;
303 }
304 }
305
win_cmp_lenCmdQsort306 static int win_cmp_len(const void *a, const void *b) {
307 QCAST();
308 int t = CompareStringW(da->win_langid, 0, da->win_data, da->win_data_len, db->win_data, db->win_data_len);
309 if (t == 0){
310 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
311 exit(-1);
312 } else{
313 return t - CSTR_EQUAL;
314 }
315 }
316
317 #define QFUNC(name, func, data) \
318 static int name (const void *a, const void *b){ \
319 QCAST(); \
320 return func(da->data, db->data); \
321 }
322
323 QFUNC(posix_strcoll_null, strcoll, posix_data)
324 QFUNC(posix_cmpkey, strcmp, posix_key)
325 QFUNC(win_cmpkey, strcmp, win_key)
326 QFUNC(win_wcscmp, wcscmp, win_data)
327 QFUNC(icu_strcmp, u_strcmp, icu_data)
328 QFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
329
330 private:
331 static int32_t exec_count; // potential muilt-thread problem
332
333 typedef int (* Func)(const void *, const void *);
334
335 Func fn;
336 void * base; //Start of target array.
337 int32_t num; //Array size in elements.
338 int32_t width; //Element size in bytes.
339
340 void * backup; //copy source of base
341 public:
CmdQsortCmdQsort342 CmdQsort(UErrorCode & status,void *theBase, int32_t num, int32_t width, Func fn, int32_t,int32_t)
343 :backup(theBase),num(num),width(width),fn(fn){
344 base = malloc(num * width);
345 time_empty(100, &status); // warm memory/cache
346 }
347
~CmdQsortCmdQsort348 ~CmdQsort(){
349 free(base);
350 }
351
empty_callCmdQsort352 void empty_call(){
353 exec_count = 0;
354 memcpy(base, backup, num * width);
355 }
356
time_emptyCmdQsort357 double time_empty(int32_t n, UErrorCode* status) {
358 UTimer start, stop;
359 utimer_getTime(&start);
360 while (n-- > 0) {
361 empty_call();
362 }
363 utimer_getTime(&stop);
364 return utimer_getDeltaSeconds(&start,&stop); // ms
365 }
366
callCmdQsort367 virtual void call(UErrorCode* status){
368 exec_count = 0;
369 memcpy(base, backup, num * width);
370 qsort(base, num, width, fn);
371 }
timeCmdQsort372 virtual double time(int32_t n, UErrorCode* status) {
373 double t1 = time_empty(n,status);
374 double t2 = UPerfFunction::time(n, status);
375 return t2-t1;// < 0 ? t2 : t2-t1;
376 }
377
getOperationsPerIterationCmdQsort378 virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
379 };
380 int32_t CmdQsort::exec_count;
381
382
383 class CmdBinSearch : public UPerfFunction{
384 public:
385 typedef int (CmdBinSearch::* Func)(int, int);
386
387 UCollator * col;
388 DWORD win_langid;
389 int32_t count;
390 DataIndex * rnd;
391 DataIndex * ord;
392 Func fn;
393 int32_t exec_count;
394
CmdBinSearch(UErrorCode,UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)395 CmdBinSearch(UErrorCode, UCollator * col,DWORD win_langid,int32_t count,DataIndex * rnd,DataIndex * ord,Func fn)
396 :col(col),win_langid(win_langid), count(count), rnd(rnd), ord(ord), fn(fn),exec_count(0){}
397
398
call(UErrorCode * status)399 virtual void call(UErrorCode* status){
400 exec_count = 0;
401 for(int32_t i = 0; i< count; i++){ // search all data
402 binary_search(i);
403 }
404 }
getOperationsPerIteration()405 virtual long getOperationsPerIteration(){ return exec_count?exec_count:1;}
406
binary_search(int32_t random)407 void binary_search(int32_t random) {
408 int low = 0;
409 int high = count - 1;
410 int guess;
411 int last_guess = -1;
412 int r;
413 while (TRUE) {
414 guess = (high + low)/2;
415 if (last_guess == guess) break; // nothing to search
416
417 r = (this->*fn)(random, guess);
418 exec_count++;
419
420 if (r == 0)
421 return; // found, search end.
422 if (r < 0) {
423 high = guess;
424 } else {
425 low = guess;
426 }
427 last_guess = guess;
428 }
429 }
430
icu_strcoll_null(int32_t i,int32_t j)431 int icu_strcoll_null(int32_t i, int32_t j){
432 return ucol_strcoll(col, rnd[i].icu_data, -1, ord[j].icu_data,-1);
433 }
434
icu_strcoll_len(int32_t i,int32_t j)435 int icu_strcoll_len(int32_t i, int32_t j){
436 return ucol_strcoll(col, rnd[i].icu_data, rnd[i].icu_data_len, ord[j].icu_data, ord[j].icu_data_len);
437 }
438
icu_cmpkey(int32_t i,int32_t j)439 int icu_cmpkey(int32_t i, int32_t j) {
440 return strcmp( (char *) rnd[i].icu_key, (char *) ord[j].icu_key );
441 }
442
win_cmp_null(int32_t i,int32_t j)443 int win_cmp_null(int32_t i, int32_t j) {
444 int t = CompareStringW(win_langid, 0, rnd[i].win_data, -1, ord[j].win_data, -1);
445 if (t == 0){
446 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
447 exit(-1);
448 } else{
449 return t - CSTR_EQUAL;
450 }
451 }
452
win_cmp_len(int32_t i,int32_t j)453 int win_cmp_len(int32_t i, int32_t j) {
454 int t = CompareStringW(win_langid, 0, rnd[i].win_data, rnd[i].win_data_len, ord[j].win_data, ord[j].win_data_len);
455 if (t == 0){
456 fprintf(stderr, "CompareStringW error, error number %x\n", GetLastError());
457 exit(-1);
458 } else{
459 return t - CSTR_EQUAL;
460 }
461 }
462
463 #define BFUNC(name, func, data) \
464 int name(int32_t i, int32_t j) { \
465 return func(rnd[i].data, ord[j].data); \
466 }
467
468 BFUNC(posix_strcoll_null, strcoll, posix_data)
469 BFUNC(posix_cmpkey, strcmp, posix_key)
470 BFUNC(win_cmpkey, strcmp, win_key)
471 BFUNC(win_wcscmp, wcscmp, win_data)
472 BFUNC(icu_strcmp, u_strcmp, icu_data)
473 BFUNC(icu_cmpcpo, u_strcmpCodePointOrder, icu_data)
474 };
475
// The collation performance test: parses the collation related command line
// options, loads the input lines in every representation that is compared
// (see prepareData()) and hands out the individual test commands from
// runIndexedTest().
class CollPerfTest : public UPerfTest {
public:
    UCollator * col;     // ICU collator, configured from the command line options
    DWORD win_langid;    // Windows locale identifier for the Win32 calls

    UChar * icu_data_all;        // all input strings concatenated, one terminating zero
    int32_t icu_data_all_len;    // length of icu_data_all including that terminator

    int32_t count;               // number of input strings
    CA_uchar * icu_data;         // the input strings as UChar
    CA_uint8 * icu_key;          // their ICU sort keys
    CA_char * posix_data;        // the input strings converted with the "utf-8" converter
    CA_char * posix_key;         // their strxfrm() keys
    CA_win_wchar * win_data;     // the input strings as WCHAR
    CA_char * win_key;           // their LCMapStringW() sort keys

    // Arrays of 'count' rows each. The ord_* arrays are copies of rnd_index,
    // each sorted with a different CmdQsort comparator in prepareData().
    DataIndex * rnd_index; // random by icu key
    DataIndex * ord_win_data;    // sorted with win_cmp_len
    DataIndex * ord_win_key;     // sorted with win_cmpkey
    DataIndex * ord_posix_data;  // sorted with posix_strcoll_null
    DataIndex * ord_posix_key;   // sorted with posix_cmpkey
    DataIndex * ord_icu_data;    // sorted with icu_strcoll_len
    DataIndex * ord_icu_key;     // sorted with icu_cmpkey
    DataIndex * ord_win_wcscmp;  // sorted with win_wcscmp
    DataIndex * ord_icu_strcmp;  // sorted with icu_strcmp
    DataIndex * ord_icu_cmpcpo;  // sorted with icu_cmpcpo
502
// Releases the collator and everything that prepareData() allocated.
// Members that were never allocated are still NULL at this point.
virtual ~CollPerfTest(){
    ucol_close(col);

    // the row arrays (they only point into the storage released below)
    delete[] ord_icu_cmpcpo;
    delete[] ord_icu_strcmp;
    delete[] ord_win_wcscmp;
    delete[] ord_icu_key;
    delete[] ord_icu_data;
    delete[] ord_posix_key;
    delete[] ord_posix_data;
    delete[] ord_win_key;
    delete[] ord_win_data;
    delete[] rnd_index;

    // the string and key storage
    delete win_key;
    delete win_data;
    delete posix_key;
    delete posix_data;
    delete icu_key;
    delete icu_data;
    delete [] icu_data_all;
}
523
CollPerfTest(int32_t argc,const char * argv[],UErrorCode & status)524 CollPerfTest(int32_t argc, const char* argv[], UErrorCode& status):UPerfTest(argc, argv, status){
525 col = NULL;
526 icu_data_all = NULL;
527 icu_data = NULL;
528 icu_key = NULL;
529 posix_data = NULL;
530 posix_key = NULL;
531 win_data =NULL;
532 win_key = NULL;
533
534 rnd_index = NULL;
535 ord_win_data= NULL;
536 ord_win_key= NULL;
537 ord_posix_data= NULL;
538 ord_posix_key= NULL;
539 ord_icu_data= NULL;
540 ord_icu_key= NULL;
541 ord_win_wcscmp = NULL;
542 ord_icu_strcmp = NULL;
543 ord_icu_cmpcpo = NULL;
544
545 if (U_FAILURE(status)){
546 return;
547 }
548
549 // Parse additional arguments
550
551 UOption options[] = {
552 UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG), // Windows Language ID number.
553 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG), // --rulefile <filename>
554 // Collation related arguments. All are optional.
555 // To simplify parsing, two choice arguments are disigned as NO_ARG.
556 // The default value is UPPER word in the comment
557 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG), // --french <on | OFF>
558 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG), // --alternate <NON_IGNORE | shifted>
559 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG), // --casefirst <lower | upper | OFF>
560 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG), // --caselevel <on | OFF>
561 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG), // --normal <on | OFF>
562 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG), // --strength <1-5>
563 };
564 int32_t opt_len = (sizeof(options)/sizeof(options[0]));
565 enum {i, r,f,a,c,l,n,s}; // The buffer between the option items' order and their references
566
567 _remainingArgc = u_parseArgs(_remainingArgc, (char**)argv, opt_len, options);
568
569 if (_remainingArgc < 0){
570 status = U_ILLEGAL_ARGUMENT_ERROR;
571 return;
572 }
573
574 if (locale == NULL){
575 locale = "en_US"; // set default locale
576 }
577
578 //#ifdef U_WINDOWS
579 if (options[i].doesOccur) {
580 char *endp;
581 int tmp = strtol(options[i].value, &endp, 0);
582 if (endp == options[i].value) {
583 status = U_ILLEGAL_ARGUMENT_ERROR;
584 return;
585 }
586 win_langid = MAKELCID(tmp, SORT_DEFAULT);
587 } else {
588 win_langid = uloc_getLCID(locale);
589 }
590 //#endif
591
592 // Set up an ICU collator
593 if (options[r].doesOccur) {
594 // TODO: implement it
595 } else {
596 col = ucol_open(locale, &status);
597 if (U_FAILURE(status)) {
598 return;
599 }
600 }
601
602 if (options[f].doesOccur) {
603 ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
604 } else {
605 ucol_setAttribute(col, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
606 }
607
608 if (options[a].doesOccur) {
609 ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
610 }
611
612 if (options[c].doesOccur) { // strcmp() has i18n encoding problem
613 if (strcmp("lower", options[c].value) == 0){
614 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
615 } else if (strcmp("upper", options[c].value) == 0) {
616 ucol_setAttribute(col, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
617 } else {
618 status = U_ILLEGAL_ARGUMENT_ERROR;
619 return;
620 }
621 }
622
623 if (options[l].doesOccur){
624 ucol_setAttribute(col, UCOL_CASE_LEVEL, UCOL_ON, &status);
625 }
626
627 if (options[n].doesOccur){
628 ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
629 }
630
631 if (options[s].doesOccur) {
632 char *endp;
633 int tmp = strtol(options[l].value, &endp, 0);
634 if (endp == options[l].value) {
635 status = U_ILLEGAL_ARGUMENT_ERROR;
636 return;
637 }
638 switch (tmp) {
639 case 1: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_PRIMARY, &status); break;
640 case 2: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_SECONDARY, &status); break;
641 case 3: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status); break;
642 case 4: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status); break;
643 case 5: ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status); break;
644 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
645 }
646 }
647 prepareData(status);
648 }
649
// Registers one test without the explicit 'id' that the TESTCASE(id,test)
// style macros need: the tests are numbered by the running counter 'temp' in
// the order in which the TEST lines appear.
// For the test whose number equals 'index', sets 'name' and leaves the
// calling function: with NULL when only the name was asked for (exec is
// false) or when constructing the command failed, otherwise with a new
// 'classname' command built from (status, arg1 .. arg6).
// The invocation supplies the terminating semicolon.
#define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
    if(temp == index) {\
        name = #testname;\
        if (!exec) {\
            return NULL;\
        }\
        UErrorCode status = U_ZERO_ERROR;\
        UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\
        if (U_FAILURE(status)) {\
            delete t;\
            return NULL;\
        }\
        return t;\
    }\
    temp++
669
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par=NULL)670 virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
671 int temp = 0;
672
673 #define TEST_KEYGEN(testname, func)\
674 TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
675 TEST_KEYGEN(TestIcu_KeyGen_null, icu_key_null);
676 TEST_KEYGEN(TestIcu_KeyGen_len, icu_key_len);
677 TEST_KEYGEN(TestPosix_KeyGen_null, posix_key_null);
678 TEST_KEYGEN(TestWin_KeyGen_null, win_key_null);
679 TEST_KEYGEN(TestWin_KeyGen_len, win_key_len);
680
681 #define TEST_ITER(testname, func)\
682 TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
683 TEST_ITER(TestIcu_ForwardIter_null, icu_forward_null);
684 TEST_ITER(TestIcu_ForwardIter_len, icu_forward_len);
685 TEST_ITER(TestIcu_BackwardIter_null, icu_backward_null);
686 TEST_ITER(TestIcu_BackwardIter_len, icu_backward_len);
687
688 #define TEST_ITER_ALL(testname, func)\
689 TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
690 TEST_ITER_ALL(TestIcu_ForwardIter_all_null, forward_null);
691 TEST_ITER_ALL(TestIcu_ForwardIter_all_len, forward_len);
692 TEST_ITER_ALL(TestIcu_BackwardIter_all_null, backward_null);
693 TEST_ITER_ALL(TestIcu_BackwardIter_all_len, backward_len);
694
695 #define TEST_QSORT(testname, func)\
696 TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
697 TEST_QSORT(TestIcu_qsort_strcoll_null, icu_strcoll_null);
698 TEST_QSORT(TestIcu_qsort_strcoll_len, icu_strcoll_len);
699 TEST_QSORT(TestIcu_qsort_usekey, icu_cmpkey);
700 TEST_QSORT(TestPosix_qsort_strcoll_null, posix_strcoll_null);
701 TEST_QSORT(TestPosix_qsort_usekey, posix_cmpkey);
702 TEST_QSORT(TestWin_qsort_CompareStringW_null, win_cmp_null);
703 TEST_QSORT(TestWin_qsort_CompareStringW_len, win_cmp_len);
704 TEST_QSORT(TestWin_qsort_usekey, win_cmpkey);
705
706 #define TEST_BIN(testname, func)\
707 TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
708 TEST_BIN(TestIcu_BinarySearch_strcoll_null, icu_strcoll_null);
709 TEST_BIN(TestIcu_BinarySearch_strcoll_len, icu_strcoll_len);
710 TEST_BIN(TestIcu_BinarySearch_usekey, icu_cmpkey);
711 TEST_BIN(TestIcu_BinarySearch_strcmp, icu_strcmp);
712 TEST_BIN(TestIcu_BinarySearch_cmpCPO, icu_cmpcpo);
713 TEST_BIN(TestPosix_BinarySearch_strcoll_null, posix_strcoll_null);
714 TEST_BIN(TestPosix_BinarySearch_usekey, posix_cmpkey);
715 TEST_BIN(TestWin_BinarySearch_CompareStringW_null, win_cmp_null);
716 TEST_BIN(TestWin_BinarySearch_CompareStringW_len, win_cmp_len);
717 TEST_BIN(TestWin_BinarySearch_usekey, win_cmpkey);
718 TEST_BIN(TestWin_BinarySearch_wcscmp, win_wcscmp);
719
720 name="";
721 return NULL;
722 }
723
724
725
prepareData(UErrorCode & status)726 void prepareData(UErrorCode& status){
727 if(U_FAILURE(status)) return;
728 if (icu_data) return; // prepared
729
730 icu_data = new CA_uchar();
731
732 // Following code is borrowed from UPerfTest::getLines();
733 const UChar* line=NULL;
734 int32_t len =0;
735 for (;;) {
736 line = ucbuf_readline(ucharBuf,&len,&status);
737 if(line == NULL || U_FAILURE(status)){break;}
738
739 // Refer to the source code of ucbuf_readline()
740 // 1. 'len' includs the line terminal symbols
741 // 2. The length of the line terminal symbols is only one character
742 // 3. The Windows CR LF line terminal symbols will be converted to CR
743
744 if (len == 1) {
745 continue; //skip empty line
746 } else {
747 icu_data->append_one(len);
748 memcpy(icu_data->last(), line, len * sizeof(UChar));
749 icu_data->last()[len -1] = NULL;
750 }
751 }
752 if(U_FAILURE(status)) return;
753
754 // UTF-16 -> UTF-8 conversion.
755 UConverter *conv = ucnv_open("utf-8", &status); // just UTF-8 for now.
756 if (U_FAILURE(status)) return;
757
758 count = icu_data->count;
759
760 icu_data_all_len = icu_data->index[count]; // includes all NULLs
761 icu_data_all_len -= count; // excludes all NULLs
762 icu_data_all_len += 1; // the terminal NULL
763 icu_data_all = new UChar[icu_data_all_len];
764 icu_data_all[icu_data_all_len - 1] = 0; //the terminal NULL
765
766 icu_key = new CA_uint8;
767 win_data = new CA_win_wchar;
768 win_key = new CA_char;
769 posix_data = new CA_char;
770 posix_key = new CA_char;
771 rnd_index = new DataIndex[count];
772 DataIndex::win_langid = win_langid;
773 DataIndex::col = col;
774
775
776 UChar * p = icu_data_all;
777 int32_t s;
778 int32_t t;
779 for (int i=0; i < count; i++) {
780 // ICU all data
781 s = sizeof(UChar) * icu_data->lengthOf(i);
782 memcpy(p, icu_data->dataOf(i), s);
783 p += icu_data->lengthOf(i);
784
785 // ICU data
786
787 // ICU key
788 s = ucol_getSortKey(col, icu_data->dataOf(i), -1,NULL, 0);
789 icu_key->append_one(s);
790 t = ucol_getSortKey(col, icu_data->dataOf(i), -1,icu_key->last(), s);
791 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
792
793 // POSIX data
794 s = ucnv_fromUChars(conv,NULL, 0, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
795 if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
796 status = U_ZERO_ERROR;
797 } else {
798 return;
799 }
800 posix_data->append_one(s + 1); // plus terminal NULL
801 t = ucnv_fromUChars(conv,posix_data->last(), s, icu_data->dataOf(i), icu_data->lengthOf(i), &status);
802 if (U_FAILURE(status)) return;
803 if ( t != s){status = U_INVALID_FORMAT_ERROR;return;}
804 posix_data->last()[s] = 0;
805
806 // POSIX key
807 s = strxfrm(NULL, posix_data->dataOf(i), 0);
808 if (s == INT_MAX){status = U_INVALID_FORMAT_ERROR;return;}
809 posix_key->append_one(s);
810 t = strxfrm(posix_key->last(), posix_data->dataOf(i), s);
811 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
812
813 // Win data
814 s = icu_data->lengthOf(i) + 1; // plus terminal NULL
815 win_data->append_one(s);
816 memcpy(win_data->last(), icu_data->dataOf(i), sizeof(WCHAR) * s);
817
818 // Win key
819 s = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), NULL,0);
820 if (s == 0) {status = U_INVALID_FORMAT_ERROR;return;}
821 win_key->append_one(s);
822 t = LCMapStringW(win_langid, LCMAP_SORTKEY, win_data->dataOf(i), win_data->lengthOf(i), (WCHAR *)(win_key->last()),s);
823 if (t != s) {status = U_INVALID_FORMAT_ERROR;return;}
824
825 };
826
827 // append_one() will make points shifting, should not merge following code into previous iteration
828 for (int i=0; i < count; i++) {
829 rnd_index[i].icu_key = icu_key->dataOf(i);
830 rnd_index[i].icu_data = icu_data->dataOf(i);
831 rnd_index[i].icu_data_len = icu_data->lengthOf(i);
832 rnd_index[i].posix_key = posix_key->last();
833 rnd_index[i].posix_data = posix_data->dataOf(i);
834 rnd_index[i].posix_data_len = posix_data->lengthOf(i);
835 rnd_index[i].win_key = win_key->dataOf(i);
836 rnd_index[i].win_data = win_data->dataOf(i);
837 rnd_index[i].win_data_len = win_data->lengthOf(i);
838 };
839
840 ucnv_close(conv);
841 qsort(rnd_index, count, sizeof(DataIndex), CmdQsort::q_random);
842
843 #define SORT(data, func) \
844 data = new DataIndex[count];\
845 memcpy(data, rnd_index, count * sizeof(DataIndex));\
846 qsort(data, count, sizeof(DataIndex), CmdQsort::func)
847
848 SORT(ord_icu_data, icu_strcoll_len);
849 SORT(ord_icu_key, icu_cmpkey);
850 SORT(ord_posix_data, posix_strcoll_null);
851 SORT(ord_posix_key, posix_cmpkey);
852 SORT(ord_win_data, win_cmp_len);
853 SORT(ord_win_key, win_cmpkey);
854 SORT(ord_win_wcscmp, win_wcscmp);
855 SORT(ord_icu_strcmp, icu_strcmp);
856 SORT(ord_icu_cmpcpo, icu_cmpcpo);
857 }
858 };
859
860
main(int argc,const char * argv[])861 int main(int argc, const char *argv[])
862 {
863
864 UErrorCode status = U_ZERO_ERROR;
865 CollPerfTest test(argc, argv, status);
866
867 if (U_FAILURE(status)){
868 printf("The error is %s\n", u_errorName(status));
869 //TODO: print usage here
870 return status;
871 }
872
873 if (test.run() == FALSE){
874 fprintf(stderr, "FAILED: Tests could not be run please check the "
875 "arguments.\n");
876 return -1;
877 }
878 return 0;
879 }
880
881