1 /*
2 ***********************************************************************
3 * © 2016 and later: Unicode, Inc. and others.
4 * License & terms of use: http://www.unicode.org/copyright.html
5 ***********************************************************************
6 ***********************************************************************
7 * Copyright (c) 2013-2014, International Business Machines
8 * Corporation and others. All Rights Reserved.
9 ***********************************************************************
10 */
11
12 #include <string.h>
13 #include "unicode/localpointer.h"
14 #include "unicode/uperf.h"
15 #include "unicode/ucol.h"
16 #include "unicode/coll.h"
17 #include "unicode/uiter.h"
18 #include "unicode/ustring.h"
19 #include "unicode/sortkey.h"
20 #include "uarrsort.h"
21 #include "uoptions.h"
22 #include "ustr_imp.h"
23
/*
 * COMPACT_ARRAY(CompactArrays, UNIT) expands to a struct named CompactArrays that
 * keeps many strings of UNIT back to back in one growable buffer.
 *
 *   - index holds count + 1 offsets into data; string i starts at data[index[i]]
 *     and its slot ends just before data[index[i + 1]]. The slot includes the
 *     terminator, which is why lengthOf() subtracts one.
 *   - To add a string, call append_one(len) with the slot size (terminator
 *     included) and then fill the slot returned by last().
 *   - The copy constructor and copy assignment operator are declared but not
 *     defined inside the struct.
 *   - The results of malloc()/realloc() are not checked.
 */
#define COMPACT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays { \
    CompactArrays(const CompactArrays &); \
    CompactArrays &operator=(const CompactArrays &); \
    int32_t count;   /* number of strings stored so far */ \
    int32_t *index;  /* count + 1 offsets into data; index[0] is always 0 */ \
    UNIT *data;      /* storage shared by all strings */ \
    \
    ~CompactArrays() { \
        free(index); \
        free(data); \
    } \
    CompactArrays() : count(0), index(NULL), data(NULL) { \
        index = (int32_t *) malloc(sizeof(int32_t)); \
        *index = 0; \
    } \
    /* Reserves a slot of theLen units (terminator included) at the end. */ \
    void append_one(int32_t theLen) { \
        const int32_t newCount = count + 1; \
        index = (int32_t *) realloc(index, sizeof(int32_t) * (newCount + 1)); \
        index[newCount] = index[newCount - 1] + theLen; \
        data = (UNIT *) realloc(data, sizeof(UNIT) * index[newCount]); \
        count = newCount; \
    } \
    /* Start of the most recently appended slot. */ \
    UNIT *last() { \
        return &data[index[count - 1]]; \
    } \
    /* Start of string i. */ \
    const UNIT *dataOf(int32_t i) const { \
        return &data[index[i]]; \
    } \
    /* Length of string i, not counting its terminator. */ \
    int32_t lengthOf(int i) const { \
        const int32_t slotSize = index[i + 1] - index[i]; \
        return slotSize - 1; \
    } \
};
47
/* Compact string array whose units are UChar; used by the UTF-16 test cases below. */
COMPACT_ARRAY(CA_uchar, UChar)
/* Compact string array whose units are char; used by the UTF-8 test cases below. */
COMPACT_ARRAY(CA_char, char)

/* Upper bound on the number of strings the permuting test cases sample from their data. */
#define MAX_TEST_STRINGS_FOR_PERMUTING 1000
52
53 // C API test cases
54
55 //
56 // Test case taking a single test data array, calling ucol_strcoll by permuting the test data
57 //
58 class Strcoll : public UPerfFunction
59 {
60 public:
61 Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen);
62 ~Strcoll();
63 virtual void call(UErrorCode* status);
64 virtual long getOperationsPerIteration();
65
66 private:
67 const UCollator *coll;
68 const CA_uchar *source;
69 UBool useLen;
70 int32_t maxTestStrings;
71 };
72
Strcoll(const UCollator * coll,const CA_uchar * source,UBool useLen)73 Strcoll::Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen)
74 : coll(coll),
75 source(source),
76 useLen(useLen)
77 {
78 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
79 }
80
~Strcoll()81 Strcoll::~Strcoll()
82 {
83 }
84
call(UErrorCode * status)85 void Strcoll::call(UErrorCode* status)
86 {
87 if (U_FAILURE(*status)) return;
88
89 // call strcoll for permutation
90 int32_t divisor = source->count / maxTestStrings;
91 int32_t srcLen, tgtLen;
92 int32_t cmp = 0;
93 for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
94 if (i % divisor) continue;
95 numTestStringsI++;
96 srcLen = useLen ? source->lengthOf(i) : -1;
97 for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
98 if (j % divisor) continue;
99 numTestStringsJ++;
100 tgtLen = useLen ? source->lengthOf(j) : -1;
101 cmp += ucol_strcoll(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
102 }
103 }
104 // At the end, cmp must be 0
105 if (cmp != 0) {
106 *status = U_INTERNAL_PROGRAM_ERROR;
107 }
108 }
109
getOperationsPerIteration()110 long Strcoll::getOperationsPerIteration()
111 {
112 return maxTestStrings * maxTestStrings;
113 }
114
115 //
116 // Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
117 //
118 class Strcoll_2 : public UPerfFunction
119 {
120 public:
121 Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
122 ~Strcoll_2();
123 virtual void call(UErrorCode* status);
124 virtual long getOperationsPerIteration();
125
126 private:
127 const UCollator *coll;
128 const CA_uchar *source;
129 const CA_uchar *target;
130 UBool useLen;
131 };
132
Strcoll_2(const UCollator * coll,const CA_uchar * source,const CA_uchar * target,UBool useLen)133 Strcoll_2::Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
134 : coll(coll),
135 source(source),
136 target(target),
137 useLen(useLen)
138 {
139 }
140
~Strcoll_2()141 Strcoll_2::~Strcoll_2()
142 {
143 }
144
call(UErrorCode * status)145 void Strcoll_2::call(UErrorCode* status)
146 {
147 if (U_FAILURE(*status)) return;
148
149 // call strcoll for two strings at the same index
150 if (source->count < target->count) {
151 *status = U_ILLEGAL_ARGUMENT_ERROR;
152 } else {
153 for (int32_t i = 0; i < source->count; i++) {
154 int32_t srcLen = useLen ? source->lengthOf(i) : -1;
155 int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
156 ucol_strcoll(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
157 }
158 }
159 }
160
getOperationsPerIteration()161 long Strcoll_2::getOperationsPerIteration()
162 {
163 return source->count;
164 }
165
166
167 //
168 // Test case taking a single test data array, calling ucol_strcollUTF8 by permuting the test data
169 //
170 class StrcollUTF8 : public UPerfFunction
171 {
172 public:
173 StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen);
174 ~StrcollUTF8();
175 virtual void call(UErrorCode* status);
176 virtual long getOperationsPerIteration();
177
178 private:
179 const UCollator *coll;
180 const CA_char *source;
181 UBool useLen;
182 int32_t maxTestStrings;
183 };
184
StrcollUTF8(const UCollator * coll,const CA_char * source,UBool useLen)185 StrcollUTF8::StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen)
186 : coll(coll),
187 source(source),
188 useLen(useLen)
189 {
190 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
191 }
192
~StrcollUTF8()193 StrcollUTF8::~StrcollUTF8()
194 {
195 }
196
call(UErrorCode * status)197 void StrcollUTF8::call(UErrorCode* status)
198 {
199 if (U_FAILURE(*status)) return;
200
201 // call strcollUTF8 for permutation
202 int32_t divisor = source->count / maxTestStrings;
203 int32_t srcLen, tgtLen;
204 int32_t cmp = 0;
205 for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
206 if (i % divisor) continue;
207 numTestStringsI++;
208 srcLen = useLen ? source->lengthOf(i) : -1;
209 for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
210 if (j % divisor) continue;
211 numTestStringsJ++;
212 tgtLen = useLen ? source->lengthOf(j) : -1;
213 cmp += ucol_strcollUTF8(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen, status);
214 }
215 }
216 // At the end, cmp must be 0
217 if (cmp != 0) {
218 *status = U_INTERNAL_PROGRAM_ERROR;
219 }
220 }
221
getOperationsPerIteration()222 long StrcollUTF8::getOperationsPerIteration()
223 {
224 return maxTestStrings * maxTestStrings;
225 }
226
227 //
228 // Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
229 //
230 class StrcollUTF8_2 : public UPerfFunction
231 {
232 public:
233 StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen);
234 ~StrcollUTF8_2();
235 virtual void call(UErrorCode* status);
236 virtual long getOperationsPerIteration();
237
238 private:
239 const UCollator *coll;
240 const CA_char *source;
241 const CA_char *target;
242 UBool useLen;
243 };
244
StrcollUTF8_2(const UCollator * coll,const CA_char * source,const CA_char * target,UBool useLen)245 StrcollUTF8_2::StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen)
246 : coll(coll),
247 source(source),
248 target(target),
249 useLen(useLen)
250 {
251 }
252
~StrcollUTF8_2()253 StrcollUTF8_2::~StrcollUTF8_2()
254 {
255 }
256
call(UErrorCode * status)257 void StrcollUTF8_2::call(UErrorCode* status)
258 {
259 if (U_FAILURE(*status)) return;
260
261 // call strcoll for two strings at the same index
262 if (source->count < target->count) {
263 *status = U_ILLEGAL_ARGUMENT_ERROR;
264 } else {
265 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
266 int32_t srcLen = useLen ? source->lengthOf(i) : -1;
267 int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
268 ucol_strcollUTF8(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen, status);
269 }
270 }
271 }
272
getOperationsPerIteration()273 long StrcollUTF8_2::getOperationsPerIteration()
274 {
275 return source->count;
276 }
277
278 //
279 // Test case taking a single test data array, calling ucol_getSortKey for each
280 //
281 class GetSortKey : public UPerfFunction
282 {
283 public:
284 GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen);
285 ~GetSortKey();
286 virtual void call(UErrorCode* status);
287 virtual long getOperationsPerIteration();
288
289 private:
290 const UCollator *coll;
291 const CA_uchar *source;
292 UBool useLen;
293 };
294
GetSortKey(const UCollator * coll,const CA_uchar * source,UBool useLen)295 GetSortKey::GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen)
296 : coll(coll),
297 source(source),
298 useLen(useLen)
299 {
300 }
301
~GetSortKey()302 GetSortKey::~GetSortKey()
303 {
304 }
305
306 #define KEY_BUF_SIZE 512
307
call(UErrorCode * status)308 void GetSortKey::call(UErrorCode* status)
309 {
310 if (U_FAILURE(*status)) return;
311
312 uint8_t key[KEY_BUF_SIZE];
313 int32_t len;
314
315 if (useLen) {
316 for (int32_t i = 0; i < source->count; i++) {
317 len = ucol_getSortKey(coll, source->dataOf(i), source->lengthOf(i), key, KEY_BUF_SIZE);
318 }
319 } else {
320 for (int32_t i = 0; i < source->count; i++) {
321 len = ucol_getSortKey(coll, source->dataOf(i), -1, key, KEY_BUF_SIZE);
322 }
323 }
324 }
325
getOperationsPerIteration()326 long GetSortKey::getOperationsPerIteration()
327 {
328 return source->count;
329 }
330
331 //
332 // Test case taking a single test data array in UTF-16, calling ucol_nextSortKeyPart for each for the
333 // given buffer size
334 //
335 class NextSortKeyPart : public UPerfFunction
336 {
337 public:
338 NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration = -1);
339 ~NextSortKeyPart();
340 virtual void call(UErrorCode* status);
341 virtual long getOperationsPerIteration();
342 virtual long getEventsPerIteration();
343
344 private:
345 const UCollator *coll;
346 const CA_uchar *source;
347 int32_t bufSize;
348 int32_t maxIteration;
349 long events;
350 };
351
352 // Note: maxIteration = -1 -> repeat until the end of collation key
NextSortKeyPart(const UCollator * coll,const CA_uchar * source,int32_t bufSize,int32_t maxIteration)353 NextSortKeyPart::NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
354 : coll(coll),
355 source(source),
356 bufSize(bufSize),
357 maxIteration(maxIteration),
358 events(0)
359 {
360 }
361
~NextSortKeyPart()362 NextSortKeyPart::~NextSortKeyPart()
363 {
364 }
365
call(UErrorCode * status)366 void NextSortKeyPart::call(UErrorCode* status)
367 {
368 if (U_FAILURE(*status)) return;
369
370 uint8_t *part = (uint8_t *)malloc(bufSize);
371 uint32_t state[2];
372 UCharIterator iter;
373
374 events = 0;
375 for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
376 uiter_setString(&iter, source->dataOf(i), source->lengthOf(i));
377 state[0] = 0;
378 state[1] = 0;
379 int32_t partLen = bufSize;
380 for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
381 partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
382 events++;
383 }
384 }
385 free(part);
386 }
387
getOperationsPerIteration()388 long NextSortKeyPart::getOperationsPerIteration()
389 {
390 return source->count;
391 }
392
getEventsPerIteration()393 long NextSortKeyPart::getEventsPerIteration()
394 {
395 return events;
396 }
397
398 //
399 // Test case taking a single test data array in UTF-8, calling ucol_nextSortKeyPart for each for the
400 // given buffer size
401 //
402 class NextSortKeyPartUTF8 : public UPerfFunction
403 {
404 public:
405 NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration = -1);
406 ~NextSortKeyPartUTF8();
407 virtual void call(UErrorCode* status);
408 virtual long getOperationsPerIteration();
409 virtual long getEventsPerIteration();
410
411 private:
412 const UCollator *coll;
413 const CA_char *source;
414 int32_t bufSize;
415 int32_t maxIteration;
416 long events;
417 };
418
419 // Note: maxIteration = -1 -> repeat until the end of collation key
NextSortKeyPartUTF8(const UCollator * coll,const CA_char * source,int32_t bufSize,int32_t maxIteration)420 NextSortKeyPartUTF8::NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
421 : coll(coll),
422 source(source),
423 bufSize(bufSize),
424 maxIteration(maxIteration),
425 events(0)
426 {
427 }
428
~NextSortKeyPartUTF8()429 NextSortKeyPartUTF8::~NextSortKeyPartUTF8()
430 {
431 }
432
call(UErrorCode * status)433 void NextSortKeyPartUTF8::call(UErrorCode* status)
434 {
435 if (U_FAILURE(*status)) return;
436
437 uint8_t *part = (uint8_t *)malloc(bufSize);
438 uint32_t state[2];
439 UCharIterator iter;
440
441 events = 0;
442 for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
443 uiter_setUTF8(&iter, source->dataOf(i), source->lengthOf(i));
444 state[0] = 0;
445 state[1] = 0;
446 int32_t partLen = bufSize;
447 for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
448 partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
449 events++;
450 }
451 }
452 free(part);
453 }
454
getOperationsPerIteration()455 long NextSortKeyPartUTF8::getOperationsPerIteration()
456 {
457 return source->count;
458 }
459
getEventsPerIteration()460 long NextSortKeyPartUTF8::getEventsPerIteration()
461 {
462 return events;
463 }
464
465 // CPP API test cases
466
467 //
468 // Test case taking a single test data array, calling Collator::compare by permuting the test data
469 //
470 class CppCompare : public UPerfFunction
471 {
472 public:
473 CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen);
474 ~CppCompare();
475 virtual void call(UErrorCode* status);
476 virtual long getOperationsPerIteration();
477
478 private:
479 const Collator *coll;
480 const CA_uchar *source;
481 UBool useLen;
482 int32_t maxTestStrings;
483 };
484
CppCompare(const Collator * coll,const CA_uchar * source,UBool useLen)485 CppCompare::CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen)
486 : coll(coll),
487 source(source),
488 useLen(useLen)
489 {
490 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
491 }
492
~CppCompare()493 CppCompare::~CppCompare()
494 {
495 }
496
call(UErrorCode * status)497 void CppCompare::call(UErrorCode* status) {
498 if (U_FAILURE(*status)) return;
499
500 // call compare for permutation of test data
501 int32_t divisor = source->count / maxTestStrings;
502 int32_t srcLen, tgtLen;
503 int32_t cmp = 0;
504 for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
505 if (i % divisor) continue;
506 numTestStringsI++;
507 srcLen = useLen ? source->lengthOf(i) : -1;
508 for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
509 if (j % divisor) continue;
510 numTestStringsJ++;
511 tgtLen = useLen ? source->lengthOf(j) : -1;
512 cmp += coll->compare(source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
513 }
514 }
515 // At the end, cmp must be 0
516 if (cmp != 0) {
517 *status = U_INTERNAL_PROGRAM_ERROR;
518 }
519 }
520
getOperationsPerIteration()521 long CppCompare::getOperationsPerIteration()
522 {
523 return maxTestStrings * maxTestStrings;
524 }
525
526 //
527 // Test case taking two test data arrays, calling Collator::compare for strings at a same index
528 //
529 class CppCompare_2 : public UPerfFunction
530 {
531 public:
532 CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
533 ~CppCompare_2();
534 virtual void call(UErrorCode* status);
535 virtual long getOperationsPerIteration();
536
537 private:
538 const Collator *coll;
539 const CA_uchar *source;
540 const CA_uchar *target;
541 UBool useLen;
542 };
543
CppCompare_2(const Collator * coll,const CA_uchar * source,const CA_uchar * target,UBool useLen)544 CppCompare_2::CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
545 : coll(coll),
546 source(source),
547 target(target),
548 useLen(useLen)
549 {
550 }
551
~CppCompare_2()552 CppCompare_2::~CppCompare_2()
553 {
554 }
555
call(UErrorCode * status)556 void CppCompare_2::call(UErrorCode* status) {
557 if (U_FAILURE(*status)) return;
558
559 // call strcoll for two strings at the same index
560 if (source->count < target->count) {
561 *status = U_ILLEGAL_ARGUMENT_ERROR;
562 } else {
563 for (int32_t i = 0; i < source->count; i++) {
564 int32_t srcLen = useLen ? source->lengthOf(i) : -1;
565 int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
566 coll->compare(source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
567 }
568 }
569 }
570
getOperationsPerIteration()571 long CppCompare_2::getOperationsPerIteration()
572 {
573 return source->count;
574 }
575
576
577 //
578 // Test case taking a single test data array, calling Collator::compareUTF8 by permuting the test data
579 //
580 class CppCompareUTF8 : public UPerfFunction
581 {
582 public:
583 CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen);
584 ~CppCompareUTF8();
585 virtual void call(UErrorCode* status);
586 virtual long getOperationsPerIteration();
587
588 private:
589 const Collator *coll;
590 const CA_char *source;
591 UBool useLen;
592 int32_t maxTestStrings;
593 };
594
CppCompareUTF8(const Collator * coll,const CA_char * source,UBool useLen)595 CppCompareUTF8::CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen)
596 : coll(coll),
597 source(source),
598 useLen(useLen)
599 {
600 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
601 }
602
~CppCompareUTF8()603 CppCompareUTF8::~CppCompareUTF8()
604 {
605 }
606
call(UErrorCode * status)607 void CppCompareUTF8::call(UErrorCode* status) {
608 if (U_FAILURE(*status)) return;
609
610 // call compareUTF8 for all permutations
611 int32_t divisor = source->count / maxTestStrings;
612 StringPiece src, tgt;
613 int32_t cmp = 0;
614 for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
615 if (i % divisor) continue;
616 numTestStringsI++;
617
618 if (useLen) {
619 src.set(source->dataOf(i), source->lengthOf(i));
620 } else {
621 src.set(source->dataOf(i));
622 }
623 for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
624 if (j % divisor) continue;
625 numTestStringsJ++;
626
627 if (useLen) {
628 tgt.set(source->dataOf(i), source->lengthOf(i));
629 } else {
630 tgt.set(source->dataOf(i));
631 }
632 cmp += coll->compareUTF8(src, tgt, *status);
633 }
634 }
635 // At the end, cmp must be 0
636 if (cmp != 0) {
637 *status = U_INTERNAL_PROGRAM_ERROR;
638 }
639 }
640
getOperationsPerIteration()641 long CppCompareUTF8::getOperationsPerIteration()
642 {
643 return maxTestStrings * maxTestStrings;
644 }
645
646
647 //
648 // Test case taking two test data arrays, calling Collator::compareUTF8 for strings at a same index
649 //
650 class CppCompareUTF8_2 : public UPerfFunction
651 {
652 public:
653 CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen);
654 ~CppCompareUTF8_2();
655 virtual void call(UErrorCode* status);
656 virtual long getOperationsPerIteration();
657
658 private:
659 const Collator *coll;
660 const CA_char *source;
661 const CA_char *target;
662 UBool useLen;
663 };
664
CppCompareUTF8_2(const Collator * coll,const CA_char * source,const CA_char * target,UBool useLen)665 CppCompareUTF8_2::CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen)
666 : coll(coll),
667 source(source),
668 target(target),
669 useLen(useLen)
670 {
671 }
672
~CppCompareUTF8_2()673 CppCompareUTF8_2::~CppCompareUTF8_2()
674 {
675 }
676
call(UErrorCode * status)677 void CppCompareUTF8_2::call(UErrorCode* status) {
678 if (U_FAILURE(*status)) return;
679
680 // call strcoll for two strings at the same index
681 StringPiece src, tgt;
682 if (source->count < target->count) {
683 *status = U_ILLEGAL_ARGUMENT_ERROR;
684 } else {
685 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
686 if (useLen) {
687 src.set(source->dataOf(i), source->lengthOf(i));
688 tgt.set(target->dataOf(i), target->lengthOf(i));
689 } else {
690 src.set(source->dataOf(i));
691 tgt.set(target->dataOf(i));
692 }
693 coll->compareUTF8(src, tgt, *status);
694 }
695 }
696 }
697
getOperationsPerIteration()698 long CppCompareUTF8_2::getOperationsPerIteration()
699 {
700 return source->count;
701 }
702
703
704 //
705 // Test case taking a single test data array, calling Collator::getCollationKey for each
706 //
707 class CppGetCollationKey : public UPerfFunction
708 {
709 public:
710 CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen);
711 ~CppGetCollationKey();
712 virtual void call(UErrorCode* status);
713 virtual long getOperationsPerIteration();
714
715 private:
716 const Collator *coll;
717 const CA_uchar *source;
718 UBool useLen;
719 };
720
CppGetCollationKey(const Collator * coll,const CA_uchar * source,UBool useLen)721 CppGetCollationKey::CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen)
722 : coll(coll),
723 source(source),
724 useLen(useLen)
725 {
726 }
727
~CppGetCollationKey()728 CppGetCollationKey::~CppGetCollationKey()
729 {
730 }
731
call(UErrorCode * status)732 void CppGetCollationKey::call(UErrorCode* status)
733 {
734 if (U_FAILURE(*status)) return;
735
736 CollationKey key;
737 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
738 coll->getCollationKey(source->dataOf(i), source->lengthOf(i), key, *status);
739 }
740 }
741
getOperationsPerIteration()742 long CppGetCollationKey::getOperationsPerIteration() {
743 return source->count;
744 }
745
746 namespace {
747
748 struct CollatorAndCounter {
CollatorAndCounter__anon519b62b60111::CollatorAndCounter749 CollatorAndCounter(const Collator& coll) : coll(coll), ucoll(NULL), counter(0) {}
CollatorAndCounter__anon519b62b60111::CollatorAndCounter750 CollatorAndCounter(const Collator& coll, const UCollator *ucoll)
751 : coll(coll), ucoll(ucoll), counter(0) {}
752 const Collator& coll;
753 const UCollator *ucoll;
754 int32_t counter;
755 };
756
757 int32_t U_CALLCONV
UniStrCollatorComparator(const void * context,const void * left,const void * right)758 UniStrCollatorComparator(const void* context, const void* left, const void* right) {
759 CollatorAndCounter& cc = *(CollatorAndCounter*)context;
760 const UnicodeString& leftString = **(const UnicodeString**)left;
761 const UnicodeString& rightString = **(const UnicodeString**)right;
762 UErrorCode errorCode = U_ZERO_ERROR;
763 ++cc.counter;
764 return cc.coll.compare(leftString, rightString, errorCode);
765 }
766
767 } // namespace
768
769 class CollPerfFunction : public UPerfFunction {
770 public:
CollPerfFunction(const Collator & coll,const UCollator * ucoll)771 CollPerfFunction(const Collator& coll, const UCollator *ucoll)
772 : coll(coll), ucoll(ucoll), ops(0) {}
773 virtual ~CollPerfFunction();
774 /** Calls call() to set the ops field, and returns that. */
775 virtual long getOperationsPerIteration();
776
777 protected:
778 const Collator& coll;
779 const UCollator *ucoll;
780 int32_t ops;
781 };
782
~CollPerfFunction()783 CollPerfFunction::~CollPerfFunction() {}
784
getOperationsPerIteration()785 long CollPerfFunction::getOperationsPerIteration() {
786 UErrorCode errorCode = U_ZERO_ERROR;
787 call(&errorCode);
788 return U_SUCCESS(errorCode) ? ops : 0;
789 }
790
791 class UniStrCollPerfFunction : public CollPerfFunction {
792 public:
UniStrCollPerfFunction(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)793 UniStrCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
794 : CollPerfFunction(coll, ucoll), d16(data16),
795 source(new UnicodeString*[d16->count]) {
796 for (int32_t i = 0; i < d16->count; ++i) {
797 source[i] = new UnicodeString(TRUE, d16->dataOf(i), d16->lengthOf(i));
798 }
799 }
800 virtual ~UniStrCollPerfFunction();
801
802 protected:
803 const CA_uchar* d16;
804 UnicodeString** source;
805 };
806
~UniStrCollPerfFunction()807 UniStrCollPerfFunction::~UniStrCollPerfFunction() {
808 for (int32_t i = 0; i < d16->count; ++i) {
809 delete source[i];
810 }
811 delete[] source;
812 }
813
814 //
815 // Test case sorting an array of UnicodeString pointers.
816 //
817 class UniStrSort : public UniStrCollPerfFunction {
818 public:
UniStrSort(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)819 UniStrSort(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
820 : UniStrCollPerfFunction(coll, ucoll, data16),
821 dest(new UnicodeString*[d16->count]) {}
822 virtual ~UniStrSort();
823 virtual void call(UErrorCode* status);
824
825 private:
826 UnicodeString** dest; // aliases only
827 };
828
~UniStrSort()829 UniStrSort::~UniStrSort() {
830 delete[] dest;
831 }
832
call(UErrorCode * status)833 void UniStrSort::call(UErrorCode* status) {
834 if (U_FAILURE(*status)) return;
835
836 CollatorAndCounter cc(coll);
837 int32_t count = d16->count;
838 memcpy(dest, source, count * sizeof(UnicodeString *));
839 uprv_sortArray(dest, count, (int32_t)sizeof(UnicodeString *),
840 UniStrCollatorComparator, &cc, TRUE, status);
841 ops = cc.counter;
842 }
843
844 namespace {
845
846 int32_t U_CALLCONV
StringPieceCollatorComparator(const void * context,const void * left,const void * right)847 StringPieceCollatorComparator(const void* context, const void* left, const void* right) {
848 CollatorAndCounter& cc = *(CollatorAndCounter*)context;
849 const StringPiece& leftString = *(const StringPiece*)left;
850 const StringPiece& rightString = *(const StringPiece*)right;
851 UErrorCode errorCode = U_ZERO_ERROR;
852 ++cc.counter;
853 return cc.coll.compareUTF8(leftString, rightString, errorCode);
854 }
855
856 int32_t U_CALLCONV
StringPieceUCollatorComparator(const void * context,const void * left,const void * right)857 StringPieceUCollatorComparator(const void* context, const void* left, const void* right) {
858 CollatorAndCounter& cc = *(CollatorAndCounter*)context;
859 const StringPiece& leftString = *(const StringPiece*)left;
860 const StringPiece& rightString = *(const StringPiece*)right;
861 UErrorCode errorCode = U_ZERO_ERROR;
862 ++cc.counter;
863 return ucol_strcollUTF8(cc.ucoll,
864 leftString.data(), leftString.length(),
865 rightString.data(), rightString.length(), &errorCode);
866 }
867
868 } // namespace
869
870 class StringPieceCollPerfFunction : public CollPerfFunction {
871 public:
StringPieceCollPerfFunction(const Collator & coll,const UCollator * ucoll,const CA_char * data8)872 StringPieceCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
873 : CollPerfFunction(coll, ucoll), d8(data8),
874 source(new StringPiece[d8->count]) {
875 for (int32_t i = 0; i < d8->count; ++i) {
876 source[i].set(d8->dataOf(i), d8->lengthOf(i));
877 }
878 }
879 virtual ~StringPieceCollPerfFunction();
880
881 protected:
882 const CA_char* d8;
883 StringPiece* source;
884 };
885
~StringPieceCollPerfFunction()886 StringPieceCollPerfFunction::~StringPieceCollPerfFunction() {
887 delete[] source;
888 }
889
890 class StringPieceSort : public StringPieceCollPerfFunction {
891 public:
StringPieceSort(const Collator & coll,const UCollator * ucoll,const CA_char * data8)892 StringPieceSort(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
893 : StringPieceCollPerfFunction(coll, ucoll, data8),
894 dest(new StringPiece[d8->count]) {}
895 virtual ~StringPieceSort();
896
897 protected:
898 StringPiece* dest;
899 };
900
~StringPieceSort()901 StringPieceSort::~StringPieceSort() {
902 delete[] dest;
903 }
904
905 //
906 // Test case sorting an array of UTF-8 StringPiece's with Collator::compareUTF8().
907 //
908 class StringPieceSortCpp : public StringPieceSort {
909 public:
StringPieceSortCpp(const Collator & coll,const UCollator * ucoll,const CA_char * data8)910 StringPieceSortCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
911 : StringPieceSort(coll, ucoll, data8) {}
912 virtual ~StringPieceSortCpp();
913 virtual void call(UErrorCode* status);
914 };
915
~StringPieceSortCpp()916 StringPieceSortCpp::~StringPieceSortCpp() {}
917
call(UErrorCode * status)918 void StringPieceSortCpp::call(UErrorCode* status) {
919 if (U_FAILURE(*status)) return;
920
921 CollatorAndCounter cc(coll);
922 int32_t count = d8->count;
923 memcpy(dest, source, count * sizeof(StringPiece));
924 uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
925 StringPieceCollatorComparator, &cc, TRUE, status);
926 ops = cc.counter;
927 }
928
929 //
930 // Test case sorting an array of UTF-8 StringPiece's with ucol_strcollUTF8().
931 //
932 class StringPieceSortC : public StringPieceSort {
933 public:
StringPieceSortC(const Collator & coll,const UCollator * ucoll,const CA_char * data8)934 StringPieceSortC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
935 : StringPieceSort(coll, ucoll, data8) {}
936 virtual ~StringPieceSortC();
937 virtual void call(UErrorCode* status);
938 };
939
~StringPieceSortC()940 StringPieceSortC::~StringPieceSortC() {}
941
call(UErrorCode * status)942 void StringPieceSortC::call(UErrorCode* status) {
943 if (U_FAILURE(*status)) return;
944
945 CollatorAndCounter cc(coll, ucoll);
946 int32_t count = d8->count;
947 memcpy(dest, source, count * sizeof(StringPiece));
948 uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
949 StringPieceUCollatorComparator, &cc, TRUE, status);
950 ops = cc.counter;
951 }
952
953 //
954 // Test case performing binary searches in a sorted array of UnicodeString pointers.
955 //
956 class UniStrBinSearch : public UniStrCollPerfFunction {
957 public:
UniStrBinSearch(const Collator & coll,const UCollator * ucoll,const CA_uchar * data16)958 UniStrBinSearch(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
959 : UniStrCollPerfFunction(coll, ucoll, data16) {}
960 virtual ~UniStrBinSearch();
961 virtual void call(UErrorCode* status);
962 };
963
~UniStrBinSearch()964 UniStrBinSearch::~UniStrBinSearch() {}
965
call(UErrorCode * status)966 void UniStrBinSearch::call(UErrorCode* status) {
967 if (U_FAILURE(*status)) return;
968
969 CollatorAndCounter cc(coll);
970 int32_t count = d16->count;
971 for (int32_t i = 0; i < count; ++i) {
972 (void)uprv_stableBinarySearch((char *)source, count,
973 source + i, (int32_t)sizeof(UnicodeString *),
974 UniStrCollatorComparator, &cc);
975 }
976 ops = cc.counter;
977 }
978
979 class StringPieceBinSearch : public StringPieceCollPerfFunction {
980 public:
StringPieceBinSearch(const Collator & coll,const UCollator * ucoll,const CA_char * data8)981 StringPieceBinSearch(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
982 : StringPieceCollPerfFunction(coll, ucoll, data8) {}
983 virtual ~StringPieceBinSearch();
984 };
985
~StringPieceBinSearch()986 StringPieceBinSearch::~StringPieceBinSearch() {}
987
988 //
989 // Test case performing binary searches in a sorted array of UTF-8 StringPiece's
990 // with Collator::compareUTF8().
991 //
992 class StringPieceBinSearchCpp : public StringPieceBinSearch {
993 public:
StringPieceBinSearchCpp(const Collator & coll,const UCollator * ucoll,const CA_char * data8)994 StringPieceBinSearchCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
995 : StringPieceBinSearch(coll, ucoll, data8) {}
996 virtual ~StringPieceBinSearchCpp();
997 virtual void call(UErrorCode* status);
998 };
999
~StringPieceBinSearchCpp()1000 StringPieceBinSearchCpp::~StringPieceBinSearchCpp() {}
1001
call(UErrorCode * status)1002 void StringPieceBinSearchCpp::call(UErrorCode* status) {
1003 if (U_FAILURE(*status)) return;
1004
1005 CollatorAndCounter cc(coll);
1006 int32_t count = d8->count;
1007 for (int32_t i = 0; i < count; ++i) {
1008 (void)uprv_stableBinarySearch((char *)source, count,
1009 source + i, (int32_t)sizeof(StringPiece),
1010 StringPieceCollatorComparator, &cc);
1011 }
1012 ops = cc.counter;
1013 }
1014
1015 //
1016 // Test case performing binary searches in a sorted array of UTF-8 StringPiece's
1017 // with ucol_strcollUTF8().
1018 //
1019 class StringPieceBinSearchC : public StringPieceBinSearch {
1020 public:
StringPieceBinSearchC(const Collator & coll,const UCollator * ucoll,const CA_char * data8)1021 StringPieceBinSearchC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
1022 : StringPieceBinSearch(coll, ucoll, data8) {}
1023 virtual ~StringPieceBinSearchC();
1024 virtual void call(UErrorCode* status);
1025 };
1026
~StringPieceBinSearchC()1027 StringPieceBinSearchC::~StringPieceBinSearchC() {}
1028
call(UErrorCode * status)1029 void StringPieceBinSearchC::call(UErrorCode* status) {
1030 if (U_FAILURE(*status)) return;
1031
1032 CollatorAndCounter cc(coll, ucoll);
1033 int32_t count = d8->count;
1034 for (int32_t i = 0; i < count; ++i) {
1035 (void)uprv_stableBinarySearch((char *)source, count,
1036 source + i, (int32_t)sizeof(StringPiece),
1037 StringPieceUCollatorComparator, &cc);
1038 }
1039 ops = cc.counter;
1040 }
1041
1042
1043 class CollPerf2Test : public UPerfTest
1044 {
1045 public:
1046 CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status);
1047 ~CollPerf2Test();
1048 virtual UPerfFunction* runIndexedTest(
1049 int32_t index, UBool exec, const char *&name, char *par = NULL);
1050
1051 private:
1052 UCollator* coll;
1053 Collator* collObj;
1054
1055 int32_t count;
1056 CA_uchar* data16;
1057 CA_char* data8;
1058
1059 CA_uchar* modData16;
1060 CA_char* modData8;
1061
1062 CA_uchar* sortedData16;
1063 CA_char* sortedData8;
1064
1065 CA_uchar* randomData16;
1066 CA_char* randomData8;
1067
1068 const CA_uchar* getData16(UErrorCode &status);
1069 const CA_char* getData8(UErrorCode &status);
1070
1071 const CA_uchar* getModData16(UErrorCode &status);
1072 const CA_char* getModData8(UErrorCode &status);
1073
1074 const CA_uchar* getSortedData16(UErrorCode &status);
1075 const CA_char* getSortedData8(UErrorCode &status);
1076
1077 const CA_uchar* getRandomData16(UErrorCode &status);
1078 const CA_char* getRandomData8(UErrorCode &status);
1079
1080 static CA_uchar* sortData16(
1081 const CA_uchar* d16,
1082 UComparator *cmp, const void *context,
1083 UErrorCode &status);
1084 static CA_char* getData8FromData16(const CA_uchar* d16, UErrorCode &status);
1085
1086 UPerfFunction* TestStrcoll();
1087 UPerfFunction* TestStrcollNull();
1088 UPerfFunction* TestStrcollSimilar();
1089
1090 UPerfFunction* TestStrcollUTF8();
1091 UPerfFunction* TestStrcollUTF8Null();
1092 UPerfFunction* TestStrcollUTF8Similar();
1093
1094 UPerfFunction* TestGetSortKey();
1095 UPerfFunction* TestGetSortKeyNull();
1096
1097 UPerfFunction* TestNextSortKeyPart_4All();
1098 UPerfFunction* TestNextSortKeyPart_4x2();
1099 UPerfFunction* TestNextSortKeyPart_4x4();
1100 UPerfFunction* TestNextSortKeyPart_4x8();
1101 UPerfFunction* TestNextSortKeyPart_32All();
1102 UPerfFunction* TestNextSortKeyPart_32x2();
1103
1104 UPerfFunction* TestNextSortKeyPartUTF8_4All();
1105 UPerfFunction* TestNextSortKeyPartUTF8_4x2();
1106 UPerfFunction* TestNextSortKeyPartUTF8_4x4();
1107 UPerfFunction* TestNextSortKeyPartUTF8_4x8();
1108 UPerfFunction* TestNextSortKeyPartUTF8_32All();
1109 UPerfFunction* TestNextSortKeyPartUTF8_32x2();
1110
1111 UPerfFunction* TestCppCompare();
1112 UPerfFunction* TestCppCompareNull();
1113 UPerfFunction* TestCppCompareSimilar();
1114
1115 UPerfFunction* TestCppCompareUTF8();
1116 UPerfFunction* TestCppCompareUTF8Null();
1117 UPerfFunction* TestCppCompareUTF8Similar();
1118
1119 UPerfFunction* TestCppGetCollationKey();
1120 UPerfFunction* TestCppGetCollationKeyNull();
1121
1122 UPerfFunction* TestUniStrSort();
1123 UPerfFunction* TestStringPieceSortCpp();
1124 UPerfFunction* TestStringPieceSortC();
1125
1126 UPerfFunction* TestUniStrBinSearch();
1127 UPerfFunction* TestStringPieceBinSearchCpp();
1128 UPerfFunction* TestStringPieceBinSearchC();
1129 };
1130
CollPerf2Test(int32_t argc,const char * argv[],UErrorCode & status)1131 CollPerf2Test::CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status) :
1132 UPerfTest(argc, argv, status),
1133 coll(NULL),
1134 collObj(NULL),
1135 count(0),
1136 data16(NULL),
1137 data8(NULL),
1138 modData16(NULL),
1139 modData8(NULL),
1140 sortedData16(NULL),
1141 sortedData8(NULL),
1142 randomData16(NULL),
1143 randomData8(NULL)
1144 {
1145 if (U_FAILURE(status)) {
1146 return;
1147 }
1148
1149 if (locale == NULL){
1150 locale = "root";
1151 }
1152
1153 // Set up an ICU collator.
1154 // Starting with ICU 54 (ticket #8260), this supports standard collation locale keywords.
1155 coll = ucol_open(locale, &status);
1156 collObj = Collator::createInstance(locale, status);
1157 }
1158
~CollPerf2Test()1159 CollPerf2Test::~CollPerf2Test()
1160 {
1161 ucol_close(coll);
1162 delete collObj;
1163
1164 delete data16;
1165 delete data8;
1166 delete modData16;
1167 delete modData8;
1168 delete sortedData16;
1169 delete sortedData8;
1170 delete randomData16;
1171 delete randomData8;
1172 }
1173
1174 #define MAX_NUM_DATA 10000
1175
getData16(UErrorCode & status)1176 const CA_uchar* CollPerf2Test::getData16(UErrorCode &status)
1177 {
1178 if (U_FAILURE(status)) return NULL;
1179 if (data16) return data16;
1180
1181 CA_uchar* d16 = new CA_uchar();
1182 const UChar *line = NULL;
1183 int32_t len = 0;
1184 int32_t numData = 0;
1185
1186 for (;;) {
1187 line = ucbuf_readline(ucharBuf, &len, &status);
1188 if (line == NULL || U_FAILURE(status)) break;
1189
1190 // Refer to the source code of ucbuf_readline()
1191 // 1. 'len' includes the line terminal symbols
1192 // 2. The length of the line terminal symbols is only one character
1193 // 3. The Windows CR LF line terminal symbols will be converted to CR
1194
1195 if (len == 1 || line[0] == 0x23 /* '#' */) {
1196 continue; // skip empty/comment line
1197 } else {
1198 d16->append_one(len);
1199 UChar *p = d16->last();
1200 u_memcpy(p, line, len - 1); // exclude the CR
1201 p[len - 1] = 0; // NUL-terminate
1202
1203 numData++;
1204 if (numData >= MAX_NUM_DATA) break;
1205 }
1206 }
1207
1208 if (U_SUCCESS(status)) {
1209 data16 = d16;
1210 } else {
1211 delete d16;
1212 }
1213
1214 return data16;
1215 }
1216
getData8(UErrorCode & status)1217 const CA_char* CollPerf2Test::getData8(UErrorCode &status)
1218 {
1219 if (U_FAILURE(status)) return NULL;
1220 if (data8) return data8;
1221 return data8 = getData8FromData16(getData16(status), status);
1222 }
1223
getModData16(UErrorCode & status)1224 const CA_uchar* CollPerf2Test::getModData16(UErrorCode &status)
1225 {
1226 if (U_FAILURE(status)) return NULL;
1227 if (modData16) return modData16;
1228
1229 const CA_uchar* d16 = getData16(status);
1230 if (U_FAILURE(status)) return NULL;
1231
1232 CA_uchar* modData16 = new CA_uchar();
1233
1234 for (int32_t i = 0; i < d16->count; i++) {
1235 const UChar *s = d16->dataOf(i);
1236 int32_t len = d16->lengthOf(i) + 1; // including NULL terminator
1237
1238 modData16->append_one(len);
1239 u_memcpy(modData16->last(), s, len);
1240
1241 // replacing the last character with a different character
1242 UChar *lastChar = &modData16->last()[len -2];
1243 for (int32_t j = i + 1; j != i; j++) {
1244 if (j >= d16->count) {
1245 j = 0;
1246 }
1247 const UChar *s1 = d16->dataOf(j);
1248 UChar lastChar1 = s1[d16->lengthOf(j) - 1];
1249 if (*lastChar != lastChar1) {
1250 *lastChar = lastChar1;
1251 break;
1252 }
1253 }
1254 }
1255
1256 return modData16;
1257 }
1258
getModData8(UErrorCode & status)1259 const CA_char* CollPerf2Test::getModData8(UErrorCode &status)
1260 {
1261 if (U_FAILURE(status)) return NULL;
1262 if (modData8) return modData8;
1263 return modData8 = getData8FromData16(getModData16(status), status);
1264 }
1265
1266 namespace {
1267
1268 struct ArrayAndColl {
ArrayAndColl__anon519b62b60311::ArrayAndColl1269 ArrayAndColl(const CA_uchar* a, const Collator& c) : d16(a), coll(c) {}
1270 const CA_uchar* d16;
1271 const Collator& coll;
1272 };
1273
1274 int32_t U_CALLCONV
U16CollatorComparator(const void * context,const void * left,const void * right)1275 U16CollatorComparator(const void* context, const void* left, const void* right) {
1276 const ArrayAndColl& ac = *(const ArrayAndColl*)context;
1277 const CA_uchar* d16 = ac.d16;
1278 int32_t leftIndex = *(const int32_t*)left;
1279 int32_t rightIndex = *(const int32_t*)right;
1280 UErrorCode errorCode = U_ZERO_ERROR;
1281 return ac.coll.compare(d16->dataOf(leftIndex), d16->lengthOf(leftIndex),
1282 d16->dataOf(rightIndex), d16->lengthOf(rightIndex),
1283 errorCode);
1284 }
1285
1286 int32_t U_CALLCONV
U16HashComparator(const void * context,const void * left,const void * right)1287 U16HashComparator(const void* context, const void* left, const void* right) {
1288 const CA_uchar* d16 = (const CA_uchar*)context;
1289 int32_t leftIndex = *(const int32_t*)left;
1290 int32_t rightIndex = *(const int32_t*)right;
1291 int32_t leftHash = ustr_hashUCharsN(d16->dataOf(leftIndex), d16->lengthOf(leftIndex));
1292 int32_t rightHash = ustr_hashUCharsN(d16->dataOf(rightIndex), d16->lengthOf(rightIndex));
1293 return leftHash < rightHash ? -1 : leftHash == rightHash ? 0 : 1;
1294 }
1295
1296 } // namespace
1297
getSortedData16(UErrorCode & status)1298 const CA_uchar* CollPerf2Test::getSortedData16(UErrorCode &status) {
1299 if (U_FAILURE(status)) return NULL;
1300 if (sortedData16) return sortedData16;
1301
1302 ArrayAndColl ac(getData16(status), *collObj);
1303 return sortedData16 = sortData16(ac.d16, U16CollatorComparator, &ac, status);
1304 }
1305
getSortedData8(UErrorCode & status)1306 const CA_char* CollPerf2Test::getSortedData8(UErrorCode &status) {
1307 if (U_FAILURE(status)) return NULL;
1308 if (sortedData8) return sortedData8;
1309 return sortedData8 = getData8FromData16(getSortedData16(status), status);
1310 }
1311
getRandomData16(UErrorCode & status)1312 const CA_uchar* CollPerf2Test::getRandomData16(UErrorCode &status) {
1313 if (U_FAILURE(status)) return NULL;
1314 if (randomData16) return randomData16;
1315
1316 // Sort the strings by their hash codes, which should be a reasonably pseudo-random order.
1317 const CA_uchar* d16 = getData16(status);
1318 return randomData16 = sortData16(d16, U16HashComparator, d16, status);
1319 }
1320
getRandomData8(UErrorCode & status)1321 const CA_char* CollPerf2Test::getRandomData8(UErrorCode &status) {
1322 if (U_FAILURE(status)) return NULL;
1323 if (randomData8) return randomData8;
1324 return randomData8 = getData8FromData16(getRandomData16(status), status);
1325 }
1326
// Builds a new array holding the strings of `d16` in the order defined by `cmp`.
// An array of int32_t indexes is sorted with uprv_sortArray(); `cmp` therefore
// receives pointers to indexes plus `context`. The strings are then copied,
// including their NUL terminators, in the sorted index order.
// Returns the newly allocated array, or NULL on failure.
CA_uchar* CollPerf2Test::sortData16(const CA_uchar* d16,
                                    UComparator *cmp, const void *context,
                                    UErrorCode &status) {
    if (U_FAILURE(status)) return NULL;

    const int32_t numStrings = d16->count;

    // Sort indexes rather than the strings themselves.
    LocalArray<int32_t> order(new int32_t[numStrings]);
    for (int32_t k = 0; k < numStrings; ++k) {
        order[k] = k;
    }
    uprv_sortArray(order.getAlias(), numStrings, (int32_t)sizeof(int32_t),
                   cmp, context, TRUE, &status);
    if (U_FAILURE(status)) return NULL;

    // Copy the strings in sorted order into a new array.
    LocalPointer<CA_uchar> sorted(new CA_uchar());
    for (int32_t k = 0; k < numStrings; k++) {
        const int32_t from = order[k];
        const int32_t unitsWithNul = d16->lengthOf(from) + 1;  // including NUL terminator
        sorted->append_one(unitsWithNul);
        u_memcpy(sorted->last(), d16->dataOf(from), unitsWithNul);
    }

    return U_SUCCESS(status) ? sorted.orphan() : NULL;
}
1356
getData8FromData16(const CA_uchar * d16,UErrorCode & status)1357 CA_char* CollPerf2Test::getData8FromData16(const CA_uchar* d16, UErrorCode &status) {
1358 if (U_FAILURE(status)) return NULL;
1359
1360 // UTF-16 -> UTF-8 conversion
1361 LocalPointer<CA_char> d8(new CA_char());
1362 for (int32_t i = 0; i < d16->count; i++) {
1363 const UChar *s16 = d16->dataOf(i);
1364 int32_t length16 = d16->lengthOf(i);
1365
1366 // get length in UTF-8
1367 int32_t length8;
1368 u_strToUTF8(NULL, 0, &length8, s16, length16, &status);
1369 if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
1370 status = U_ZERO_ERROR;
1371 } else {
1372 break;
1373 }
1374 int32_t capacity8 = length8 + 1; // plus terminal NULL
1375 d8->append_one(capacity8);
1376
1377 // convert to UTF-8
1378 u_strToUTF8(d8->last(), capacity8, NULL, s16, length16, &status);
1379 if (U_FAILURE(status)) break;
1380 }
1381
1382 if (U_SUCCESS(status)) {
1383 return d8.orphan();
1384 } else {
1385 return NULL;
1386 }
1387 }
1388
1389 UPerfFunction*
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)1390 CollPerf2Test::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par /*= NULL*/)
1391 {
1392 (void)par;
1393 TESTCASE_AUTO_BEGIN;
1394
1395 TESTCASE_AUTO(TestStrcoll);
1396 TESTCASE_AUTO(TestStrcollNull);
1397 TESTCASE_AUTO(TestStrcollSimilar);
1398
1399 TESTCASE_AUTO(TestStrcollUTF8);
1400 TESTCASE_AUTO(TestStrcollUTF8Null);
1401 TESTCASE_AUTO(TestStrcollUTF8Similar);
1402
1403 TESTCASE_AUTO(TestGetSortKey);
1404 TESTCASE_AUTO(TestGetSortKeyNull);
1405
1406 TESTCASE_AUTO(TestNextSortKeyPart_4All);
1407 TESTCASE_AUTO(TestNextSortKeyPart_4x4);
1408 TESTCASE_AUTO(TestNextSortKeyPart_4x8);
1409 TESTCASE_AUTO(TestNextSortKeyPart_32All);
1410 TESTCASE_AUTO(TestNextSortKeyPart_32x2);
1411
1412 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4All);
1413 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x4);
1414 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x8);
1415 TESTCASE_AUTO(TestNextSortKeyPartUTF8_32All);
1416 TESTCASE_AUTO(TestNextSortKeyPartUTF8_32x2);
1417
1418 TESTCASE_AUTO(TestCppCompare);
1419 TESTCASE_AUTO(TestCppCompareNull);
1420 TESTCASE_AUTO(TestCppCompareSimilar);
1421
1422 TESTCASE_AUTO(TestCppCompareUTF8);
1423 TESTCASE_AUTO(TestCppCompareUTF8Null);
1424 TESTCASE_AUTO(TestCppCompareUTF8Similar);
1425
1426 TESTCASE_AUTO(TestCppGetCollationKey);
1427 TESTCASE_AUTO(TestCppGetCollationKeyNull);
1428
1429 TESTCASE_AUTO(TestUniStrSort);
1430 TESTCASE_AUTO(TestStringPieceSortCpp);
1431 TESTCASE_AUTO(TestStringPieceSortC);
1432
1433 TESTCASE_AUTO(TestUniStrBinSearch);
1434 TESTCASE_AUTO(TestStringPieceBinSearchCpp);
1435 TESTCASE_AUTO(TestStringPieceBinSearchC);
1436
1437 TESTCASE_AUTO_END;
1438 return NULL;
1439 }
1440
1441
1442
TestStrcoll()1443 UPerfFunction* CollPerf2Test::TestStrcoll()
1444 {
1445 UErrorCode status = U_ZERO_ERROR;
1446 Strcoll *testCase = new Strcoll(coll, getData16(status), TRUE /* useLen */);
1447 if (U_FAILURE(status)) {
1448 delete testCase;
1449 return NULL;
1450 }
1451 return testCase;
1452 }
1453
TestStrcollNull()1454 UPerfFunction* CollPerf2Test::TestStrcollNull()
1455 {
1456 UErrorCode status = U_ZERO_ERROR;
1457 Strcoll *testCase = new Strcoll(coll, getData16(status), FALSE /* useLen */);
1458 if (U_FAILURE(status)) {
1459 delete testCase;
1460 return NULL;
1461 }
1462 return testCase;
1463 }
1464
TestStrcollSimilar()1465 UPerfFunction* CollPerf2Test::TestStrcollSimilar()
1466 {
1467 UErrorCode status = U_ZERO_ERROR;
1468 Strcoll_2 *testCase = new Strcoll_2(coll, getData16(status), getModData16(status), TRUE /* useLen */);
1469 if (U_FAILURE(status)) {
1470 delete testCase;
1471 return NULL;
1472 }
1473 return testCase;
1474 }
1475
TestStrcollUTF8()1476 UPerfFunction* CollPerf2Test::TestStrcollUTF8()
1477 {
1478 UErrorCode status = U_ZERO_ERROR;
1479 StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status), TRUE /* useLen */);
1480 if (U_FAILURE(status)) {
1481 delete testCase;
1482 return NULL;
1483 }
1484 return testCase;
1485 }
1486
TestStrcollUTF8Null()1487 UPerfFunction* CollPerf2Test::TestStrcollUTF8Null()
1488 {
1489 UErrorCode status = U_ZERO_ERROR;
1490 StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status),FALSE /* useLen */);
1491 if (U_FAILURE(status)) {
1492 delete testCase;
1493 return NULL;
1494 }
1495 return testCase;
1496 }
1497
TestStrcollUTF8Similar()1498 UPerfFunction* CollPerf2Test::TestStrcollUTF8Similar()
1499 {
1500 UErrorCode status = U_ZERO_ERROR;
1501 StrcollUTF8_2 *testCase = new StrcollUTF8_2(coll, getData8(status), getModData8(status), TRUE /* useLen */);
1502 if (U_FAILURE(status)) {
1503 delete testCase;
1504 return NULL;
1505 }
1506 return testCase;
1507 }
1508
TestGetSortKey()1509 UPerfFunction* CollPerf2Test::TestGetSortKey()
1510 {
1511 UErrorCode status = U_ZERO_ERROR;
1512 GetSortKey *testCase = new GetSortKey(coll, getData16(status), TRUE /* useLen */);
1513 if (U_FAILURE(status)) {
1514 delete testCase;
1515 return NULL;
1516 }
1517 return testCase;
1518 }
1519
TestGetSortKeyNull()1520 UPerfFunction* CollPerf2Test::TestGetSortKeyNull()
1521 {
1522 UErrorCode status = U_ZERO_ERROR;
1523 GetSortKey *testCase = new GetSortKey(coll, getData16(status), FALSE /* useLen */);
1524 if (U_FAILURE(status)) {
1525 delete testCase;
1526 return NULL;
1527 }
1528 return testCase;
1529 }
1530
TestNextSortKeyPart_4All()1531 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4All()
1532 {
1533 UErrorCode status = U_ZERO_ERROR;
1534 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */);
1535 if (U_FAILURE(status)) {
1536 delete testCase;
1537 return NULL;
1538 }
1539 return testCase;
1540 }
1541
TestNextSortKeyPart_4x4()1542 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x4()
1543 {
1544 UErrorCode status = U_ZERO_ERROR;
1545 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 4 /* maxIteration */);
1546 if (U_FAILURE(status)) {
1547 delete testCase;
1548 return NULL;
1549 }
1550 return testCase;
1551 }
1552
TestNextSortKeyPart_4x8()1553 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x8()
1554 {
1555 UErrorCode status = U_ZERO_ERROR;
1556 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 8 /* maxIteration */);
1557 if (U_FAILURE(status)) {
1558 delete testCase;
1559 return NULL;
1560 }
1561 return testCase;
1562 }
1563
TestNextSortKeyPart_32All()1564 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32All()
1565 {
1566 UErrorCode status = U_ZERO_ERROR;
1567 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */);
1568 if (U_FAILURE(status)) {
1569 delete testCase;
1570 return NULL;
1571 }
1572 return testCase;
1573 }
1574
TestNextSortKeyPart_32x2()1575 UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32x2()
1576 {
1577 UErrorCode status = U_ZERO_ERROR;
1578 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */, 2 /* maxIteration */);
1579 if (U_FAILURE(status)) {
1580 delete testCase;
1581 return NULL;
1582 }
1583 return testCase;
1584 }
1585
TestNextSortKeyPartUTF8_4All()1586 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4All()
1587 {
1588 UErrorCode status = U_ZERO_ERROR;
1589 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */);
1590 if (U_FAILURE(status)) {
1591 delete testCase;
1592 return NULL;
1593 }
1594 return testCase;
1595 }
1596
TestNextSortKeyPartUTF8_4x4()1597 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x4()
1598 {
1599 UErrorCode status = U_ZERO_ERROR;
1600 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 4 /* maxIteration */);
1601 if (U_FAILURE(status)) {
1602 delete testCase;
1603 return NULL;
1604 }
1605 return testCase;
1606 }
1607
TestNextSortKeyPartUTF8_4x8()1608 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x8()
1609 {
1610 UErrorCode status = U_ZERO_ERROR;
1611 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 8 /* maxIteration */);
1612 if (U_FAILURE(status)) {
1613 delete testCase;
1614 return NULL;
1615 }
1616 return testCase;
1617 }
1618
TestNextSortKeyPartUTF8_32All()1619 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32All()
1620 {
1621 UErrorCode status = U_ZERO_ERROR;
1622 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */);
1623 if (U_FAILURE(status)) {
1624 delete testCase;
1625 return NULL;
1626 }
1627 return testCase;
1628 }
1629
TestNextSortKeyPartUTF8_32x2()1630 UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32x2()
1631 {
1632 UErrorCode status = U_ZERO_ERROR;
1633 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */, 2 /* maxIteration */);
1634 if (U_FAILURE(status)) {
1635 delete testCase;
1636 return NULL;
1637 }
1638 return testCase;
1639 }
1640
TestCppCompare()1641 UPerfFunction* CollPerf2Test::TestCppCompare()
1642 {
1643 UErrorCode status = U_ZERO_ERROR;
1644 CppCompare *testCase = new CppCompare(collObj, getData16(status), TRUE /* useLen */);
1645 if (U_FAILURE(status)) {
1646 delete testCase;
1647 return NULL;
1648 }
1649 return testCase;
1650 }
1651
TestCppCompareNull()1652 UPerfFunction* CollPerf2Test::TestCppCompareNull()
1653 {
1654 UErrorCode status = U_ZERO_ERROR;
1655 CppCompare *testCase = new CppCompare(collObj, getData16(status), FALSE /* useLen */);
1656 if (U_FAILURE(status)) {
1657 delete testCase;
1658 return NULL;
1659 }
1660 return testCase;
1661 }
1662
TestCppCompareSimilar()1663 UPerfFunction* CollPerf2Test::TestCppCompareSimilar()
1664 {
1665 UErrorCode status = U_ZERO_ERROR;
1666 CppCompare_2 *testCase = new CppCompare_2(collObj, getData16(status), getModData16(status), TRUE /* useLen */);
1667 if (U_FAILURE(status)) {
1668 delete testCase;
1669 return NULL;
1670 }
1671 return testCase;
1672 }
1673
TestCppCompareUTF8()1674 UPerfFunction* CollPerf2Test::TestCppCompareUTF8()
1675 {
1676 UErrorCode status = U_ZERO_ERROR;
1677 CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), TRUE /* useLen */);
1678 if (U_FAILURE(status)) {
1679 delete testCase;
1680 return NULL;
1681 }
1682 return testCase;
1683 }
1684
TestCppCompareUTF8Null()1685 UPerfFunction* CollPerf2Test::TestCppCompareUTF8Null()
1686 {
1687 UErrorCode status = U_ZERO_ERROR;
1688 CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), FALSE /* useLen */);
1689 if (U_FAILURE(status)) {
1690 delete testCase;
1691 return NULL;
1692 }
1693 return testCase;
1694 }
1695
TestCppCompareUTF8Similar()1696 UPerfFunction* CollPerf2Test::TestCppCompareUTF8Similar()
1697 {
1698 UErrorCode status = U_ZERO_ERROR;
1699 CppCompareUTF8_2 *testCase = new CppCompareUTF8_2(collObj, getData8(status), getModData8(status), TRUE /* useLen */);
1700 if (U_FAILURE(status)) {
1701 delete testCase;
1702 return NULL;
1703 }
1704 return testCase;
1705 }
1706
TestCppGetCollationKey()1707 UPerfFunction* CollPerf2Test::TestCppGetCollationKey()
1708 {
1709 UErrorCode status = U_ZERO_ERROR;
1710 CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), TRUE /* useLen */);
1711 if (U_FAILURE(status)) {
1712 delete testCase;
1713 return NULL;
1714 }
1715 return testCase;
1716 }
1717
TestCppGetCollationKeyNull()1718 UPerfFunction* CollPerf2Test::TestCppGetCollationKeyNull()
1719 {
1720 UErrorCode status = U_ZERO_ERROR;
1721 CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), FALSE /* useLen */);
1722 if (U_FAILURE(status)) {
1723 delete testCase;
1724 return NULL;
1725 }
1726 return testCase;
1727 }
1728
TestUniStrSort()1729 UPerfFunction* CollPerf2Test::TestUniStrSort() {
1730 UErrorCode status = U_ZERO_ERROR;
1731 UPerfFunction *testCase = new UniStrSort(*collObj, coll, getRandomData16(status));
1732 if (U_FAILURE(status)) {
1733 delete testCase;
1734 return NULL;
1735 }
1736 return testCase;
1737 }
1738
TestStringPieceSortCpp()1739 UPerfFunction* CollPerf2Test::TestStringPieceSortCpp() {
1740 UErrorCode status = U_ZERO_ERROR;
1741 UPerfFunction *testCase = new StringPieceSortCpp(*collObj, coll, getRandomData8(status));
1742 if (U_FAILURE(status)) {
1743 delete testCase;
1744 return NULL;
1745 }
1746 return testCase;
1747 }
1748
TestStringPieceSortC()1749 UPerfFunction* CollPerf2Test::TestStringPieceSortC() {
1750 UErrorCode status = U_ZERO_ERROR;
1751 UPerfFunction *testCase = new StringPieceSortC(*collObj, coll, getRandomData8(status));
1752 if (U_FAILURE(status)) {
1753 delete testCase;
1754 return NULL;
1755 }
1756 return testCase;
1757 }
1758
TestUniStrBinSearch()1759 UPerfFunction* CollPerf2Test::TestUniStrBinSearch() {
1760 UErrorCode status = U_ZERO_ERROR;
1761 UPerfFunction *testCase = new UniStrBinSearch(*collObj, coll, getSortedData16(status));
1762 if (U_FAILURE(status)) {
1763 delete testCase;
1764 return NULL;
1765 }
1766 return testCase;
1767 }
1768
TestStringPieceBinSearchCpp()1769 UPerfFunction* CollPerf2Test::TestStringPieceBinSearchCpp() {
1770 UErrorCode status = U_ZERO_ERROR;
1771 UPerfFunction *testCase = new StringPieceBinSearchCpp(*collObj, coll, getSortedData8(status));
1772 if (U_FAILURE(status)) {
1773 delete testCase;
1774 return NULL;
1775 }
1776 return testCase;
1777 }
1778
TestStringPieceBinSearchC()1779 UPerfFunction* CollPerf2Test::TestStringPieceBinSearchC() {
1780 UErrorCode status = U_ZERO_ERROR;
1781 UPerfFunction *testCase = new StringPieceBinSearchC(*collObj, coll, getSortedData8(status));
1782 if (U_FAILURE(status)) {
1783 delete testCase;
1784 return NULL;
1785 }
1786 return testCase;
1787 }
1788
1789
main(int argc,const char * argv[])1790 int main(int argc, const char *argv[])
1791 {
1792 UErrorCode status = U_ZERO_ERROR;
1793 CollPerf2Test test(argc, argv, status);
1794
1795 if (U_FAILURE(status)){
1796 printf("The error is %s\n", u_errorName(status));
1797 //TODO: print usage here
1798 return status;
1799 }
1800
1801 if (test.run() == FALSE){
1802 fprintf(stderr, "FAILED: Tests could not be run please check the arguments.\n");
1803 return -1;
1804 }
1805 return 0;
1806 }
1807