1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *****************************************************************************
* Copyright (C) 2001-2016, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 ****************************************************************************/
8
9 #include "unicode/utypes.h"
10
11 #if !UCONFIG_NO_COLLATION
12
13 #include "srchtest.h"
14 #if !UCONFIG_NO_BREAK_ITERATION
15 #include "../cintltst/usrchdat.c"
16 #endif
17 #include "unicode/stsearch.h"
18 #include "unicode/ustring.h"
19 #include "unicode/schriter.h"
20 #include "cmemory.h"
21 #include <string.h>
22 #include <stdio.h>
23
24 // private definitions -----------------------------------------------------
25
/*
 * Expands to one `case` label of the runIndexedTest() switch.
 * The test name is always reported through `name`; when `exec` is set a
 * banner is logged and the test method is invoked, unless `areBroken`
 * indicates the fixture could not be created, in which case a data error is
 * reported instead.  `name`, `exec` and `areBroken` must be in scope where
 * the macro is expanded.
 */
#define CASE(id,test)                                                             \
    case id:                                                                      \
        name = #test;                                                             \
        if (exec) {                                                               \
            logln(#test "---");                                                   \
            logln((UnicodeString)"");                                             \
            if (!areBroken) {                                                     \
                test();                                                           \
            } else {                                                              \
                dataerrln(__FILE__ " cannot test - failed to create collator.");  \
            }                                                                     \
        }                                                                         \
        break;
39
40 // public constructors and destructors --------------------------------------
41
StringSearchTest()42 StringSearchTest::StringSearchTest()
43 #if !UCONFIG_NO_BREAK_ITERATION
44 :
45 m_en_wordbreaker_(NULL), m_en_characterbreaker_(NULL)
46 #endif
47 {
48 #if !UCONFIG_NO_BREAK_ITERATION
49 UErrorCode status = U_ZERO_ERROR;
50
51 m_en_us_ = (RuleBasedCollator *)Collator::createInstance("en_US", status);
52 m_fr_fr_ = (RuleBasedCollator *)Collator::createInstance("fr_FR", status);
53 m_de_ = (RuleBasedCollator *)Collator::createInstance("de_DE", status);
54 m_es_ = (RuleBasedCollator *)Collator::createInstance("es_ES", status);
55 if(U_FAILURE(status)) {
56 delete m_en_us_;
57 delete m_fr_fr_;
58 delete m_de_;
59 delete m_es_;
60 m_en_us_ = 0;
61 m_fr_fr_ = 0;
62 m_de_ = 0;
63 m_es_ = 0;
64 errln("Collator creation failed with %s", u_errorName(status));
65 return;
66 }
67
68
69 UnicodeString rules;
70 rules.setTo(((RuleBasedCollator *)m_de_)->getRules());
71 UChar extrarules[128];
72 u_unescape(EXTRACOLLATIONRULE, extrarules, 128);
73 rules.append(extrarules, u_strlen(extrarules));
74 delete m_de_;
75
76 m_de_ = new RuleBasedCollator(rules, status);
77
78 rules.setTo(((RuleBasedCollator *)m_es_)->getRules());
79 rules.append(extrarules, u_strlen(extrarules));
80
81 delete m_es_;
82
83 m_es_ = new RuleBasedCollator(rules, status);
84
85 #if !UCONFIG_NO_BREAK_ITERATION
86 m_en_wordbreaker_ = BreakIterator::createWordInstance(
87 Locale::getEnglish(), status);
88 m_en_characterbreaker_ = BreakIterator::createCharacterInstance(
89 Locale::getEnglish(), status);
90 #endif
91 #endif
92 }
93
~StringSearchTest()94 StringSearchTest::~StringSearchTest()
95 {
96 #if !UCONFIG_NO_BREAK_ITERATION
97 delete m_en_us_;
98 delete m_fr_fr_;
99 delete m_de_;
100 delete m_es_;
101 #if !UCONFIG_NO_BREAK_ITERATION
102 delete m_en_wordbreaker_;
103 delete m_en_characterbreaker_;
104 #endif
105 #endif
106 }
107
108 // public methods ----------------------------------------------------------
109
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)110 void StringSearchTest::runIndexedTest(int32_t index, UBool exec,
111 const char* &name, char* )
112 {
113 #if !UCONFIG_NO_BREAK_ITERATION
114 UBool areBroken = false;
115 if (m_en_us_ == NULL && m_fr_fr_ == NULL && m_de_ == NULL &&
116 m_es_ == NULL && m_en_wordbreaker_ == NULL &&
117 m_en_characterbreaker_ == NULL && exec) {
118 areBroken = true;
119 }
120
121 switch (index) {
122 #if !UCONFIG_NO_FILE_IO
123 CASE(0, TestOpenClose)
124 #endif
125 CASE(1, TestInitialization)
126 CASE(2, TestBasic)
127 CASE(3, TestNormExact)
128 CASE(4, TestStrength)
129 #if UCONFIG_NO_BREAK_ITERATION
130 case 5:
131 name = "TestBreakIterator";
132 break;
133 #else
134 CASE(5, TestBreakIterator)
135 #endif
136 CASE(6, TestVariable)
137 CASE(7, TestOverlap)
138 CASE(8, TestCollator)
139 CASE(9, TestPattern)
140 CASE(10, TestText)
141 CASE(11, TestCompositeBoundaries)
142 CASE(12, TestGetSetOffset)
143 CASE(13, TestGetSetAttribute)
144 CASE(14, TestGetMatch)
145 CASE(15, TestSetMatch)
146 CASE(16, TestReset)
147 CASE(17, TestSupplementary)
148 CASE(18, TestContraction)
149 CASE(19, TestIgnorable)
150 CASE(20, TestCanonical)
151 CASE(21, TestNormCanonical)
152 CASE(22, TestStrengthCanonical)
153 #if UCONFIG_NO_BREAK_ITERATION
154 case 23:
155 name = "TestBreakIteratorCanonical";
156 break;
157 #else
158 CASE(23, TestBreakIteratorCanonical)
159 #endif
160 CASE(24, TestVariableCanonical)
161 CASE(25, TestOverlapCanonical)
162 CASE(26, TestCollatorCanonical)
163 CASE(27, TestPatternCanonical)
164 CASE(28, TestTextCanonical)
165 CASE(29, TestCompositeBoundariesCanonical)
166 CASE(30, TestGetSetOffsetCanonical)
167 CASE(31, TestSupplementaryCanonical)
168 CASE(32, TestContractionCanonical)
169 CASE(33, TestUClassID)
170 CASE(34, TestSubclass)
171 CASE(35, TestCoverage)
172 CASE(36, TestDiacriticMatch)
173 default: name = ""; break;
174 }
175 #else
176 name="";
177 #endif
178 }
179
180 #if !UCONFIG_NO_BREAK_ITERATION
181 // private methods ------------------------------------------------------
182
getCollator(const char * collator)183 RuleBasedCollator * StringSearchTest::getCollator(const char *collator)
184 {
185 if (collator == NULL) {
186 return m_en_us_;
187 }
188 if (strcmp(collator, "fr") == 0) {
189 return m_fr_fr_;
190 }
191 else if (strcmp(collator, "de") == 0) {
192 return m_de_;
193 }
194 else if (strcmp(collator, "es") == 0) {
195 return m_es_;
196 }
197 else {
198 return m_en_us_;
199 }
200 }
201
getBreakIterator(const char * breaker)202 BreakIterator * StringSearchTest::getBreakIterator(const char *breaker)
203 {
204 #if UCONFIG_NO_BREAK_ITERATION
205 return NULL;
206 #else
207 if (breaker == NULL) {
208 return NULL;
209 }
210 if (strcmp(breaker, "wordbreaker") == 0) {
211 return m_en_wordbreaker_;
212 }
213 else {
214 return m_en_characterbreaker_;
215 }
216 #endif
217 }
218
toCharString(const UnicodeString & text)219 char * StringSearchTest::toCharString(const UnicodeString &text)
220 {
221 static char result[1024];
222 int index = 0;
223 int count = 0;
224 int length = text.length();
225
226 for (; count < length; count ++) {
227 UChar ch = text[count];
228 if (ch >= 0x20 && ch <= 0x7e) {
229 result[index ++] = (char)ch;
230 }
231 else {
232 sprintf(result+index, "\\u%04x", ch);
233 index += 6; /* \uxxxx */
234 }
235 }
236 result[index] = 0;
237
238 return result;
239 }
240
getECollationStrength(const UCollationStrength & strength) const241 Collator::ECollationStrength StringSearchTest::getECollationStrength(
242 const UCollationStrength &strength) const
243 {
244 switch (strength)
245 {
246 case UCOL_PRIMARY :
247 return Collator::PRIMARY;
248 case UCOL_SECONDARY :
249 return Collator::SECONDARY;
250 case UCOL_TERTIARY :
251 return Collator::TERTIARY;
252 default :
253 return Collator::IDENTICAL;
254 }
255 }
256
/**
 * Checks that `strsrch` finds exactly the matches listed in `search`.
 *
 * The expected matches are the parallel arrays search->offset / search->size;
 * the list ends at the first negative offset.  The search object is driven
 * through that list four times: forwards with next(), backwards with
 * previous(), forwards with following(position) and backwards with
 * preceding(position).  After each directional pass one more step must
 * report USEARCH_DONE with a matched length of 0.
 *
 * USEARCH_ELEMENT_COMPARISON is set to search->elemCompare on entry and set
 * back to USEARCH_STANDARD_ELEMENT_COMPARISON only when the function returns
 * true; the early `return false` paths leave it as set on entry.
 *
 * @param strsrch  search object to exercise
 * @param search   expected results
 * @return false after logging the text, pattern and mismatch as soon as a
 *         match position or length differs from the expectation (or the
 *         attribute cannot be set); true otherwise
 */
UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
                                                    const SearchData *search)
{
    int32_t         count       = 0;
    UErrorCode      status      = U_ZERO_ERROR;
    int32_t         matchindex  = search->offset[count];
    UnicodeString   matchtext;
    int32_t         matchlength;

    strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, search->elemCompare, status);
    if (U_FAILURE(status)) {
        errln("Error setting USEARCH_ELEMENT_COMPARISON attribute %s", u_errorName(status));
        return false;
    }

    // Before the first step no match may be reported.  This is logged as an
    // error but does not abort the check.
    if (strsrch->getMatchedStart() != USEARCH_DONE ||
        strsrch->getMatchedLength() != 0) {
        errln("Error with the initialization of match start and length");
    }

    // start of next matches
    while (U_SUCCESS(status) && matchindex >= 0) {
        matchlength = search->size[count];
        strsrch->next(status);
        if (matchindex != strsrch->getMatchedStart() ||
            matchlength != strsrch->getMatchedLength()) {
            char *str = toCharString(strsrch->getText());
            errln("Text: %s", str);
            str = toCharString(strsrch->getPattern());
            errln("Pattern: %s", str);
            errln("Error next match found at %d (len:%d); expected %d (len:%d)",
                    strsrch->getMatchedStart(), strsrch->getMatchedLength(),
                    matchindex, matchlength);
            return false;
        }
        count ++;

        strsrch->getMatchedText(matchtext);

        // The matched text must equal the corresponding slice of the searched
        // text; a difference is logged but does not abort the check.
        if (U_FAILURE(status) ||
            strsrch->getText().compareBetween(matchindex,
                                              matchindex + matchlength,
                                              matchtext, 0,
                                              matchtext.length())) {
            errln("Error getting next matched text");
        }

        matchindex = search->offset[count];
    }
    // one step past the last expected match must report "no match"
    strsrch->next(status);
    if (strsrch->getMatchedStart() != USEARCH_DONE ||
        strsrch->getMatchedLength() != 0) {
        char *str = toCharString(strsrch->getText());
        errln("Text: %s", str);
        str = toCharString(strsrch->getPattern());
        errln("Pattern: %s", str);
        errln("Error next match found at %d (len:%d); expected <NO MATCH>",
                    strsrch->getMatchedStart(), strsrch->getMatchedLength());
        return false;
    }

    // start of previous matches
    // `count` is the number of expected matches here; step back to the index
    // of the last one (index 0 holds the negative terminator when the list
    // is empty).
    count = count == 0 ? 0 : count - 1;
    matchindex = search->offset[count];
    while (U_SUCCESS(status) && matchindex >= 0) {
        matchlength = search->size[count];
        strsrch->previous(status);
        if (matchindex != strsrch->getMatchedStart() ||
            matchlength != strsrch->getMatchedLength()) {
            char *str = toCharString(strsrch->getText());
            errln("Text: %s", str);
            str = toCharString(strsrch->getPattern());
            errln("Pattern: %s", str);
            errln("Error previous match found at %d (len:%d); expected %d (len:%d)",
                    strsrch->getMatchedStart(), strsrch->getMatchedLength(),
                    matchindex, matchlength);
            return false;
        }

        strsrch->getMatchedText(matchtext);

        if (U_FAILURE(status) ||
            strsrch->getText().compareBetween(matchindex,
                                              matchindex + matchlength,
                                              matchtext, 0,
                                              matchtext.length())) {
            errln("Error getting previous matched text");
        }

        // -1 ends the loop once the first expected match has been visited
        matchindex = count > 0 ? search->offset[count - 1] : -1;
        count --;
    }
    // one step before the first expected match must report "no match"
    strsrch->previous(status);
    if (strsrch->getMatchedStart() != USEARCH_DONE ||
        strsrch->getMatchedLength() != 0) {
        char *str = toCharString(strsrch->getText());
        errln("Text: %s", str);
        str = toCharString(strsrch->getPattern());
        errln("Pattern: %s", str);
        errln("Error previous match found at %d (len:%d); expected <NO MATCH>",
                    strsrch->getMatchedStart(), strsrch->getMatchedLength());
        return false;
    }

    int32_t nextStart;
    UBool isOverlap = (strsrch->getAttribute(USEARCH_OVERLAP) == USEARCH_ON);

    // start of following matches
    count = 0;
    matchindex = search->offset[count];
    nextStart = 0;

    while (true) {
        strsrch->following(nextStart, status);

        if (matchindex < 0) {
            // the expected list is exhausted, so nothing may be found
            if (strsrch->getMatchedStart() != USEARCH_DONE ||
                strsrch->getMatchedLength() != 0) {
                char *str = toCharString(strsrch->getText());
                errln("Text: %s", str);
                str = toCharString(strsrch->getPattern());
                errln("Pattern: %s", str);
                errln("Error following match starting at %d (overlap:%d) found at %d (len:%d); expected <NO MATCH>",
                            nextStart, isOverlap,
                            strsrch->getMatchedStart(), strsrch->getMatchedLength());
                return false;
            }
            // no more matches
            break;
        }

        matchlength = search->size[count];
        if (strsrch->getMatchedStart() != matchindex
                || strsrch->getMatchedLength() != matchlength
                || U_FAILURE(status)) {
            char *str = toCharString(strsrch->getText());
            errln("Text: %s\n", str);
            str = toCharString(strsrch->getPattern());
            errln("Pattern: %s\n", str);
            errln("Error following match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
                        nextStart, isOverlap,
                        strsrch->getMatchedStart(), strsrch->getMatchedLength(),
                        matchindex, matchlength);
            return false;
        }

        // Advance the start position: by one when overlapping matches are
        // enabled or the match is empty, otherwise past the whole match.
        if (isOverlap || strsrch->getMatchedLength() == 0) {
            nextStart = strsrch->getMatchedStart() + 1;
        } else {
            nextStart = strsrch->getMatchedStart() + strsrch->getMatchedLength();
        }

        count++;
        matchindex = search->offset[count];
    }

    // start preceding matches
    count = -1; // last non-negative offset index, could be -1 if no match
    while (search->offset[count + 1] >= 0) {
        count++;
    }
    nextStart = strsrch->getText().length();

    while (true) {
        strsrch->preceding(nextStart, status);

        if (count < 0) {
            // the expected list is exhausted, so nothing may be found
            if (strsrch->getMatchedStart() != USEARCH_DONE || strsrch->getMatchedLength() != 0) {
                char *str = toCharString(strsrch->getText());
                errln("Text: %s\n", str);
                str = toCharString(strsrch->getPattern());
                errln("Pattern: %s\n", str);
                errln("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected <NO MATCH>\n",
                            nextStart, isOverlap,
                            strsrch->getMatchedStart(),
                            strsrch->getMatchedLength());
                return false;
            }
            // no more matches
            break;
        }

        matchindex = search->offset[count];
        matchlength = search->size[count];
        if (strsrch->getMatchedStart() != matchindex
                || strsrch->getMatchedLength() != matchlength
                || U_FAILURE(status)) {
            char *str = toCharString(strsrch->getText());
            errln("Text: %s\n", str);
            str = toCharString(strsrch->getPattern());
            errln("Pattern: %s\n", str);
            errln("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
                        nextStart, isOverlap,
                        strsrch->getMatchedStart(), strsrch->getMatchedLength(),
                        matchindex, matchlength);
            return false;
        }

        // continue searching backwards from the start of this match
        nextStart = matchindex;
        count--;
    }

    // success: put the element comparison mode back to the standard one
    strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, status);
    return true;
}
462
assertEqual(const SearchData * search)463 UBool StringSearchTest::assertEqual(const SearchData *search)
464 {
465 UErrorCode status = U_ZERO_ERROR;
466
467 Collator *collator = getCollator(search->collator);
468 BreakIterator *breaker = getBreakIterator(search->breaker);
469 StringSearch *strsrch, *strsrch2;
470 UChar temp[128];
471
472 #if UCONFIG_NO_BREAK_ITERATION
473 if(search->breaker) {
474 return true; /* skip test */
475 }
476 #endif
477 u_unescape(search->text, temp, 128);
478 UnicodeString text;
479 text.setTo(temp);
480 u_unescape(search->pattern, temp, 128);
481 UnicodeString pattern;
482 pattern.setTo(temp);
483
484 #if !UCONFIG_NO_BREAK_ITERATION
485 if (breaker != NULL) {
486 breaker->setText(text);
487 }
488 #endif
489 collator->setStrength(getECollationStrength(search->strength));
490 strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator,
491 breaker, status);
492 if (U_FAILURE(status)) {
493 errln("Error opening string search %s", u_errorName(status));
494 return false;
495 }
496
497 if (!assertEqualWithStringSearch(strsrch, search)) {
498 collator->setStrength(getECollationStrength(UCOL_TERTIARY));
499 delete strsrch;
500 return false;
501 }
502
503
504 strsrch2 = strsrch->clone();
505 if( strsrch2 == strsrch || *strsrch2 != *strsrch ||
506 !assertEqualWithStringSearch(strsrch2, search)
507 ) {
508 infoln("failure with StringSearch.clone()");
509 collator->setStrength(getECollationStrength(UCOL_TERTIARY));
510 delete strsrch;
511 delete strsrch2;
512 return false;
513 }
514 delete strsrch2;
515
516 collator->setStrength(getECollationStrength(UCOL_TERTIARY));
517 delete strsrch;
518 return true;
519 }
520
assertCanonicalEqual(const SearchData * search)521 UBool StringSearchTest::assertCanonicalEqual(const SearchData *search)
522 {
523 UErrorCode status = U_ZERO_ERROR;
524 Collator *collator = getCollator(search->collator);
525 BreakIterator *breaker = getBreakIterator(search->breaker);
526 StringSearch *strsrch;
527 UChar temp[128];
528 UBool result = true;
529
530 #if UCONFIG_NO_BREAK_ITERATION
531 if(search->breaker) {
532 return true; /* skip test */
533 }
534 #endif
535
536 u_unescape(search->text, temp, 128);
537 UnicodeString text;
538 text.setTo(temp);
539 u_unescape(search->pattern, temp, 128);
540 UnicodeString pattern;
541 pattern.setTo(temp);
542
543 #if !UCONFIG_NO_BREAK_ITERATION
544 if (breaker != NULL) {
545 breaker->setText(text);
546 }
547 #endif
548 collator->setStrength(getECollationStrength(search->strength));
549 collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
550 strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator,
551 breaker, status);
552 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
553 if (U_FAILURE(status)) {
554 errln("Error opening string search %s", u_errorName(status));
555 result = false;
556 goto bail;
557 }
558
559 if (!assertEqualWithStringSearch(strsrch, search)) {
560 result = false;
561 goto bail;
562 }
563
564 bail:
565 collator->setStrength(getECollationStrength(UCOL_TERTIARY));
566 collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
567 delete strsrch;
568
569 return result;
570 }
571
assertEqualWithAttribute(const SearchData * search,USearchAttributeValue canonical,USearchAttributeValue overlap)572 UBool StringSearchTest::assertEqualWithAttribute(const SearchData *search,
573 USearchAttributeValue canonical,
574 USearchAttributeValue overlap)
575 {
576 UErrorCode status = U_ZERO_ERROR;
577 Collator *collator = getCollator(search->collator);
578 BreakIterator *breaker = getBreakIterator(search->breaker);
579 StringSearch *strsrch;
580 UChar temp[128];
581
582
583 #if UCONFIG_NO_BREAK_ITERATION
584 if(search->breaker) {
585 return true; /* skip test */
586 }
587 #endif
588
589 u_unescape(search->text, temp, 128);
590 UnicodeString text;
591 text.setTo(temp);
592 u_unescape(search->pattern, temp, 128);
593 UnicodeString pattern;
594 pattern.setTo(temp);
595
596 #if !UCONFIG_NO_BREAK_ITERATION
597 if (breaker != NULL) {
598 breaker->setText(text);
599 }
600 #endif
601 collator->setStrength(getECollationStrength(search->strength));
602 strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator,
603 breaker, status);
604 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, canonical, status);
605 strsrch->setAttribute(USEARCH_OVERLAP, overlap, status);
606
607 if (U_FAILURE(status)) {
608 errln("Error opening string search %s", u_errorName(status));
609 return false;
610 }
611
612 if (!assertEqualWithStringSearch(strsrch, search)) {
613 collator->setStrength(getECollationStrength(UCOL_TERTIARY));
614 delete strsrch;
615 return false;
616 }
617 collator->setStrength(getECollationStrength(UCOL_TERTIARY));
618 delete strsrch;
619 return true;
620 }
621
TestOpenClose()622 void StringSearchTest::TestOpenClose()
623 {
624 UErrorCode status = U_ZERO_ERROR;
625 StringSearch *result;
626 BreakIterator *breakiter = m_en_wordbreaker_;
627 UnicodeString pattern;
628 UnicodeString text;
629 UnicodeString temp("a");
630 StringCharacterIterator chariter(text);
631
632 /* testing null arguments */
633 result = new StringSearch(pattern, text, NULL, NULL, status);
634 if (U_SUCCESS(status)) {
635 errln("Error: NULL arguments should produce an error");
636 }
637 delete result;
638
639 chariter.setText(text);
640 status = U_ZERO_ERROR;
641 result = new StringSearch(pattern, chariter, NULL, NULL, status);
642 if (U_SUCCESS(status)) {
643 errln("Error: NULL arguments should produce an error");
644 }
645 delete result;
646
647 // No-op: text.append(0, 0x1); -- what was intended here?
648 status = U_ZERO_ERROR;
649 result = new StringSearch(pattern, text, NULL, NULL, status);
650 if (U_SUCCESS(status)) {
651 errln("Error: Empty pattern should produce an error");
652 }
653 delete result;
654
655 chariter.setText(text);
656 status = U_ZERO_ERROR;
657 result = new StringSearch(pattern, chariter, NULL, NULL, status);
658 if (U_SUCCESS(status)) {
659 errln("Error: Empty pattern should produce an error");
660 }
661 delete result;
662
663 text.remove();
664 pattern.append(temp);
665 status = U_ZERO_ERROR;
666 result = new StringSearch(pattern, text, NULL, NULL, status);
667 if (U_SUCCESS(status)) {
668 errln("Error: Empty text should produce an error");
669 }
670 delete result;
671
672 chariter.setText(text);
673 status = U_ZERO_ERROR;
674 result = new StringSearch(pattern, chariter, NULL, NULL, status);
675 if (U_SUCCESS(status)) {
676 errln("Error: Empty text should produce an error");
677 }
678 delete result;
679
680 text.append(temp);
681 status = U_ZERO_ERROR;
682 result = new StringSearch(pattern, text, NULL, NULL, status);
683 if (U_SUCCESS(status)) {
684 errln("Error: NULL arguments should produce an error");
685 }
686 delete result;
687
688 chariter.setText(text);
689 status = U_ZERO_ERROR;
690 result = new StringSearch(pattern, chariter, NULL, NULL, status);
691 if (U_SUCCESS(status)) {
692 errln("Error: NULL arguments should produce an error");
693 }
694 delete result;
695
696 status = U_ZERO_ERROR;
697 result = new StringSearch(pattern, text, m_en_us_, NULL, status);
698 if (U_FAILURE(status)) {
699 errln("Error: NULL break iterator is valid for opening search");
700 }
701 delete result;
702
703 status = U_ZERO_ERROR;
704 result = new StringSearch(pattern, chariter, m_en_us_, NULL, status);
705 if (U_FAILURE(status)) {
706 errln("Error: NULL break iterator is valid for opening search");
707 }
708 delete result;
709
710 status = U_ZERO_ERROR;
711 result = new StringSearch(pattern, text, Locale::getEnglish(), NULL, status);
712 if (U_FAILURE(status) || result == NULL) {
713 errln("Error: NULL break iterator is valid for opening search");
714 }
715 delete result;
716
717 status = U_ZERO_ERROR;
718 result = new StringSearch(pattern, chariter, Locale::getEnglish(), NULL, status);
719 if (U_FAILURE(status)) {
720 errln("Error: NULL break iterator is valid for opening search");
721 }
722 delete result;
723
724 status = U_ZERO_ERROR;
725 result = new StringSearch(pattern, text, m_en_us_, breakiter, status);
726 if (U_FAILURE(status)) {
727 errln("Error: Break iterator is valid for opening search");
728 }
729 delete result;
730
731 status = U_ZERO_ERROR;
732 result = new StringSearch(pattern, chariter, m_en_us_, NULL, status);
733 if (U_FAILURE(status)) {
734 errln("Error: Break iterator is valid for opening search");
735 }
736 delete result;
737 }
738
TestInitialization()739 void StringSearchTest::TestInitialization()
740 {
741 UErrorCode status = U_ZERO_ERROR;
742 UnicodeString pattern;
743 UnicodeString text;
744 UnicodeString temp("a");
745 StringSearch *result;
746 int count;
747
748 /* simple test on the pattern ce construction */
749 pattern.append(temp);
750 pattern.append(temp);
751 text.append(temp);
752 text.append(temp);
753 text.append(temp);
754 result = new StringSearch(pattern, text, m_en_us_, NULL, status);
755 if (U_FAILURE(status)) {
756 errln("Error opening search %s", u_errorName(status));
757 }
758 StringSearch *copy = new StringSearch(*result);
759 if (*(copy->getCollator()) != *(result->getCollator()) ||
760 copy->getBreakIterator() != result->getBreakIterator() ||
761 copy->getMatchedLength() != result->getMatchedLength() ||
762 copy->getMatchedStart() != result->getMatchedStart() ||
763 copy->getOffset() != result->getOffset() ||
764 copy->getPattern() != result->getPattern() ||
765 copy->getText() != result->getText() ||
766 *(copy) != *(result))
767 {
768 errln("Error copying StringSearch");
769 }
770 delete copy;
771
772 copy = result->safeClone();
773 if (*(copy->getCollator()) != *(result->getCollator()) ||
774 copy->getBreakIterator() != result->getBreakIterator() ||
775 copy->getMatchedLength() != result->getMatchedLength() ||
776 copy->getMatchedStart() != result->getMatchedStart() ||
777 copy->getOffset() != result->getOffset() ||
778 copy->getPattern() != result->getPattern() ||
779 copy->getText() != result->getText() ||
780 *(copy) != *(result)) {
781 errln("Error copying StringSearch");
782 }
783 delete result;
784
785 /* testing if an extremely large pattern will fail the initialization */
786 for (count = 0; count < 512; count ++) {
787 pattern.append(temp);
788 }
789 result = new StringSearch(pattern, text, m_en_us_, NULL, status);
790 if (*result != *result) {
791 errln("Error: string search object expected to match itself");
792 }
793 if (*result == *copy) {
794 errln("Error: string search objects are not expected to match");
795 }
796 *copy = *result;
797 if (*(copy->getCollator()) != *(result->getCollator()) ||
798 copy->getBreakIterator() != result->getBreakIterator() ||
799 copy->getMatchedLength() != result->getMatchedLength() ||
800 copy->getMatchedStart() != result->getMatchedStart() ||
801 copy->getOffset() != result->getOffset() ||
802 copy->getPattern() != result->getPattern() ||
803 copy->getText() != result->getText() ||
804 *(copy) != *(result)) {
805 errln("Error copying StringSearch");
806 }
807 if (U_FAILURE(status)) {
808 errln("Error opening search %s", u_errorName(status));
809 }
810 delete result;
811 delete copy;
812 }
813
TestBasic()814 void StringSearchTest::TestBasic()
815 {
816 int count = 0;
817 while (BASIC[count].text != NULL) {
818 //printf("count %d", count);
819 if (!assertEqual(&BASIC[count])) {
820 infoln("Error at test number %d", count);
821 }
822 count ++;
823 }
824 }
825
TestNormExact()826 void StringSearchTest::TestNormExact()
827 {
828 int count = 0;
829 UErrorCode status = U_ZERO_ERROR;
830 m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
831 if (U_FAILURE(status)) {
832 errln("Error setting collation normalization %s",
833 u_errorName(status));
834 }
835 while (BASIC[count].text != NULL) {
836 if (!assertEqual(&BASIC[count])) {
837 infoln("Error at test number %d", count);
838 }
839 count ++;
840 }
841 count = 0;
842 while (NORMEXACT[count].text != NULL) {
843 if (!assertEqual(&NORMEXACT[count])) {
844 infoln("Error at test number %d", count);
845 }
846 count ++;
847 }
848 m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
849 count = 0;
850 while (NONNORMEXACT[count].text != NULL) {
851 if (!assertEqual(&NONNORMEXACT[count])) {
852 infoln("Error at test number %d", count);
853 }
854 count ++;
855 }
856 }
857
TestStrength()858 void StringSearchTest::TestStrength()
859 {
860 int count = 0;
861 while (STRENGTH[count].text != NULL) {
862 if (!assertEqual(&STRENGTH[count])) {
863 infoln("Error at test number %d", count);
864 }
865 count ++;
866 }
867 }
868
869 #if !UCONFIG_NO_BREAK_ITERATION
870
TestBreakIterator()871 void StringSearchTest::TestBreakIterator()
872 {
873 UChar temp[128];
874 u_unescape(BREAKITERATOREXACT[0].text, temp, 128);
875 UnicodeString text;
876 text.setTo(temp, u_strlen(temp));
877 u_unescape(BREAKITERATOREXACT[0].pattern, temp, 128);
878 UnicodeString pattern;
879 pattern.setTo(temp, u_strlen(temp));
880
881 UErrorCode status = U_ZERO_ERROR;
882 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
883 status);
884 if (U_FAILURE(status)) {
885 errln("Error opening string search %s", u_errorName(status));
886 }
887
888 strsrch->setBreakIterator(NULL, status);
889 if (U_FAILURE(status) || strsrch->getBreakIterator() != NULL) {
890 errln("Error usearch_getBreakIterator returned wrong object");
891 }
892
893 strsrch->setBreakIterator(m_en_characterbreaker_, status);
894 if (U_FAILURE(status) ||
895 strsrch->getBreakIterator() != m_en_characterbreaker_) {
896 errln("Error usearch_getBreakIterator returned wrong object");
897 }
898
899 strsrch->setBreakIterator(m_en_wordbreaker_, status);
900 if (U_FAILURE(status) ||
901 strsrch->getBreakIterator() != m_en_wordbreaker_) {
902 errln("Error usearch_getBreakIterator returned wrong object");
903 }
904
905 delete strsrch;
906
907 int count = 0;
908 while (count < 4) {
909 // special purposes for tests numbers 0-3
910 const SearchData *search = &(BREAKITERATOREXACT[count]);
911 RuleBasedCollator *collator = getCollator(search->collator);
912 BreakIterator *breaker = getBreakIterator(search->breaker);
913 StringSearch *strsrch;
914
915 u_unescape(search->text, temp, 128);
916 text.setTo(temp, u_strlen(temp));
917 u_unescape(search->pattern, temp, 128);
918 pattern.setTo(temp, u_strlen(temp));
919 if (breaker != NULL) {
920 breaker->setText(text);
921 }
922 collator->setStrength(getECollationStrength(search->strength));
923
924 strsrch = new StringSearch(pattern, text, collator, breaker, status);
925 if (U_FAILURE(status) ||
926 strsrch->getBreakIterator() != breaker) {
927 errln("Error setting break iterator");
928 if (strsrch != NULL) {
929 delete strsrch;
930 }
931 }
932 if (!assertEqualWithStringSearch(strsrch, search)) {
933 collator->setStrength(getECollationStrength(UCOL_TERTIARY));
934 delete strsrch;
935 }
936 search = &(BREAKITERATOREXACT[count + 1]);
937 breaker = getBreakIterator(search->breaker);
938 if (breaker != NULL) {
939 breaker->setText(text);
940 }
941 strsrch->setBreakIterator(breaker, status);
942 if (U_FAILURE(status) ||
943 strsrch->getBreakIterator() != breaker) {
944 errln("Error setting break iterator");
945 delete strsrch;
946 }
947 strsrch->reset();
948 if (!assertEqualWithStringSearch(strsrch, search)) {
949 infoln("Error at test number %d", count);
950 }
951 delete strsrch;
952 count += 2;
953 }
954 count = 0;
955 while (BREAKITERATOREXACT[count].text != NULL) {
956 if (!assertEqual(&BREAKITERATOREXACT[count])) {
957 infoln("Error at test number %d", count);
958 }
959 count ++;
960 }
961 }
962
963 #endif
964
TestVariable()965 void StringSearchTest::TestVariable()
966 {
967 int count = 0;
968 UErrorCode status = U_ZERO_ERROR;
969 m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
970 if (U_FAILURE(status)) {
971 errln("Error setting collation alternate attribute %s",
972 u_errorName(status));
973 }
974 while (VARIABLE[count].text != NULL) {
975 logln("variable %d", count);
976 if (!assertEqual(&VARIABLE[count])) {
977 infoln("Error at test number %d", count);
978 }
979 count ++;
980 }
981 m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
982 status);
983 }
984
TestOverlap()985 void StringSearchTest::TestOverlap()
986 {
987 int count = 0;
988 while (OVERLAP[count].text != NULL) {
989 if (!assertEqualWithAttribute(&OVERLAP[count], USEARCH_OFF,
990 USEARCH_ON)) {
991 errln("Error at overlap test number %d", count);
992 }
993 count ++;
994 }
995 count = 0;
996 while (NONOVERLAP[count].text != NULL) {
997 if (!assertEqual(&NONOVERLAP[count])) {
998 errln("Error at non overlap test number %d", count);
999 }
1000 count ++;
1001 }
1002
1003 count = 0;
1004 while (count < 1) {
1005 const SearchData *search = &(OVERLAP[count]);
1006 UChar temp[128];
1007 u_unescape(search->text, temp, 128);
1008 UnicodeString text;
1009 text.setTo(temp, u_strlen(temp));
1010 u_unescape(search->pattern, temp, 128);
1011 UnicodeString pattern;
1012 pattern.setTo(temp, u_strlen(temp));
1013
1014 RuleBasedCollator *collator = getCollator(search->collator);
1015 UErrorCode status = U_ZERO_ERROR;
1016 StringSearch *strsrch = new StringSearch(pattern, text,
1017 collator, NULL,
1018 status);
1019
1020 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1021 if (U_FAILURE(status) ||
1022 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) {
1023 errln("Error setting overlap option");
1024 }
1025 if (!assertEqualWithStringSearch(strsrch, search)) {
1026 delete strsrch;
1027 return;
1028 }
1029
1030 search = &(NONOVERLAP[count]);
1031 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status);
1032 if (U_FAILURE(status) ||
1033 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1034 errln("Error setting overlap option");
1035 }
1036 strsrch->reset();
1037 if (!assertEqualWithStringSearch(strsrch, search)) {
1038 delete strsrch;
1039 errln("Error at test number %d", count);
1040 }
1041
1042 count ++;
1043 delete strsrch;
1044 }
1045 }
1046
TestCollator()1047 void StringSearchTest::TestCollator()
1048 {
1049 // test collator that thinks "o" and "p" are the same thing
1050 UChar temp[128];
1051 u_unescape(COLLATOR[0].text, temp, 128);
1052 UnicodeString text;
1053 text.setTo(temp, u_strlen(temp));
1054 u_unescape(COLLATOR[0].pattern, temp, 128);
1055 UnicodeString pattern;
1056 pattern.setTo(temp, u_strlen(temp));
1057
1058 UErrorCode status = U_ZERO_ERROR;
1059 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1060 status);
1061 if (U_FAILURE(status)) {
1062 errln("Error opening string search %s", u_errorName(status));
1063 delete strsrch;
1064 return;
1065 }
1066 if (!assertEqualWithStringSearch(strsrch, &COLLATOR[0])) {
1067 delete strsrch;
1068 return;
1069 }
1070
1071 u_unescape(TESTCOLLATORRULE, temp, 128);
1072 UnicodeString rules;
1073 rules.setTo(temp, u_strlen(temp));
1074 RuleBasedCollator *tailored = new RuleBasedCollator(rules, status);
1075 tailored->setStrength(getECollationStrength(COLLATOR[1].strength));
1076
1077 if (U_FAILURE(status)) {
1078 errln("Error opening rule based collator %s", u_errorName(status));
1079 delete strsrch;
1080 delete tailored;
1081 return;
1082 }
1083
1084 strsrch->setCollator(tailored, status);
1085 if (U_FAILURE(status) || (*strsrch->getCollator()) != (*tailored)) {
1086 errln("Error setting rule based collator");
1087 delete strsrch;
1088 delete tailored;
1089 }
1090 strsrch->reset();
1091 if (!assertEqualWithStringSearch(strsrch, &COLLATOR[1])) {
1092 delete strsrch;
1093 delete tailored;
1094 return;
1095 }
1096
1097 strsrch->setCollator(m_en_us_, status);
1098 strsrch->reset();
1099 if (U_FAILURE(status) || (*strsrch->getCollator()) != (*m_en_us_)) {
1100 errln("Error setting rule based collator");
1101 delete strsrch;
1102 delete tailored;
1103 }
1104 if (!assertEqualWithStringSearch(strsrch, &COLLATOR[0])) {
1105 errln("Error searching collator test");
1106 }
1107 delete strsrch;
1108 delete tailored;
1109 }
1110
TestPattern()1111 void StringSearchTest::TestPattern()
1112 {
1113
1114 UChar temp[512];
1115 int templength;
1116 u_unescape(PATTERN[0].text, temp, 512);
1117 UnicodeString text;
1118 text.setTo(temp, u_strlen(temp));
1119 u_unescape(PATTERN[0].pattern, temp, 512);
1120 UnicodeString pattern;
1121 pattern.setTo(temp, u_strlen(temp));
1122
1123 m_en_us_->setStrength(getECollationStrength(PATTERN[0].strength));
1124 UErrorCode status = U_ZERO_ERROR;
1125 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1126 status);
1127
1128 if (U_FAILURE(status)) {
1129 errln("Error opening string search %s", u_errorName(status));
1130 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1131 if (strsrch != NULL) {
1132 delete strsrch;
1133 }
1134 return;
1135 }
1136 if (strsrch->getPattern() != pattern) {
1137 errln("Error setting pattern");
1138 }
1139 if (!assertEqualWithStringSearch(strsrch, &PATTERN[0])) {
1140 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1141 if (strsrch != NULL) {
1142 delete strsrch;
1143 }
1144 return;
1145 }
1146
1147 u_unescape(PATTERN[1].pattern, temp, 512);
1148 pattern.setTo(temp, u_strlen(temp));
1149 strsrch->setPattern(pattern, status);
1150 if (pattern != strsrch->getPattern()) {
1151 errln("Error setting pattern");
1152 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1153 if (strsrch != NULL) {
1154 delete strsrch;
1155 }
1156 return;
1157 }
1158 strsrch->reset();
1159 if (U_FAILURE(status)) {
1160 errln("Error setting pattern %s", u_errorName(status));
1161 }
1162 if (!assertEqualWithStringSearch(strsrch, &PATTERN[1])) {
1163 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1164 if (strsrch != NULL) {
1165 delete strsrch;
1166 }
1167 return;
1168 }
1169
1170 u_unescape(PATTERN[0].pattern, temp, 512);
1171 pattern.setTo(temp, u_strlen(temp));
1172 strsrch->setPattern(pattern, status);
1173 if (pattern != strsrch->getPattern()) {
1174 errln("Error setting pattern");
1175 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1176 if (strsrch != NULL) {
1177 delete strsrch;
1178 }
1179 return;
1180 }
1181 strsrch->reset();
1182 if (U_FAILURE(status)) {
1183 errln("Error setting pattern %s", u_errorName(status));
1184 }
1185 if (!assertEqualWithStringSearch(strsrch, &PATTERN[0])) {
1186 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1187 if (strsrch != NULL) {
1188 delete strsrch;
1189 }
1190 return;
1191 }
1192 /* enormous pattern size to see if this crashes */
1193 for (templength = 0; templength != 512; templength ++) {
1194 temp[templength] = 0x61;
1195 }
1196 temp[511] = 0;
1197 pattern.setTo(temp, 511);
1198 strsrch->setPattern(pattern, status);
1199 if (U_FAILURE(status)) {
1200 errln("Error setting pattern with size 512, %s", u_errorName(status));
1201 }
1202 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1203 if (strsrch != NULL) {
1204 delete strsrch;
1205 }
1206 }
1207
TestText()1208 void StringSearchTest::TestText()
1209 {
1210 UChar temp[128];
1211 u_unescape(TEXT[0].text, temp, 128);
1212 UnicodeString text;
1213 text.setTo(temp, u_strlen(temp));
1214 u_unescape(TEXT[0].pattern, temp, 128);
1215 UnicodeString pattern;
1216 pattern.setTo(temp, u_strlen(temp));
1217
1218 UErrorCode status = U_ZERO_ERROR;
1219 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1220 status);
1221 if (U_FAILURE(status)) {
1222 errln("Error opening string search %s", u_errorName(status));
1223 return;
1224 }
1225 if (text != strsrch->getText()) {
1226 errln("Error setting text");
1227 }
1228 if (!assertEqualWithStringSearch(strsrch, &TEXT[0])) {
1229 delete strsrch;
1230 return;
1231 }
1232
1233 u_unescape(TEXT[1].text, temp, 128);
1234 text.setTo(temp, u_strlen(temp));
1235 strsrch->setText(text, status);
1236 if (text != strsrch->getText()) {
1237 errln("Error setting text");
1238 delete strsrch;
1239 return;
1240 }
1241 if (U_FAILURE(status)) {
1242 errln("Error setting text %s", u_errorName(status));
1243 }
1244 if (!assertEqualWithStringSearch(strsrch, &TEXT[1])) {
1245 delete strsrch;
1246 return;
1247 }
1248
1249 u_unescape(TEXT[0].text, temp, 128);
1250 text.setTo(temp, u_strlen(temp));
1251 StringCharacterIterator chariter(text);
1252 strsrch->setText(chariter, status);
1253 if (text != strsrch->getText()) {
1254 errln("Error setting text");
1255 delete strsrch;
1256 return;
1257 }
1258 if (U_FAILURE(status)) {
1259 errln("Error setting pattern %s", u_errorName(status));
1260 }
1261 if (!assertEqualWithStringSearch(strsrch, &TEXT[0])) {
1262 errln("Error searching within set text");
1263 }
1264 delete strsrch;
1265 }
1266
TestCompositeBoundaries()1267 void StringSearchTest::TestCompositeBoundaries()
1268 {
1269 int count = 0;
1270 while (COMPOSITEBOUNDARIES[count].text != NULL) {
1271 logln("composite %d", count);
1272 if (!assertEqual(&COMPOSITEBOUNDARIES[count])) {
1273 errln("Error at test number %d", count);
1274 }
1275 count ++;
1276 }
1277 }
1278
TestGetSetOffset()1279 void StringSearchTest::TestGetSetOffset()
1280 {
1281 UErrorCode status = U_ZERO_ERROR;
1282 UnicodeString pattern("1234567890123456");
1283 UnicodeString text("12345678901234567890123456789012");
1284 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_,
1285 NULL, status);
1286 /* testing out of bounds error */
1287 strsrch->setOffset(-1, status);
1288 if (U_SUCCESS(status)) {
1289 errln("Error expecting set offset error");
1290 }
1291 strsrch->setOffset(128, status);
1292 if (U_SUCCESS(status)) {
1293 errln("Error expecting set offset error");
1294 }
1295 int index = 0;
1296 while (BASIC[index].text != NULL) {
1297 UErrorCode status = U_ZERO_ERROR;
1298 SearchData search = BASIC[index ++];
1299 UChar temp[128];
1300
1301 u_unescape(search.text, temp, 128);
1302 text.setTo(temp, u_strlen(temp));
1303 u_unescape(search.pattern, temp, 128);
1304 pattern.setTo(temp, u_strlen(temp));
1305 strsrch->setText(text, status);
1306 strsrch->setPattern(pattern, status);
1307 strsrch->getCollator()->setStrength(getECollationStrength(
1308 search.strength));
1309 strsrch->reset();
1310
1311 int count = 0;
1312 int32_t matchindex = search.offset[count];
1313 while (U_SUCCESS(status) && matchindex >= 0) {
1314 int32_t matchlength = search.size[count];
1315 strsrch->next(status);
1316 if (matchindex != strsrch->getMatchedStart() ||
1317 matchlength != strsrch->getMatchedLength()) {
1318 char *str = toCharString(strsrch->getText());
1319 errln("Text: %s", str);
1320 str = toCharString(strsrch->getPattern());
1321 errln("Pattern: %s", str);
1322 errln("Error match found at %d %d",
1323 strsrch->getMatchedStart(),
1324 strsrch->getMatchedLength());
1325 return;
1326 }
1327 matchindex = search.offset[count + 1] == -1 ? -1 :
1328 search.offset[count + 2];
1329 if (search.offset[count + 1] != -1) {
1330 strsrch->setOffset(search.offset[count + 1] + 1, status);
1331 if (strsrch->getOffset() != search.offset[count + 1] + 1) {
1332 errln("Error setting offset\n");
1333 return;
1334 }
1335 }
1336
1337 count += 2;
1338 }
1339 strsrch->next(status);
1340 if (strsrch->getMatchedStart() != USEARCH_DONE) {
1341 char *str = toCharString(strsrch->getText());
1342 errln("Text: %s", str);
1343 str = toCharString(strsrch->getPattern());
1344 errln("Pattern: %s", str);
1345 errln("Error match found at %d %d",
1346 strsrch->getMatchedStart(),
1347 strsrch->getMatchedLength());
1348 return;
1349 }
1350 }
1351 strsrch->getCollator()->setStrength(getECollationStrength(
1352 UCOL_TERTIARY));
1353 delete strsrch;
1354 }
1355
TestGetSetAttribute()1356 void StringSearchTest::TestGetSetAttribute()
1357 {
1358 UErrorCode status = U_ZERO_ERROR;
1359 UnicodeString pattern("pattern");
1360 UnicodeString text("text");
1361 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1362 status);
1363 if (U_FAILURE(status)) {
1364 errln("Error opening search %s", u_errorName(status));
1365 return;
1366 }
1367
1368 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_DEFAULT, status);
1369 if (U_FAILURE(status) ||
1370 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1371 errln("Error setting overlap to the default");
1372 }
1373 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1374 if (U_FAILURE(status) ||
1375 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) {
1376 errln("Error setting overlap true");
1377 }
1378 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status);
1379 if (U_FAILURE(status) ||
1380 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1381 errln("Error setting overlap false");
1382 }
1383 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ATTRIBUTE_VALUE_COUNT,
1384 status);
1385 if (U_SUCCESS(status)) {
1386 errln("Error setting overlap to illegal value");
1387 }
1388 status = U_ZERO_ERROR;
1389 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_DEFAULT, status);
1390 if (U_FAILURE(status) ||
1391 strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF) {
1392 errln("Error setting canonical match to the default");
1393 }
1394 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1395 if (U_FAILURE(status) ||
1396 strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_ON) {
1397 errln("Error setting canonical match true");
1398 }
1399 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_OFF, status);
1400 if (U_FAILURE(status) ||
1401 strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF) {
1402 errln("Error setting canonical match false");
1403 }
1404 strsrch->setAttribute(USEARCH_CANONICAL_MATCH,
1405 USEARCH_ATTRIBUTE_VALUE_COUNT, status);
1406 if (U_SUCCESS(status)) {
1407 errln("Error setting canonical match to illegal value");
1408 }
1409 status = U_ZERO_ERROR;
1410 strsrch->setAttribute(USEARCH_ATTRIBUTE_COUNT, USEARCH_DEFAULT, status);
1411 if (U_SUCCESS(status)) {
1412 errln("Error setting illegal attribute success");
1413 }
1414
1415 delete strsrch;
1416 }
1417
TestGetMatch()1418 void StringSearchTest::TestGetMatch()
1419 {
1420 UChar temp[128];
1421 SearchData search = MATCH[0];
1422 u_unescape(search.text, temp, 128);
1423 UnicodeString text;
1424 text.setTo(temp, u_strlen(temp));
1425 u_unescape(search.pattern, temp, 128);
1426 UnicodeString pattern;
1427 pattern.setTo(temp, u_strlen(temp));
1428
1429 UErrorCode status = U_ZERO_ERROR;
1430 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1431 status);
1432 if (U_FAILURE(status)) {
1433 errln("Error opening string search %s", u_errorName(status));
1434 if (strsrch != NULL) {
1435 delete strsrch;
1436 }
1437 return;
1438 }
1439
1440 int count = 0;
1441 int32_t matchindex = search.offset[count];
1442 UnicodeString matchtext;
1443 while (U_SUCCESS(status) && matchindex >= 0) {
1444 int32_t matchlength = search.size[count];
1445 strsrch->next(status);
1446 if (matchindex != strsrch->getMatchedStart() ||
1447 matchlength != strsrch->getMatchedLength()) {
1448 char *str = toCharString(strsrch->getText());
1449 errln("Text: %s", str);
1450 str = toCharString(strsrch->getPattern());
1451 errln("Pattern: %s", str);
1452 errln("Error match found at %d %d", strsrch->getMatchedStart(),
1453 strsrch->getMatchedLength());
1454 return;
1455 }
1456 count ++;
1457
1458 status = U_ZERO_ERROR;
1459 strsrch->getMatchedText(matchtext);
1460 if (matchtext.length() != matchlength || U_FAILURE(status)){
1461 errln("Error getting match text");
1462 }
1463 matchindex = search.offset[count];
1464 }
1465 status = U_ZERO_ERROR;
1466 strsrch->next(status);
1467 if (strsrch->getMatchedStart() != USEARCH_DONE ||
1468 strsrch->getMatchedLength() != 0) {
1469 errln("Error end of match not found");
1470 }
1471 status = U_ZERO_ERROR;
1472 strsrch->getMatchedText(matchtext);
1473 if (matchtext.length() != 0) {
1474 errln("Error getting null matches");
1475 }
1476 delete strsrch;
1477 }
1478
TestSetMatch()1479 void StringSearchTest::TestSetMatch()
1480 {
1481 int count = 0;
1482 while (MATCH[count].text != NULL) {
1483 SearchData search = MATCH[count];
1484 UChar temp[128];
1485 UErrorCode status = U_ZERO_ERROR;
1486 u_unescape(search.text, temp, 128);
1487 UnicodeString text;
1488 text.setTo(temp, u_strlen(temp));
1489 u_unescape(search.pattern, temp, 128);
1490 UnicodeString pattern;
1491 pattern.setTo(temp, u_strlen(temp));
1492
1493 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_,
1494 NULL, status);
1495 if (U_FAILURE(status)) {
1496 errln("Error opening string search %s", u_errorName(status));
1497 if (strsrch != NULL) {
1498 delete strsrch;
1499 }
1500 return;
1501 }
1502
1503 int size = 0;
1504 while (search.offset[size] != -1) {
1505 size ++;
1506 }
1507
1508 if (strsrch->first(status) != search.offset[0] || U_FAILURE(status)) {
1509 errln("Error getting first match");
1510 }
1511 if (strsrch->last(status) != search.offset[size -1] ||
1512 U_FAILURE(status)) {
1513 errln("Error getting last match");
1514 }
1515
1516 int index = 0;
1517 while (index < size) {
1518 if (index + 2 < size) {
1519 if (strsrch->following(search.offset[index + 2] - 1, status)
1520 != search.offset[index + 2] || U_FAILURE(status)) {
1521 errln("Error getting following match at index %d",
1522 search.offset[index + 2] - 1);
1523 }
1524 }
1525 if (index + 1 < size) {
1526 if (strsrch->preceding(search.offset[index + 1] +
1527 search.size[index + 1] + 1,
1528 status) != search.offset[index + 1] ||
1529 U_FAILURE(status)) {
1530 errln("Error getting preceding match at index %d",
1531 search.offset[index + 1] + 1);
1532 }
1533 }
1534 index += 2;
1535 }
1536 status = U_ZERO_ERROR;
1537 if (strsrch->following(text.length(), status) != USEARCH_DONE) {
1538 errln("Error expecting out of bounds match");
1539 }
1540 if (strsrch->preceding(0, status) != USEARCH_DONE) {
1541 errln("Error expecting out of bounds match");
1542 }
1543 count ++;
1544 delete strsrch;
1545 }
1546 }
1547
TestReset()1548 void StringSearchTest::TestReset()
1549 {
1550 UErrorCode status = U_ZERO_ERROR;
1551 UnicodeString text("fish fish");
1552 UnicodeString pattern("s");
1553 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1554 status);
1555 if (U_FAILURE(status)) {
1556 errln("Error opening string search %s", u_errorName(status));
1557 if (strsrch != NULL) {
1558 delete strsrch;
1559 }
1560 return;
1561 }
1562 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1563 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1564 strsrch->setOffset(9, status);
1565 if (U_FAILURE(status)) {
1566 errln("Error setting attributes and offsets");
1567 }
1568 else {
1569 strsrch->reset();
1570 if (strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF ||
1571 strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF ||
1572 strsrch->getOffset() != 0 || strsrch->getMatchedLength() != 0 ||
1573 strsrch->getMatchedStart() != USEARCH_DONE) {
1574 errln("Error resetting string search");
1575 }
1576 strsrch->previous(status);
1577 if (strsrch->getMatchedStart() != 7 ||
1578 strsrch->getMatchedLength() != 1) {
1579 errln("Error resetting string search\n");
1580 }
1581 }
1582 delete strsrch;
1583 }
1584
TestSupplementary()1585 void StringSearchTest::TestSupplementary()
1586 {
1587 int count = 0;
1588 while (SUPPLEMENTARY[count].text != NULL) {
1589 if (!assertEqual(&SUPPLEMENTARY[count])) {
1590 errln("Error at test number %d", count);
1591 }
1592 count ++;
1593 }
1594 }
1595
TestContraction()1596 void StringSearchTest::TestContraction()
1597 {
1598 UChar temp[128];
1599 UErrorCode status = U_ZERO_ERROR;
1600
1601 u_unescape(CONTRACTIONRULE, temp, 128);
1602 UnicodeString rules;
1603 rules.setTo(temp, u_strlen(temp));
1604 RuleBasedCollator *collator = new RuleBasedCollator(rules,
1605 getECollationStrength(UCOL_TERTIARY), UCOL_ON, status);
1606 if (U_FAILURE(status)) {
1607 errln("Error opening collator %s", u_errorName(status));
1608 }
1609 UnicodeString text("text");
1610 UnicodeString pattern("pattern");
1611 StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL,
1612 status);
1613 if (U_FAILURE(status)) {
1614 errln("Error opening string search %s", u_errorName(status));
1615 }
1616
1617 int count = 0;
1618 while (CONTRACTION[count].text != NULL) {
1619 u_unescape(CONTRACTION[count].text, temp, 128);
1620 text.setTo(temp, u_strlen(temp));
1621 u_unescape(CONTRACTION[count].pattern, temp, 128);
1622 pattern.setTo(temp, u_strlen(temp));
1623 strsrch->setText(text, status);
1624 strsrch->setPattern(pattern, status);
1625 if (!assertEqualWithStringSearch(strsrch, &CONTRACTION[count])) {
1626 errln("Error at test number %d", count);
1627 }
1628 count ++;
1629 }
1630 delete strsrch;
1631 delete collator;
1632 }
1633
TestIgnorable()1634 void StringSearchTest::TestIgnorable()
1635 {
1636 UChar temp[128];
1637 u_unescape(IGNORABLERULE, temp, 128);
1638 UnicodeString rules;
1639 rules.setTo(temp, u_strlen(temp));
1640 UErrorCode status = U_ZERO_ERROR;
1641 int count = 0;
1642 RuleBasedCollator *collator = new RuleBasedCollator(rules,
1643 getECollationStrength(IGNORABLE[count].strength),
1644 UCOL_ON, status);
1645 if (U_FAILURE(status)) {
1646 errln("Error opening collator %s", u_errorName(status));
1647 return;
1648 }
1649 UnicodeString pattern("pattern");
1650 UnicodeString text("text");
1651 StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL,
1652 status);
1653 if (U_FAILURE(status)) {
1654 errln("Error opening string search %s", u_errorName(status));
1655 delete collator;
1656 return;
1657 }
1658
1659 while (IGNORABLE[count].text != NULL) {
1660 u_unescape(IGNORABLE[count].text, temp, 128);
1661 text.setTo(temp, u_strlen(temp));
1662 u_unescape(IGNORABLE[count].pattern, temp, 128);
1663 pattern.setTo(temp, u_strlen(temp));
1664 strsrch->setText(text, status);
1665 strsrch->setPattern(pattern, status);
1666 if (!assertEqualWithStringSearch(strsrch, &IGNORABLE[count])) {
1667 errln("Error at test number %d", count);
1668 }
1669 count ++;
1670 }
1671 delete strsrch;
1672 delete collator;
1673 }
1674
TestDiacriticMatch()1675 void StringSearchTest::TestDiacriticMatch()
1676 {
1677 UChar temp[128];
1678 UErrorCode status = U_ZERO_ERROR;
1679 int count = 0;
1680 RuleBasedCollator* coll = NULL;
1681 StringSearch *strsrch = NULL;
1682
1683 UnicodeString pattern("pattern");
1684 UnicodeString text("text");
1685
1686 const SearchData *search;
1687
1688 search = &(DIACRITICMATCH[count]);
1689 while (search->text != NULL) {
1690 coll = getCollator(search->collator);
1691 coll->setStrength(getECollationStrength(search->strength));
1692 strsrch = new StringSearch(pattern, text, coll, getBreakIterator(search->breaker), status);
1693 if (U_FAILURE(status)) {
1694 errln("Error opening string search %s", u_errorName(status));
1695 return;
1696 }
1697 u_unescape(search->text, temp, 128);
1698 text.setTo(temp, u_strlen(temp));
1699 u_unescape(search->pattern, temp, 128);
1700 pattern.setTo(temp, u_strlen(temp));
1701 strsrch->setText(text, status);
1702 strsrch->setPattern(pattern, status);
1703 if (!assertEqualWithStringSearch(strsrch, search)) {
1704 errln("Error at test number %d", count);
1705 }
1706 search = &(DIACRITICMATCH[++count]);
1707 delete strsrch;
1708 }
1709
1710 }
1711
TestCanonical()1712 void StringSearchTest::TestCanonical()
1713 {
1714 int count = 0;
1715 while (BASICCANONICAL[count].text != NULL) {
1716 if (!assertCanonicalEqual(&BASICCANONICAL[count])) {
1717 errln("Error at test number %d", count);
1718 }
1719 count ++;
1720 }
1721 }
1722
TestNormCanonical()1723 void StringSearchTest::TestNormCanonical()
1724 {
1725 UErrorCode status = U_ZERO_ERROR;
1726 m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
1727 int count = 0;
1728 while (NORMCANONICAL[count].text != NULL) {
1729 if (!assertCanonicalEqual(&NORMCANONICAL[count])) {
1730 errln("Error at test number %d", count);
1731 }
1732 count ++;
1733 }
1734 m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
1735 }
1736
TestStrengthCanonical()1737 void StringSearchTest::TestStrengthCanonical()
1738 {
1739 int count = 0;
1740 while (STRENGTHCANONICAL[count].text != NULL) {
1741 if (!assertCanonicalEqual(&STRENGTHCANONICAL[count])) {
1742 errln("Error at test number %d", count);
1743 }
1744 count ++;
1745 }
1746 }
1747
1748 #if !UCONFIG_NO_BREAK_ITERATION
1749
TestBreakIteratorCanonical()1750 void StringSearchTest::TestBreakIteratorCanonical()
1751 {
1752 UErrorCode status = U_ZERO_ERROR;
1753 int count = 0;
1754
1755 while (count < 4) {
1756 // special purposes for tests numbers 0-3
1757 UChar temp[128];
1758 const SearchData *search = &(BREAKITERATORCANONICAL[count]);
1759
1760 u_unescape(search->text, temp, 128);
1761 UnicodeString text;
1762 text.setTo(temp, u_strlen(temp));
1763 u_unescape(search->pattern, temp, 128);
1764 UnicodeString pattern;
1765 pattern.setTo(temp, u_strlen(temp));
1766 RuleBasedCollator *collator = getCollator(search->collator);
1767 collator->setStrength(getECollationStrength(search->strength));
1768
1769 BreakIterator *breaker = getBreakIterator(search->breaker);
1770 StringSearch *strsrch = new StringSearch(pattern, text, collator,
1771 breaker, status);
1772 if (U_FAILURE(status)) {
1773 errln("Error creating string search data");
1774 return;
1775 }
1776 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1777 if (U_FAILURE(status) ||
1778 strsrch->getBreakIterator() != breaker) {
1779 errln("Error setting break iterator");
1780 delete strsrch;
1781 return;
1782 }
1783 if (!assertEqualWithStringSearch(strsrch, search)) {
1784 collator->setStrength(getECollationStrength(UCOL_TERTIARY));
1785 delete strsrch;
1786 return;
1787 }
1788 search = &(BREAKITERATOREXACT[count + 1]);
1789 breaker = getBreakIterator(search->breaker);
1790 if (breaker == NULL) {
1791 errln("Error creating BreakIterator");
1792 return;
1793 }
1794 breaker->setText(strsrch->getText());
1795 strsrch->setBreakIterator(breaker, status);
1796 if (U_FAILURE(status) || strsrch->getBreakIterator() != breaker) {
1797 errln("Error setting break iterator");
1798 delete strsrch;
1799 return;
1800 }
1801 strsrch->reset();
1802 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1803 if (!assertEqualWithStringSearch(strsrch, search)) {
1804 errln("Error at test number %d", count);
1805 return;
1806 }
1807 delete strsrch;
1808 count += 2;
1809 }
1810 count = 0;
1811 while (BREAKITERATORCANONICAL[count].text != NULL) {
1812 if (!assertEqual(&BREAKITERATORCANONICAL[count])) {
1813 errln("Error at test number %d", count);
1814 return;
1815 }
1816 count ++;
1817 }
1818 }
1819
1820 #endif
1821
TestVariableCanonical()1822 void StringSearchTest::TestVariableCanonical()
1823 {
1824 int count = 0;
1825 UErrorCode status = U_ZERO_ERROR;
1826 m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
1827 if (U_FAILURE(status)) {
1828 errln("Error setting collation alternate attribute %s",
1829 u_errorName(status));
1830 }
1831 while (VARIABLE[count].text != NULL) {
1832 logln("variable %d", count);
1833 if (!assertCanonicalEqual(&VARIABLE[count])) {
1834 errln("Error at test number %d", count);
1835 }
1836 count ++;
1837 }
1838 m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
1839 status);
1840 }
1841
TestOverlapCanonical()1842 void StringSearchTest::TestOverlapCanonical()
1843 {
1844 int count = 0;
1845 while (OVERLAPCANONICAL[count].text != NULL) {
1846 if (!assertEqualWithAttribute(&OVERLAPCANONICAL[count], USEARCH_ON,
1847 USEARCH_ON)) {
1848 errln("Error at overlap test number %d", count);
1849 }
1850 count ++;
1851 }
1852 count = 0;
1853 while (NONOVERLAP[count].text != NULL) {
1854 if (!assertCanonicalEqual(&NONOVERLAPCANONICAL[count])) {
1855 errln("Error at non overlap test number %d", count);
1856 }
1857 count ++;
1858 }
1859
1860 count = 0;
1861 while (count < 1) {
1862 UChar temp[128];
1863 const SearchData *search = &(OVERLAPCANONICAL[count]);
1864 UErrorCode status = U_ZERO_ERROR;
1865
1866 u_unescape(search->text, temp, 128);
1867 UnicodeString text;
1868 text.setTo(temp, u_strlen(temp));
1869 u_unescape(search->pattern, temp, 128);
1870 UnicodeString pattern;
1871 pattern.setTo(temp, u_strlen(temp));
1872 RuleBasedCollator *collator = getCollator(search->collator);
1873 StringSearch *strsrch = new StringSearch(pattern, text, collator,
1874 NULL, status);
1875 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1876 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1877 if (U_FAILURE(status) ||
1878 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) {
1879 errln("Error setting overlap option");
1880 }
1881 if (!assertEqualWithStringSearch(strsrch, search)) {
1882 delete strsrch;
1883 return;
1884 }
1885 search = &(NONOVERLAPCANONICAL[count]);
1886 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status);
1887 if (U_FAILURE(status) ||
1888 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1889 errln("Error setting overlap option");
1890 }
1891 strsrch->reset();
1892 if (!assertEqualWithStringSearch(strsrch, search)) {
1893 delete strsrch;
1894 errln("Error at test number %d", count);
1895 }
1896
1897 count ++;
1898 delete strsrch;
1899 }
1900 }
1901
TestCollatorCanonical()1902 void StringSearchTest::TestCollatorCanonical()
1903 {
1904 /* test collator that thinks "o" and "p" are the same thing */
1905 UChar temp[128];
1906 u_unescape(COLLATORCANONICAL[0].text, temp, 128);
1907 UnicodeString text;
1908 text.setTo(temp, u_strlen(temp));
1909 u_unescape(COLLATORCANONICAL[0].pattern, temp, 128);
1910 UnicodeString pattern;
1911 pattern.setTo(temp, u_strlen(temp));
1912
1913 UErrorCode status = U_ZERO_ERROR;
1914 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_,
1915 NULL, status);
1916 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1917 if (U_FAILURE(status)) {
1918 errln("Error opening string search %s", u_errorName(status));
1919 }
1920 if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[0])) {
1921 delete strsrch;
1922 return;
1923 }
1924
1925 u_unescape(TESTCOLLATORRULE, temp, 128);
1926 UnicodeString rules;
1927 rules.setTo(temp, u_strlen(temp));
1928 RuleBasedCollator *tailored = new RuleBasedCollator(rules,
1929 getECollationStrength(COLLATORCANONICAL[1].strength),
1930 UCOL_ON, status);
1931
1932 if (U_FAILURE(status)) {
1933 errln("Error opening rule based collator %s", u_errorName(status));
1934 }
1935
1936 strsrch->setCollator(tailored, status);
1937 if (U_FAILURE(status) || *(strsrch->getCollator()) != *tailored) {
1938 errln("Error setting rule based collator");
1939 }
1940 strsrch->reset();
1941 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1942 if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[1])) {
1943 delete strsrch;
1944 if (tailored != NULL) {
1945 delete tailored;
1946 }
1947
1948 return;
1949 }
1950
1951 strsrch->setCollator(m_en_us_, status);
1952 strsrch->reset();
1953 if (U_FAILURE(status) || *(strsrch->getCollator()) != *m_en_us_) {
1954 errln("Error setting rule based collator");
1955 }
1956 if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[0])) {
1957 }
1958 delete strsrch;
1959 if (tailored != NULL) {
1960 delete tailored;
1961 }
1962 }
1963
TestPatternCanonical()1964 void StringSearchTest::TestPatternCanonical()
1965 {
1966
1967 UChar temp[128];
1968
1969 u_unescape(PATTERNCANONICAL[0].text, temp, 128);
1970 UnicodeString text;
1971 text.setTo(temp, u_strlen(temp));
1972 u_unescape(PATTERNCANONICAL[0].pattern, temp, 128);
1973 UnicodeString pattern;
1974 pattern.setTo(temp, u_strlen(temp));
1975
1976 m_en_us_->setStrength(
1977 getECollationStrength(PATTERNCANONICAL[0].strength));
1978
1979 UErrorCode status = U_ZERO_ERROR;
1980 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1981 status);
1982 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1983 if (U_FAILURE(status)) {
1984 errln("Error opening string search %s", u_errorName(status));
1985 goto ENDTESTPATTERN;
1986 }
1987 if (pattern != strsrch->getPattern()) {
1988 errln("Error setting pattern");
1989 }
1990 if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[0])) {
1991 goto ENDTESTPATTERN;
1992 }
1993
1994 u_unescape(PATTERNCANONICAL[1].pattern, temp, 128);
1995 pattern.setTo(temp, u_strlen(temp));
1996 strsrch->setPattern(pattern, status);
1997 if (pattern != strsrch->getPattern()) {
1998 errln("Error setting pattern");
1999 goto ENDTESTPATTERN;
2000 }
2001 strsrch->reset();
2002 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2003 if (U_FAILURE(status)) {
2004 errln("Error setting pattern %s", u_errorName(status));
2005 }
2006 if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[1])) {
2007 goto ENDTESTPATTERN;
2008 }
2009
2010 u_unescape(PATTERNCANONICAL[0].pattern, temp, 128);
2011 pattern.setTo(temp, u_strlen(temp));
2012 strsrch->setPattern(pattern, status);
2013 if (pattern != strsrch->getPattern()) {
2014 errln("Error setting pattern");
2015 goto ENDTESTPATTERN;
2016 }
2017 strsrch->reset();
2018 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2019 if (U_FAILURE(status)) {
2020 errln("Error setting pattern %s", u_errorName(status));
2021 }
2022 if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[0])) {
2023 goto ENDTESTPATTERN;
2024 }
2025 ENDTESTPATTERN:
2026 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
2027 if (strsrch != NULL) {
2028 delete strsrch;
2029 }
2030 }
2031
TestTextCanonical()2032 void StringSearchTest::TestTextCanonical()
2033 {
2034 UChar temp[128];
2035 u_unescape(TEXTCANONICAL[0].text, temp, 128);
2036 UnicodeString text;
2037 text.setTo(temp, u_strlen(temp));
2038 u_unescape(TEXTCANONICAL[0].pattern, temp, 128);
2039 UnicodeString pattern;
2040 pattern.setTo(temp, u_strlen(temp));
2041
2042 UErrorCode status = U_ZERO_ERROR;
2043 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
2044 status);
2045 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2046
2047 if (U_FAILURE(status)) {
2048 errln("Error opening string search %s", u_errorName(status));
2049 goto ENDTESTPATTERN;
2050 }
2051 if (text != strsrch->getText()) {
2052 errln("Error setting text");
2053 }
2054 if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[0])) {
2055 goto ENDTESTPATTERN;
2056 }
2057
2058 u_unescape(TEXTCANONICAL[1].text, temp, 128);
2059 text.setTo(temp, u_strlen(temp));
2060 strsrch->setText(text, status);
2061 if (text != strsrch->getText()) {
2062 errln("Error setting text");
2063 goto ENDTESTPATTERN;
2064 }
2065 if (U_FAILURE(status)) {
2066 errln("Error setting text %s", u_errorName(status));
2067 }
2068 if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[1])) {
2069 goto ENDTESTPATTERN;
2070 }
2071
2072 u_unescape(TEXTCANONICAL[0].text, temp, 128);
2073 text.setTo(temp, u_strlen(temp));
2074 strsrch->setText(text, status);
2075 if (text != strsrch->getText()) {
2076 errln("Error setting text");
2077 goto ENDTESTPATTERN;
2078 }
2079 if (U_FAILURE(status)) {
2080 errln("Error setting pattern %s", u_errorName(status));
2081 }
2082 if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[0])) {
2083 goto ENDTESTPATTERN;
2084 }
2085 ENDTESTPATTERN:
2086 if (strsrch != NULL) {
2087 delete strsrch;
2088 }
2089 }
2090
TestCompositeBoundariesCanonical()2091 void StringSearchTest::TestCompositeBoundariesCanonical()
2092 {
2093 int count = 0;
2094 while (COMPOSITEBOUNDARIESCANONICAL[count].text != NULL) {
2095 logln("composite %d", count);
2096 if (!assertCanonicalEqual(&COMPOSITEBOUNDARIESCANONICAL[count])) {
2097 errln("Error at test number %d", count);
2098 }
2099 count ++;
2100 }
2101 }
2102
TestGetSetOffsetCanonical()2103 void StringSearchTest::TestGetSetOffsetCanonical()
2104 {
2105
2106 UErrorCode status = U_ZERO_ERROR;
2107 UnicodeString text("text");
2108 UnicodeString pattern("pattern");
2109 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
2110 status);
2111 Collator *collator = strsrch->getCollator();
2112
2113 collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
2114
2115 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2116 /* testing out of bounds error */
2117 strsrch->setOffset(-1, status);
2118 if (U_SUCCESS(status)) {
2119 errln("Error expecting set offset error");
2120 }
2121 strsrch->setOffset(128, status);
2122 if (U_SUCCESS(status)) {
2123 errln("Error expecting set offset error");
2124 }
2125 int index = 0;
2126 UChar temp[128];
2127 while (BASICCANONICAL[index].text != NULL) {
2128 SearchData search = BASICCANONICAL[index ++];
2129 if (BASICCANONICAL[index].text == NULL) {
2130 /* skip the last one */
2131 break;
2132 }
2133
2134 u_unescape(search.text, temp, 128);
2135 text.setTo(temp, u_strlen(temp));
2136 u_unescape(search.pattern, temp, 128);
2137 pattern.setTo(temp, u_strlen(temp));
2138
2139 UErrorCode status = U_ZERO_ERROR;
2140 strsrch->setText(text, status);
2141
2142 strsrch->setPattern(pattern, status);
2143
2144 int count = 0;
2145 int32_t matchindex = search.offset[count];
2146 while (U_SUCCESS(status) && matchindex >= 0) {
2147 int32_t matchlength = search.size[count];
2148 strsrch->next(status);
2149 if (matchindex != strsrch->getMatchedStart() ||
2150 matchlength != strsrch->getMatchedLength()) {
2151 char *str = toCharString(strsrch->getText());
2152 errln("Text: %s", str);
2153 str = toCharString(strsrch->getPattern());
2154 errln("Pattern: %s", str);
2155 errln("Error match found at %d %d",
2156 strsrch->getMatchedStart(),
2157 strsrch->getMatchedLength());
2158 goto bail;
2159 }
2160 matchindex = search.offset[count + 1] == -1 ? -1 :
2161 search.offset[count + 2];
2162 if (search.offset[count + 1] != -1) {
2163 strsrch->setOffset(search.offset[count + 1] + 1, status);
2164 if (strsrch->getOffset() != search.offset[count + 1] + 1) {
2165 errln("Error setting offset");
2166 goto bail;
2167 }
2168 }
2169
2170 count += 2;
2171 }
2172 strsrch->next(status);
2173 if (strsrch->getMatchedStart() != USEARCH_DONE) {
2174 char *str = toCharString(strsrch->getText());
2175 errln("Text: %s", str);
2176 str = toCharString(strsrch->getPattern());
2177 errln("Pattern: %s", str);
2178 errln("Error match found at %d %d", strsrch->getMatchedStart(),
2179 strsrch->getMatchedLength());
2180 goto bail;
2181 }
2182 }
2183
2184 bail:
2185 collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
2186 delete strsrch;
2187 }
2188
TestSupplementaryCanonical()2189 void StringSearchTest::TestSupplementaryCanonical()
2190 {
2191 int count = 0;
2192 while (SUPPLEMENTARYCANONICAL[count].text != NULL) {
2193 if (!assertCanonicalEqual(&SUPPLEMENTARYCANONICAL[count])) {
2194 errln("Error at test number %d", count);
2195 }
2196 count ++;
2197 }
2198 }
2199
TestContractionCanonical()2200 void StringSearchTest::TestContractionCanonical()
2201 {
2202 UChar temp[128];
2203
2204 u_unescape(CONTRACTIONRULE, temp, 128);
2205 UnicodeString rules;
2206 rules.setTo(temp, u_strlen(temp));
2207
2208 UErrorCode status = U_ZERO_ERROR;
2209 RuleBasedCollator *collator = new RuleBasedCollator(rules,
2210 getECollationStrength(UCOL_TERTIARY), UCOL_ON, status);
2211 if (U_FAILURE(status)) {
2212 errln("Error opening collator %s", u_errorName(status));
2213 }
2214 UnicodeString text("text");
2215 UnicodeString pattern("pattern");
2216 StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL,
2217 status);
2218 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2219 if (U_FAILURE(status)) {
2220 errln("Error opening string search %s", u_errorName(status));
2221 }
2222
2223 int count = 0;
2224 while (CONTRACTIONCANONICAL[count].text != NULL) {
2225 u_unescape(CONTRACTIONCANONICAL[count].text, temp, 128);
2226 text.setTo(temp, u_strlen(temp));
2227 u_unescape(CONTRACTIONCANONICAL[count].pattern, temp, 128);
2228 pattern.setTo(temp, u_strlen(temp));
2229 strsrch->setText(text, status);
2230 strsrch->setPattern(pattern, status);
2231 if (!assertEqualWithStringSearch(strsrch,
2232 &CONTRACTIONCANONICAL[count])) {
2233 errln("Error at test number %d", count);
2234 }
2235 count ++;
2236 }
2237 delete strsrch;
2238 delete collator;
2239 }
2240
TestUClassID()2241 void StringSearchTest::TestUClassID()
2242 {
2243 char id = *((char *)StringSearch::getStaticClassID());
2244 if (id != 0) {
2245 errln("Static class id for StringSearch should be 0");
2246 }
2247 UErrorCode status = U_ZERO_ERROR;
2248 UnicodeString text("text");
2249 UnicodeString pattern("pattern");
2250 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
2251 status);
2252 id = *((char *)strsrch->getDynamicClassID());
2253 if (id != 0) {
2254 errln("Dynamic class id for StringSearch should be 0");
2255 }
2256 delete strsrch;
2257 }
2258
2259 class TestSearch : public SearchIterator
2260 {
2261 public:
2262 TestSearch(const TestSearch &obj);
2263 TestSearch(const UnicodeString &text,
2264 BreakIterator *breakiter,
2265 const UnicodeString &pattern);
2266 ~TestSearch();
2267
2268 void setOffset(int32_t position, UErrorCode &status) override;
2269 int32_t getOffset() const override;
2270 SearchIterator* safeClone() const override;
2271
2272
2273 /**
2274 * ICU "poor man's RTTI", returns a UClassID for the actual class.
2275 *
2276 * @draft ICU 2.2
2277 */
getDynamicClassID() const2278 virtual inline UClassID getDynamicClassID() const override { return getStaticClassID(); }
2279
2280 /**
2281 * ICU "poor man's RTTI", returns a UClassID for this class.
2282 *
2283 * @draft ICU 2.2
2284 */
getStaticClassID()2285 static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
2286
2287 bool operator!=(const TestSearch &that) const;
2288
2289 UnicodeString m_pattern_;
2290
2291 protected:
2292 int32_t handleNext(int32_t position, UErrorCode &status) override;
2293 int32_t handlePrev(int32_t position, UErrorCode &status) override;
2294 TestSearch & operator=(const TestSearch &that);
2295
2296 private:
2297
2298 /**
2299 * The address of this static class variable serves as this class's ID
2300 * for ICU "poor man's RTTI".
2301 */
2302 static const char fgClassID;
2303 uint32_t m_offset_;
2304 };
2305
2306 const char TestSearch::fgClassID=0;
2307
/**
 * Copy constructor: copies the SearchIterator base part, then the pattern
 * and the current offset of obj.
 */
TestSearch::TestSearch(const TestSearch &obj)
    : SearchIterator(obj),
      m_pattern_(obj.m_pattern_),
      m_offset_(obj.m_offset_)
{
}
2313
TestSearch(const UnicodeString & text,BreakIterator * breakiter,const UnicodeString & pattern)2314 TestSearch::TestSearch(const UnicodeString &text,
2315 BreakIterator *breakiter,
2316 const UnicodeString &pattern) : SearchIterator()
2317 {
2318 m_breakiterator_ = breakiter;
2319 m_pattern_ = pattern;
2320 m_text_ = text;
2321 m_offset_ = 0;
2322 m_pattern_ = pattern;
2323 }
2324
~TestSearch()2325 TestSearch::~TestSearch()
2326 {
2327 }
2328
2329
setOffset(int32_t position,UErrorCode & status)2330 void TestSearch::setOffset(int32_t position, UErrorCode &status)
2331 {
2332 if (position >= 0 && position <= m_text_.length()) {
2333 m_offset_ = position;
2334 }
2335 else {
2336 status = U_INDEX_OUTOFBOUNDS_ERROR;
2337 }
2338 }
2339
getOffset() const2340 int32_t TestSearch::getOffset() const
2341 {
2342 return m_offset_;
2343 }
2344
safeClone() const2345 SearchIterator * TestSearch::safeClone() const
2346 {
2347 return new TestSearch(m_text_, m_breakiterator_, m_pattern_);
2348 }
2349
operator !=(const TestSearch & that) const2350 bool TestSearch::operator!=(const TestSearch &that) const
2351 {
2352 if (SearchIterator::operator !=(that)) {
2353 return false;
2354 }
2355 return m_offset_ != that.m_offset_ || m_pattern_ != that.m_pattern_;
2356 }
2357
handleNext(int32_t start,UErrorCode & status)2358 int32_t TestSearch::handleNext(int32_t start, UErrorCode &status)
2359 {
2360 if(U_SUCCESS(status)) {
2361 int match = m_text_.indexOf(m_pattern_, start);
2362 if (match < 0) {
2363 m_offset_ = m_text_.length();
2364 setMatchStart(m_offset_);
2365 setMatchLength(0);
2366 return USEARCH_DONE;
2367 }
2368 setMatchStart(match);
2369 m_offset_ = match;
2370 setMatchLength(m_pattern_.length());
2371 return match;
2372 } else {
2373 return USEARCH_DONE;
2374 }
2375 }
2376
handlePrev(int32_t start,UErrorCode & status)2377 int32_t TestSearch::handlePrev(int32_t start, UErrorCode &status)
2378 {
2379 if(U_SUCCESS(status)) {
2380 int match = m_text_.lastIndexOf(m_pattern_, 0, start);
2381 if (match < 0) {
2382 m_offset_ = 0;
2383 setMatchStart(m_offset_);
2384 setMatchLength(0);
2385 return USEARCH_DONE;
2386 }
2387 setMatchStart(match);
2388 m_offset_ = match;
2389 setMatchLength(m_pattern_.length());
2390 return match;
2391 } else {
2392 return USEARCH_DONE;
2393 }
2394 }
2395
/**
 * Assignment: delegates to SearchIterator::operator= for the base part and
 * then copies the pattern and offset of that.
 *
 * @return *this
 */
TestSearch & TestSearch::operator=(const TestSearch &that)
{
    SearchIterator::operator=(that);
    m_pattern_ = that.m_pattern_;
    m_offset_  = that.m_offset_;
    return *this;
}
2403
TestSubclass()2404 void StringSearchTest::TestSubclass()
2405 {
2406 UnicodeString text("abc abcd abc");
2407 UnicodeString pattern("abc");
2408 TestSearch search(text, NULL, pattern);
2409 TestSearch search2(search);
2410 int expected[] = {0, 4, 9};
2411 UErrorCode status = U_ZERO_ERROR;
2412 int i;
2413 StringCharacterIterator chariter(text);
2414
2415 search.setText(text, status);
2416 if (search.getText() != search2.getText()) {
2417 errln("Error setting text");
2418 }
2419
2420 search.setText(chariter, status);
2421 if (search.getText() != search2.getText()) {
2422 errln("Error setting text");
2423 }
2424
2425 search.reset();
2426 // comparing constructors
2427
2428 for (i = 0; i < UPRV_LENGTHOF(expected); i ++) {
2429 if (search.next(status) != expected[i]) {
2430 errln("Error getting next match");
2431 }
2432 if (search.getMatchedLength() != search.m_pattern_.length()) {
2433 errln("Error getting next match length");
2434 }
2435 }
2436 if (search.next(status) != USEARCH_DONE) {
2437 errln("Error should have reached the end of the iteration");
2438 }
2439 for (i = UPRV_LENGTHOF(expected) - 1; i >= 0; i --) {
2440 if (search.previous(status) != expected[i]) {
2441 errln("Error getting previous match");
2442 }
2443 if (search.getMatchedLength() != search.m_pattern_.length()) {
2444 errln("Error getting previous match length");
2445 }
2446 }
2447 if (search.previous(status) != USEARCH_DONE) {
2448 errln("Error should have reached the start of the iteration");
2449 }
2450 }
2451
2452 class StubSearchIterator:public SearchIterator{
2453 public:
StubSearchIterator()2454 StubSearchIterator(){}
setOffset(int32_t,UErrorCode &)2455 virtual void setOffset(int32_t , UErrorCode &) override {}
getOffset(void) const2456 virtual int32_t getOffset(void) const override {return 0;}
safeClone(void) const2457 virtual SearchIterator* safeClone(void) const override {return NULL;}
handleNext(int32_t,UErrorCode &)2458 virtual int32_t handleNext(int32_t , UErrorCode &) override {return 0;}
handlePrev(int32_t,UErrorCode &)2459 virtual int32_t handlePrev(int32_t , UErrorCode &) override {return 0;}
getDynamicClassID() const2460 virtual UClassID getDynamicClassID() const override {
2461 static char classID = 0;
2462 return (UClassID)&classID;
2463 }
2464 };
2465
TestCoverage()2466 void StringSearchTest::TestCoverage(){
2467 StubSearchIterator stub1, stub2;
2468 UErrorCode status = U_ZERO_ERROR;
2469
2470 if (stub1 != stub2){
2471 errln("new StubSearchIterator should be equal");
2472 }
2473
2474 stub2.setText(UnicodeString("ABC"), status);
2475 if (U_FAILURE(status)) {
2476 errln("Error: SearchIterator::SetText");
2477 }
2478
2479 stub1 = stub2;
2480 if (stub1 != stub2){
2481 errln("SearchIterator::operator = assigned object should be equal");
2482 }
2483 }
2484
2485 #endif /* !UCONFIG_NO_BREAK_ITERATION */
2486
2487 #endif /* #if !UCONFIG_NO_COLLATION */
2488